From 39ce85f3b18597b17a3db74906db2c6360d66c16 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Mon, 23 Sep 2019 13:41:57 +0300 Subject: RDMA/bnxt_re: Remove unsupported modify_device callback There is no need to return always zero for function which is not supported. Link: https://lore.kernel.org/r/20190923104158.5331-3-kamalheib1@gmail.com Signed-off-by: Kamal Heib Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 18 ------------------ drivers/infiniband/hw/bnxt_re/ib_verbs.h | 3 --- drivers/infiniband/hw/bnxt_re/main.c | 1 - 3 files changed, 22 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index b4149dc9e824..8afd7d93cfe4 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -191,24 +191,6 @@ int bnxt_re_query_device(struct ib_device *ibdev, return 0; } -int bnxt_re_modify_device(struct ib_device *ibdev, - int device_modify_mask, - struct ib_device_modify *device_modify) -{ - switch (device_modify_mask) { - case IB_DEVICE_MODIFY_SYS_IMAGE_GUID: - /* Modify the GUID requires the modification of the GID table */ - /* GUID should be made as READ-ONLY */ - break; - case IB_DEVICE_MODIFY_NODE_DESC: - /* Node Desc should be made as READ-ONLY */ - break; - default: - break; - } - return 0; -} - /* Port */ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num, struct ib_port_attr *port_attr) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 31662b1ee35a..23d972da5652 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -145,9 +145,6 @@ struct bnxt_re_ucontext { int bnxt_re_query_device(struct ib_device *ibdev, struct ib_device_attr *ib_attr, struct ib_udata *udata); -int bnxt_re_modify_device(struct ib_device *ibdev, - int device_modify_mask, - struct ib_device_modify *device_modify); int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num, struct ib_port_attr *port_attr); int bnxt_re_get_port_immutable(struct ib_device *ibdev, u8 port_num, diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 30a54f8aa42c..7b914bdd7c13 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -625,7 +625,6 @@ static const struct ib_device_ops bnxt_re_dev_ops = { .map_mr_sg = bnxt_re_map_mr_sg, .mmap = bnxt_re_mmap, .modify_ah = bnxt_re_modify_ah, - .modify_device = bnxt_re_modify_device, .modify_qp = bnxt_re_modify_qp, .modify_srq = bnxt_re_modify_srq, .poll_cq = bnxt_re_poll_cq, -- cgit From 30e0f6cf5acb39cd04316d1eecbf4c6087c7ee02 Mon Sep 17 00:00:00 2001 From: Potnuri Bharat Teja Date: Mon, 30 Sep 2019 13:12:52 +0530 Subject: RDMA/iw_cxgb3: Remove the iw_cxgb3 module from kernel Remove iw_cxgb3 module from kernel as the corresponding HW Chelsio T3 has reached EOL. Link: https://lore.kernel.org/r/20190930074252.20133-1-bharat@chelsio.com Signed-off-by: Potnuri Bharat Teja Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/Makefile | 1 - drivers/infiniband/hw/cxgb3/Kconfig | 19 - drivers/infiniband/hw/cxgb3/Makefile | 7 - drivers/infiniband/hw/cxgb3/cxio_hal.c | 1312 ---------------- drivers/infiniband/hw/cxgb3/cxio_hal.h | 204 --- drivers/infiniband/hw/cxgb3/cxio_resource.c | 344 ---- drivers/infiniband/hw/cxgb3/cxio_resource.h | 69 - drivers/infiniband/hw/cxgb3/cxio_wr.h | 802 ---------- drivers/infiniband/hw/cxgb3/iwch.c | 282 ---- drivers/infiniband/hw/cxgb3/iwch.h | 155 -- drivers/infiniband/hw/cxgb3/iwch_cm.c | 2258 --------------------------- drivers/infiniband/hw/cxgb3/iwch_cm.h | 233 --- drivers/infiniband/hw/cxgb3/iwch_cq.c | 230 --- drivers/infiniband/hw/cxgb3/iwch_ev.c | 232 --- drivers/infiniband/hw/cxgb3/iwch_mem.c | 101 -- drivers/infiniband/hw/cxgb3/iwch_provider.c | 1321 ---------------- drivers/infiniband/hw/cxgb3/iwch_provider.h | 347 ---- drivers/infiniband/hw/cxgb3/iwch_qp.c | 1082 ------------- drivers/infiniband/hw/cxgb3/tcb.h | 632 -------- 19 files changed, 9631 deletions(-) delete mode 100644 drivers/infiniband/hw/cxgb3/Kconfig delete mode 100644 drivers/infiniband/hw/cxgb3/Makefile delete mode 100644 drivers/infiniband/hw/cxgb3/cxio_hal.c delete mode 100644 drivers/infiniband/hw/cxgb3/cxio_hal.h delete mode 100644 drivers/infiniband/hw/cxgb3/cxio_resource.c delete mode 100644 drivers/infiniband/hw/cxgb3/cxio_resource.h delete mode 100644 drivers/infiniband/hw/cxgb3/cxio_wr.h delete mode 100644 drivers/infiniband/hw/cxgb3/iwch.c delete mode 100644 drivers/infiniband/hw/cxgb3/iwch.h delete mode 100644 drivers/infiniband/hw/cxgb3/iwch_cm.c delete mode 100644 drivers/infiniband/hw/cxgb3/iwch_cm.h delete mode 100644 drivers/infiniband/hw/cxgb3/iwch_cq.c delete mode 100644 drivers/infiniband/hw/cxgb3/iwch_ev.c delete mode 100644 drivers/infiniband/hw/cxgb3/iwch_mem.c delete mode 100644 drivers/infiniband/hw/cxgb3/iwch_provider.c delete mode 100644 drivers/infiniband/hw/cxgb3/iwch_provider.h delete mode 100644 drivers/infiniband/hw/cxgb3/iwch_qp.c delete mode 100644 drivers/infiniband/hw/cxgb3/tcb.h (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile index 433fca59febd..0aeccd984889 100644 --- a/drivers/infiniband/hw/Makefile +++ b/drivers/infiniband/hw/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_INFINIBAND_MTHCA) += mthca/ obj-$(CONFIG_INFINIBAND_QIB) += qib/ -obj-$(CONFIG_INFINIBAND_CXGB3) += cxgb3/ obj-$(CONFIG_INFINIBAND_CXGB4) += cxgb4/ obj-$(CONFIG_INFINIBAND_EFA) += efa/ obj-$(CONFIG_INFINIBAND_I40IW) += i40iw/ diff --git a/drivers/infiniband/hw/cxgb3/Kconfig b/drivers/infiniband/hw/cxgb3/Kconfig deleted file mode 100644 index 8c1a72bff447..000000000000 --- a/drivers/infiniband/hw/cxgb3/Kconfig +++ /dev/null @@ -1,19 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -config INFINIBAND_CXGB3 - tristate "Chelsio RDMA Driver" - depends on CHELSIO_T3 - select GENERIC_ALLOCATOR - ---help--- - This is an iWARP/RDMA driver for the Chelsio T3 1GbE and - 10GbE adapters. - - For general information about Chelsio and our products, visit - our website at . - - For customer support, please visit our customer support page at - . - - Please send feedback to . - - To compile this driver as a module, choose M here: the module - will be called iw_cxgb3. diff --git a/drivers/infiniband/hw/cxgb3/Makefile b/drivers/infiniband/hw/cxgb3/Makefile deleted file mode 100644 index 34bb86a6ae3a..000000000000 --- a/drivers/infiniband/hw/cxgb3/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -ccflags-y := -I $(srctree)/drivers/net/ethernet/chelsio/cxgb3 - -obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o - -iw_cxgb3-y := iwch_cm.o iwch_ev.o iwch_cq.o iwch_qp.o iwch_mem.o \ - iwch_provider.o iwch.o cxio_hal.o cxio_resource.o diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c deleted file mode 100644 index 95b22a651673..000000000000 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ /dev/null @@ -1,1312 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "cxio_resource.h" -#include "cxio_hal.h" -#include "cxgb3_offload.h" -#include "sge_defs.h" - -static LIST_HEAD(rdev_list); -static cxio_hal_ev_callback_func_t cxio_ev_cb = NULL; - -static struct cxio_rdev *cxio_hal_find_rdev_by_name(char *dev_name) -{ - struct cxio_rdev *rdev; - - list_for_each_entry(rdev, &rdev_list, entry) - if (!strcmp(rdev->dev_name, dev_name)) - return rdev; - return NULL; -} - -static struct cxio_rdev *cxio_hal_find_rdev_by_t3cdev(struct t3cdev *tdev) -{ - struct cxio_rdev *rdev; - - list_for_each_entry(rdev, &rdev_list, entry) - if (rdev->t3cdev_p == tdev) - return rdev; - return NULL; -} - -int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq, - enum t3_cq_opcode op, u32 credit) -{ - int ret; - struct t3_cqe *cqe; - u32 rptr; - - struct rdma_cq_op setup; - setup.id = cq->cqid; - setup.credits = (op == CQ_CREDIT_UPDATE) ? credit : 0; - setup.op = op; - ret = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_OP, &setup); - - if ((ret < 0) || (op == CQ_CREDIT_UPDATE)) - return ret; - - /* - * If the rearm returned an index other than our current index, - * then there might be CQE's in flight (being DMA'd). We must wait - * here for them to complete or the consumer can miss a notification. - */ - if (Q_PTR2IDX((cq->rptr), cq->size_log2) != ret) { - int i=0; - - rptr = cq->rptr; - - /* - * Keep the generation correct by bumping rptr until it - * matches the index returned by the rearm - 1. - */ - while (Q_PTR2IDX((rptr+1), cq->size_log2) != ret) - rptr++; - - /* - * Now rptr is the index for the (last) cqe that was - * in-flight at the time the HW rearmed the CQ. We - * spin until that CQE is valid. - */ - cqe = cq->queue + Q_PTR2IDX(rptr, cq->size_log2); - while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) { - udelay(1); - if (i++ > 1000000) { - pr_err("%s: stalled rnic\n", rdev_p->dev_name); - return -EIO; - } - } - - return 1; - } - - return 0; -} - -static int cxio_hal_clear_cq_ctx(struct cxio_rdev *rdev_p, u32 cqid) -{ - struct rdma_cq_setup setup; - setup.id = cqid; - setup.base_addr = 0; /* NULL address */ - setup.size = 0; /* disaable the CQ */ - setup.credits = 0; - setup.credit_thres = 0; - setup.ovfl_mode = 0; - return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup)); -} - -static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid) -{ - u64 sge_cmd; - struct t3_modify_qp_wr *wqe; - struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_KERNEL); - if (!skb) { - pr_debug("%s alloc_skb failed\n", __func__); - return -ENOMEM; - } - wqe = skb_put_zero(skb, sizeof(*wqe)); - build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, - T3_COMPLETION_FLAG | T3_NOTIFY_FLAG, 0, qpid, 7, - T3_SOPEOP); - wqe->flags = cpu_to_be32(MODQP_WRITE_EC); - sge_cmd = qpid << 8 | 3; - wqe->sge_cmd = cpu_to_be64(sge_cmd); - skb->priority = CPL_PRIORITY_CONTROL; - return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb); -} - -int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq, int kernel) -{ - struct rdma_cq_setup setup; - int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe); - - size += 1; /* one extra page for storing cq-in-err state */ - cq->cqid = cxio_hal_get_cqid(rdev_p->rscp); - if (!cq->cqid) - return -ENOMEM; - if (kernel) { - cq->sw_queue = kzalloc(size, GFP_KERNEL); - if (!cq->sw_queue) - return -ENOMEM; - } - cq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev), size, - &(cq->dma_addr), GFP_KERNEL); - if (!cq->queue) { - kfree(cq->sw_queue); - return -ENOMEM; - } - dma_unmap_addr_set(cq, mapping, cq->dma_addr); - setup.id = cq->cqid; - setup.base_addr = (u64) (cq->dma_addr); - setup.size = 1UL << cq->size_log2; - setup.credits = 65535; - setup.credit_thres = 1; - if (rdev_p->t3cdev_p->type != T3A) - setup.ovfl_mode = 0; - else - setup.ovfl_mode = 1; - return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup)); -} - -static u32 get_qpid(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx) -{ - struct cxio_qpid_list *entry; - u32 qpid; - int i; - - mutex_lock(&uctx->lock); - if (!list_empty(&uctx->qpids)) { - entry = list_entry(uctx->qpids.next, struct cxio_qpid_list, - entry); - list_del(&entry->entry); - qpid = entry->qpid; - kfree(entry); - } else { - qpid = cxio_hal_get_qpid(rdev_p->rscp); - if (!qpid) - goto out; - for (i = qpid+1; i & rdev_p->qpmask; i++) { - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - break; - entry->qpid = i; - list_add_tail(&entry->entry, &uctx->qpids); - } - } -out: - mutex_unlock(&uctx->lock); - pr_debug("%s qpid 0x%x\n", __func__, qpid); - return qpid; -} - -static void put_qpid(struct cxio_rdev *rdev_p, u32 qpid, - struct cxio_ucontext *uctx) -{ - struct cxio_qpid_list *entry; - - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - return; - pr_debug("%s qpid 0x%x\n", __func__, qpid); - entry->qpid = qpid; - mutex_lock(&uctx->lock); - list_add_tail(&entry->entry, &uctx->qpids); - mutex_unlock(&uctx->lock); -} - -void cxio_release_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx) -{ - struct list_head *pos, *nxt; - struct cxio_qpid_list *entry; - - mutex_lock(&uctx->lock); - list_for_each_safe(pos, nxt, &uctx->qpids) { - entry = list_entry(pos, struct cxio_qpid_list, entry); - list_del_init(&entry->entry); - if (!(entry->qpid & rdev_p->qpmask)) - cxio_hal_put_qpid(rdev_p->rscp, entry->qpid); - kfree(entry); - } - mutex_unlock(&uctx->lock); -} - -void cxio_init_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx) -{ - INIT_LIST_HEAD(&uctx->qpids); - mutex_init(&uctx->lock); -} - -int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain, - struct t3_wq *wq, struct cxio_ucontext *uctx) -{ - int depth = 1UL << wq->size_log2; - int rqsize = 1UL << wq->rq_size_log2; - - wq->qpid = get_qpid(rdev_p, uctx); - if (!wq->qpid) - return -ENOMEM; - - wq->rq = kcalloc(depth, sizeof(struct t3_swrq), GFP_KERNEL); - if (!wq->rq) - goto err1; - - wq->rq_addr = cxio_hal_rqtpool_alloc(rdev_p, rqsize); - if (!wq->rq_addr) - goto err2; - - wq->sq = kcalloc(depth, sizeof(struct t3_swsq), GFP_KERNEL); - if (!wq->sq) - goto err3; - - wq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev), - depth * sizeof(union t3_wr), - &(wq->dma_addr), GFP_KERNEL); - if (!wq->queue) - goto err4; - - dma_unmap_addr_set(wq, mapping, wq->dma_addr); - wq->doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr; - if (!kernel_domain) - wq->udb = (u64)rdev_p->rnic_info.udbell_physbase + - (wq->qpid << rdev_p->qpshift); - wq->rdev = rdev_p; - pr_debug("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n", - __func__, wq->qpid, wq->doorbell, (unsigned long long)wq->udb); - return 0; -err4: - kfree(wq->sq); -err3: - cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, rqsize); -err2: - kfree(wq->rq); -err1: - put_qpid(rdev_p, wq->qpid, uctx); - return -ENOMEM; -} - -void cxio_destroy_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq) -{ - cxio_hal_clear_cq_ctx(rdev_p, cq->cqid); - kfree(cq->sw_queue); - dma_free_coherent(&(rdev_p->rnic_info.pdev->dev), - (1UL << (cq->size_log2)) - * sizeof(struct t3_cqe) + 1, cq->queue, - dma_unmap_addr(cq, mapping)); - cxio_hal_put_cqid(rdev_p->rscp, cq->cqid); -} - -int cxio_destroy_qp(struct cxio_rdev *rdev_p, struct t3_wq *wq, - struct cxio_ucontext *uctx) -{ - dma_free_coherent(&(rdev_p->rnic_info.pdev->dev), - (1UL << (wq->size_log2)) - * sizeof(union t3_wr), wq->queue, - dma_unmap_addr(wq, mapping)); - kfree(wq->sq); - cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, (1UL << wq->rq_size_log2)); - kfree(wq->rq); - put_qpid(rdev_p, wq->qpid, uctx); - return 0; -} - -static void insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq) -{ - struct t3_cqe cqe; - - pr_debug("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__, - wq, cq, cq->sw_rptr, cq->sw_wptr); - memset(&cqe, 0, sizeof(cqe)); - cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) | - V_CQE_OPCODE(T3_SEND) | - V_CQE_TYPE(0) | - V_CQE_SWCQE(1) | - V_CQE_QPID(wq->qpid) | - V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr, - cq->size_log2))); - *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe; - cq->sw_wptr++; -} - -int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count) -{ - u32 ptr; - int flushed = 0; - - pr_debug("%s wq %p cq %p\n", __func__, wq, cq); - - /* flush RQ */ - pr_debug("%s rq_rptr %u rq_wptr %u skip count %u\n", __func__, - wq->rq_rptr, wq->rq_wptr, count); - ptr = wq->rq_rptr + count; - while (ptr++ != wq->rq_wptr) { - insert_recv_cqe(wq, cq); - flushed++; - } - return flushed; -} - -static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq, - struct t3_swsq *sqp) -{ - struct t3_cqe cqe; - - pr_debug("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__, - wq, cq, cq->sw_rptr, cq->sw_wptr); - memset(&cqe, 0, sizeof(cqe)); - cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) | - V_CQE_OPCODE(sqp->opcode) | - V_CQE_TYPE(1) | - V_CQE_SWCQE(1) | - V_CQE_QPID(wq->qpid) | - V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr, - cq->size_log2))); - cqe.u.scqe.wrid_hi = sqp->sq_wptr; - - *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe; - cq->sw_wptr++; -} - -int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count) -{ - __u32 ptr = wq->sq_rptr + count; - int flushed = 0; - struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); - - while (ptr != wq->sq_wptr) { - sqp->signaled = 0; - insert_sq_cqe(wq, cq, sqp); - ptr++; - sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); - flushed++; - } - return flushed; -} - -/* - * Move all CQEs from the HWCQ into the SWCQ. - */ -void cxio_flush_hw_cq(struct t3_cq *cq) -{ - struct t3_cqe *cqe, *swcqe; - - pr_debug("%s cq %p cqid 0x%x\n", __func__, cq, cq->cqid); - cqe = cxio_next_hw_cqe(cq); - while (cqe) { - pr_debug("%s flushing hwcq rptr 0x%x to swcq wptr 0x%x\n", - __func__, cq->rptr, cq->sw_wptr); - swcqe = cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2); - *swcqe = *cqe; - swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1)); - cq->sw_wptr++; - cq->rptr++; - cqe = cxio_next_hw_cqe(cq); - } -} - -static int cqe_completes_wr(struct t3_cqe *cqe, struct t3_wq *wq) -{ - if (CQE_OPCODE(*cqe) == T3_TERMINATE) - return 0; - - if ((CQE_OPCODE(*cqe) == T3_RDMA_WRITE) && RQ_TYPE(*cqe)) - return 0; - - if ((CQE_OPCODE(*cqe) == T3_READ_RESP) && SQ_TYPE(*cqe)) - return 0; - - if (CQE_SEND_OPCODE(*cqe) && RQ_TYPE(*cqe) && - Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) - return 0; - - return 1; -} - -void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count) -{ - struct t3_cqe *cqe; - u32 ptr; - - *count = 0; - ptr = cq->sw_rptr; - while (!Q_EMPTY(ptr, cq->sw_wptr)) { - cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2)); - if ((SQ_TYPE(*cqe) || - ((CQE_OPCODE(*cqe) == T3_READ_RESP) && wq->oldest_read)) && - (CQE_QPID(*cqe) == wq->qpid)) - (*count)++; - ptr++; - } - pr_debug("%s cq %p count %d\n", __func__, cq, *count); -} - -void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count) -{ - struct t3_cqe *cqe; - u32 ptr; - - *count = 0; - pr_debug("%s count zero %d\n", __func__, *count); - ptr = cq->sw_rptr; - while (!Q_EMPTY(ptr, cq->sw_wptr)) { - cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2)); - if (RQ_TYPE(*cqe) && (CQE_OPCODE(*cqe) != T3_READ_RESP) && - (CQE_QPID(*cqe) == wq->qpid) && cqe_completes_wr(cqe, wq)) - (*count)++; - ptr++; - } - pr_debug("%s cq %p count %d\n", __func__, cq, *count); -} - -static int cxio_hal_init_ctrl_cq(struct cxio_rdev *rdev_p) -{ - struct rdma_cq_setup setup; - setup.id = 0; - setup.base_addr = 0; /* NULL address */ - setup.size = 1; /* enable the CQ */ - setup.credits = 0; - - /* force SGE to redirect to RspQ and interrupt */ - setup.credit_thres = 0; - setup.ovfl_mode = 1; - return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup)); -} - -static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p) -{ - int err; - u64 sge_cmd, ctx0, ctx1; - u64 base_addr; - struct t3_modify_qp_wr *wqe; - struct sk_buff *skb; - - skb = alloc_skb(sizeof(*wqe), GFP_KERNEL); - if (!skb) { - pr_debug("%s alloc_skb failed\n", __func__); - return -ENOMEM; - } - err = cxio_hal_init_ctrl_cq(rdev_p); - if (err) { - pr_debug("%s err %d initializing ctrl_cq\n", __func__, err); - goto err; - } - rdev_p->ctrl_qp.workq = dma_alloc_coherent( - &(rdev_p->rnic_info.pdev->dev), - (1 << T3_CTRL_QP_SIZE_LOG2) * - sizeof(union t3_wr), - &(rdev_p->ctrl_qp.dma_addr), - GFP_KERNEL); - if (!rdev_p->ctrl_qp.workq) { - pr_debug("%s dma_alloc_coherent failed\n", __func__); - err = -ENOMEM; - goto err; - } - dma_unmap_addr_set(&rdev_p->ctrl_qp, mapping, - rdev_p->ctrl_qp.dma_addr); - rdev_p->ctrl_qp.doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr; - - mutex_init(&rdev_p->ctrl_qp.lock); - init_waitqueue_head(&rdev_p->ctrl_qp.waitq); - - /* update HW Ctrl QP context */ - base_addr = rdev_p->ctrl_qp.dma_addr; - base_addr >>= 12; - ctx0 = (V_EC_SIZE((1 << T3_CTRL_QP_SIZE_LOG2)) | - V_EC_BASE_LO((u32) base_addr & 0xffff)); - ctx0 <<= 32; - ctx0 |= V_EC_CREDITS(FW_WR_NUM); - base_addr >>= 16; - ctx1 = (u32) base_addr; - base_addr >>= 32; - ctx1 |= ((u64) (V_EC_BASE_HI((u32) base_addr & 0xf) | V_EC_RESPQ(0) | - V_EC_TYPE(0) | V_EC_GEN(1) | - V_EC_UP_TOKEN(T3_CTL_QP_TID) | F_EC_VALID)) << 32; - wqe = skb_put_zero(skb, sizeof(*wqe)); - build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 0, - T3_CTL_QP_TID, 7, T3_SOPEOP); - wqe->flags = cpu_to_be32(MODQP_WRITE_EC); - sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3; - wqe->sge_cmd = cpu_to_be64(sge_cmd); - wqe->ctx1 = cpu_to_be64(ctx1); - wqe->ctx0 = cpu_to_be64(ctx0); - pr_debug("CtrlQP dma_addr %pad workq %p size %d\n", - &rdev_p->ctrl_qp.dma_addr, rdev_p->ctrl_qp.workq, - 1 << T3_CTRL_QP_SIZE_LOG2); - skb->priority = CPL_PRIORITY_CONTROL; - return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb); -err: - kfree_skb(skb); - return err; -} - -static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p) -{ - dma_free_coherent(&(rdev_p->rnic_info.pdev->dev), - (1UL << T3_CTRL_QP_SIZE_LOG2) - * sizeof(union t3_wr), rdev_p->ctrl_qp.workq, - dma_unmap_addr(&rdev_p->ctrl_qp, mapping)); - return cxio_hal_clear_qp_ctx(rdev_p, T3_CTRL_QP_ID); -} - -/* write len bytes of data into addr (32B aligned address) - * If data is NULL, clear len byte of memory to zero. - * caller acquires the ctrl_qp lock before the call - */ -static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr, - u32 len, void *data) -{ - u32 i, nr_wqe, copy_len; - u8 *copy_data; - u8 wr_len, utx_len; /* length in 8 byte flit */ - enum t3_wr_flags flag; - __be64 *wqe; - u64 utx_cmd; - addr &= 0x7FFFFFF; - nr_wqe = len % 96 ? len / 96 + 1 : len / 96; /* 96B max per WQE */ - pr_debug("%s wptr 0x%x rptr 0x%x len %d, nr_wqe %d data %p addr 0x%0x\n", - __func__, rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, len, - nr_wqe, data, addr); - utx_len = 3; /* in 32B unit */ - for (i = 0; i < nr_wqe; i++) { - if (Q_FULL(rdev_p->ctrl_qp.rptr, rdev_p->ctrl_qp.wptr, - T3_CTRL_QP_SIZE_LOG2)) { - pr_debug("%s ctrl_qp full wtpr 0x%0x rptr 0x%0x, wait for more space i %d\n", - __func__, - rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, i); - if (wait_event_interruptible(rdev_p->ctrl_qp.waitq, - !Q_FULL(rdev_p->ctrl_qp.rptr, - rdev_p->ctrl_qp.wptr, - T3_CTRL_QP_SIZE_LOG2))) { - pr_debug("%s ctrl_qp workq interrupted\n", - __func__); - return -ERESTARTSYS; - } - pr_debug("%s ctrl_qp wakeup, continue posting work request i %d\n", - __func__, i); - } - wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr % - (1 << T3_CTRL_QP_SIZE_LOG2))); - flag = 0; - if (i == (nr_wqe - 1)) { - /* last WQE */ - flag = T3_COMPLETION_FLAG; - if (len % 32) - utx_len = len / 32 + 1; - else - utx_len = len / 32; - } - - /* - * Force a CQE to return the credit to the workq in case - * we posted more than half the max QP size of WRs - */ - if ((i != 0) && - (i % (((1 << T3_CTRL_QP_SIZE_LOG2)) >> 1) == 0)) { - flag = T3_COMPLETION_FLAG; - pr_debug("%s force completion at i %d\n", __func__, i); - } - - /* build the utx mem command */ - wqe += (sizeof(struct t3_bypass_wr) >> 3); - utx_cmd = (T3_UTX_MEM_WRITE << 28) | (addr + i * 3); - utx_cmd <<= 32; - utx_cmd |= (utx_len << 28) | ((utx_len << 2) + 1); - *wqe = cpu_to_be64(utx_cmd); - wqe++; - copy_data = (u8 *) data + i * 96; - copy_len = len > 96 ? 96 : len; - - /* clear memory content if data is NULL */ - if (data) - memcpy(wqe, copy_data, copy_len); - else - memset(wqe, 0, copy_len); - if (copy_len % 32) - memset(((u8 *) wqe) + copy_len, 0, - 32 - (copy_len % 32)); - wr_len = ((sizeof(struct t3_bypass_wr)) >> 3) + 1 + - (utx_len << 2); - wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr % - (1 << T3_CTRL_QP_SIZE_LOG2))); - - /* wptr in the WRID[31:0] */ - ((union t3_wrid *)(wqe+1))->id0.low = rdev_p->ctrl_qp.wptr; - - /* - * This must be the last write with a memory barrier - * for the genbit - */ - build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_BP, flag, - Q_GENBIT(rdev_p->ctrl_qp.wptr, - T3_CTRL_QP_SIZE_LOG2), T3_CTRL_QP_ID, - wr_len, T3_SOPEOP); - if (flag == T3_COMPLETION_FLAG) - ring_doorbell(rdev_p->ctrl_qp.doorbell, T3_CTRL_QP_ID); - len -= 96; - rdev_p->ctrl_qp.wptr++; - } - return 0; -} - -/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl_size and pbl_addr - * OUT: stag index - * TBD: shared memory region support - */ -static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry, - u32 *stag, u8 stag_state, u32 pdid, - enum tpt_mem_type type, enum tpt_mem_perm perm, - u32 zbva, u64 to, u32 len, u8 page_size, - u32 pbl_size, u32 pbl_addr) -{ - int err; - struct tpt_entry tpt; - u32 stag_idx; - u32 wptr; - - if (cxio_fatal_error(rdev_p)) - return -EIO; - - stag_state = stag_state > 0; - stag_idx = (*stag) >> 8; - - if ((!reset_tpt_entry) && !(*stag != T3_STAG_UNSET)) { - stag_idx = cxio_hal_get_stag(rdev_p->rscp); - if (!stag_idx) - return -ENOMEM; - *stag = (stag_idx << 8) | ((*stag) & 0xFF); - } - pr_debug("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n", - __func__, stag_state, type, pdid, stag_idx); - - mutex_lock(&rdev_p->ctrl_qp.lock); - - /* write TPT entry */ - if (reset_tpt_entry) - memset(&tpt, 0, sizeof(tpt)); - else { - tpt.valid_stag_pdid = cpu_to_be32(F_TPT_VALID | - V_TPT_STAG_KEY((*stag) & M_TPT_STAG_KEY) | - V_TPT_STAG_STATE(stag_state) | - V_TPT_STAG_TYPE(type) | V_TPT_PDID(pdid)); - BUG_ON(page_size >= 28); - tpt.flags_pagesize_qpid = cpu_to_be32(V_TPT_PERM(perm) | - ((perm & TPT_MW_BIND) ? F_TPT_MW_BIND_ENABLE : 0) | - V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) | - V_TPT_PAGE_SIZE(page_size)); - tpt.rsvd_pbl_addr = cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3)); - tpt.len = cpu_to_be32(len); - tpt.va_hi = cpu_to_be32((u32) (to >> 32)); - tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL)); - tpt.rsvd_bind_cnt_or_pstag = 0; - tpt.rsvd_pbl_size = cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2)); - } - err = cxio_hal_ctrl_qp_write_mem(rdev_p, - stag_idx + - (rdev_p->rnic_info.tpt_base >> 5), - sizeof(tpt), &tpt); - - /* release the stag index to free pool */ - if (reset_tpt_entry) - cxio_hal_put_stag(rdev_p->rscp, stag_idx); - - wptr = rdev_p->ctrl_qp.wptr; - mutex_unlock(&rdev_p->ctrl_qp.lock); - if (!err) - if (wait_event_interruptible(rdev_p->ctrl_qp.waitq, - SEQ32_GE(rdev_p->ctrl_qp.rptr, - wptr))) - return -ERESTARTSYS; - return err; -} - -int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl, - u32 pbl_addr, u32 pbl_size) -{ - u32 wptr; - int err; - - pr_debug("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n", - __func__, pbl_addr, rdev_p->rnic_info.pbl_base, - pbl_size); - - mutex_lock(&rdev_p->ctrl_qp.lock); - err = cxio_hal_ctrl_qp_write_mem(rdev_p, pbl_addr >> 5, pbl_size << 3, - pbl); - wptr = rdev_p->ctrl_qp.wptr; - mutex_unlock(&rdev_p->ctrl_qp.lock); - if (err) - return err; - - if (wait_event_interruptible(rdev_p->ctrl_qp.waitq, - SEQ32_GE(rdev_p->ctrl_qp.rptr, - wptr))) - return -ERESTARTSYS; - - return 0; -} - -int cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, - enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, u32 pbl_size, u32 pbl_addr) -{ - *stag = T3_STAG_UNSET; - return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm, - zbva, to, len, page_size, pbl_size, pbl_addr); -} - -int cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, - enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, u32 pbl_size, u32 pbl_addr) -{ - return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm, - zbva, to, len, page_size, pbl_size, pbl_addr); -} - -int cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag, u32 pbl_size, - u32 pbl_addr) -{ - return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, - pbl_size, pbl_addr); -} - -int cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid) -{ - *stag = T3_STAG_UNSET; - return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0, - 0, 0); -} - -int cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag) -{ - return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, - 0, 0); -} - -int cxio_allocate_stag(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr) -{ - *stag = T3_STAG_UNSET; - return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_NON_SHARED_MR, - 0, 0, 0ULL, 0, 0, pbl_size, pbl_addr); -} - -int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr) -{ - struct t3_rdma_init_wr *wqe; - struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_ATOMIC); - if (!skb) - return -ENOMEM; - pr_debug("%s rdev_p %p\n", __func__, rdev_p); - wqe = __skb_put(skb, sizeof(*wqe)); - wqe->wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_INIT)); - wqe->wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(attr->tid) | - V_FW_RIWR_LEN(sizeof(*wqe) >> 3)); - wqe->wrid.id1 = 0; - wqe->qpid = cpu_to_be32(attr->qpid); - wqe->pdid = cpu_to_be32(attr->pdid); - wqe->scqid = cpu_to_be32(attr->scqid); - wqe->rcqid = cpu_to_be32(attr->rcqid); - wqe->rq_addr = cpu_to_be32(attr->rq_addr - rdev_p->rnic_info.rqt_base); - wqe->rq_size = cpu_to_be32(attr->rq_size); - wqe->mpaattrs = attr->mpaattrs; - wqe->qpcaps = attr->qpcaps; - wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss); - wqe->rqe_count = cpu_to_be16(attr->rqe_count); - wqe->flags_rtr_type = cpu_to_be16(attr->flags | - V_RTR_TYPE(attr->rtr_type) | - V_CHAN(attr->chan)); - wqe->ord = cpu_to_be32(attr->ord); - wqe->ird = cpu_to_be32(attr->ird); - wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr); - wqe->qp_dma_size = cpu_to_be32(attr->qp_dma_size); - wqe->irs = cpu_to_be32(attr->irs); - skb->priority = 0; /* 0=>ToeQ; 1=>CtrlQ */ - return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb); -} - -void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb) -{ - cxio_ev_cb = ev_cb; -} - -void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb) -{ - cxio_ev_cb = NULL; -} - -static int cxio_hal_ev_handler(struct t3cdev *t3cdev_p, struct sk_buff *skb) -{ - static int cnt; - struct cxio_rdev *rdev_p = NULL; - struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data; - pr_debug("%d: %s cq_id 0x%x cq_ptr 0x%x genbit %0x overflow %0x an %0x se %0x notify %0x cqbranch %0x creditth %0x\n", - cnt, __func__, RSPQ_CQID(rsp_msg), RSPQ_CQPTR(rsp_msg), - RSPQ_GENBIT(rsp_msg), RSPQ_OVERFLOW(rsp_msg), RSPQ_AN(rsp_msg), - RSPQ_SE(rsp_msg), RSPQ_NOTIFY(rsp_msg), RSPQ_CQBRANCH(rsp_msg), - RSPQ_CREDIT_THRESH(rsp_msg)); - pr_debug("CQE: QPID 0x%0x genbit %0x type 0x%0x status 0x%0x opcode %d len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n", - CQE_QPID(rsp_msg->cqe), CQE_GENBIT(rsp_msg->cqe), - CQE_TYPE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe), - CQE_OPCODE(rsp_msg->cqe), CQE_LEN(rsp_msg->cqe), - CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe)); - rdev_p = (struct cxio_rdev *)t3cdev_p->ulp; - if (!rdev_p) { - pr_debug("%s called by t3cdev %p with null ulp\n", __func__, - t3cdev_p); - return 0; - } - if (CQE_QPID(rsp_msg->cqe) == T3_CTRL_QP_ID) { - rdev_p->ctrl_qp.rptr = CQE_WRID_LOW(rsp_msg->cqe) + 1; - wake_up_interruptible(&rdev_p->ctrl_qp.waitq); - dev_kfree_skb_irq(skb); - } else if (CQE_QPID(rsp_msg->cqe) == 0xfff8) - dev_kfree_skb_irq(skb); - else if (cxio_ev_cb) - (*cxio_ev_cb) (rdev_p, skb); - else - dev_kfree_skb_irq(skb); - cnt++; - return 0; -} - -/* Caller takes care of locking if needed */ -int cxio_rdev_open(struct cxio_rdev *rdev_p) -{ - struct net_device *netdev_p = NULL; - int err = 0; - if (strlen(rdev_p->dev_name)) { - if (cxio_hal_find_rdev_by_name(rdev_p->dev_name)) { - return -EBUSY; - } - netdev_p = dev_get_by_name(&init_net, rdev_p->dev_name); - if (!netdev_p) { - return -EINVAL; - } - dev_put(netdev_p); - } else if (rdev_p->t3cdev_p) { - if (cxio_hal_find_rdev_by_t3cdev(rdev_p->t3cdev_p)) { - return -EBUSY; - } - netdev_p = rdev_p->t3cdev_p->lldev; - strncpy(rdev_p->dev_name, rdev_p->t3cdev_p->name, - T3_MAX_DEV_NAME_LEN); - } else { - pr_debug("%s t3cdev_p or dev_name must be set\n", __func__); - return -EINVAL; - } - - list_add_tail(&rdev_p->entry, &rdev_list); - - pr_debug("%s opening rnic dev %s\n", __func__, rdev_p->dev_name); - memset(&rdev_p->ctrl_qp, 0, sizeof(rdev_p->ctrl_qp)); - if (!rdev_p->t3cdev_p) - rdev_p->t3cdev_p = dev2t3cdev(netdev_p); - rdev_p->t3cdev_p->ulp = (void *) rdev_p; - - err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_EMBEDDED_INFO, - &(rdev_p->fw_info)); - if (err) { - pr_err("%s t3cdev_p(%p)->ctl returned error %d\n", - __func__, rdev_p->t3cdev_p, err); - goto err1; - } - if (G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers) != CXIO_FW_MAJ) { - pr_err("fatal firmware version mismatch: need version %u but adapter has version %u\n", - CXIO_FW_MAJ, - G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers)); - err = -EINVAL; - goto err1; - } - - err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_GET_PARAMS, - &(rdev_p->rnic_info)); - if (err) { - pr_err("%s t3cdev_p(%p)->ctl returned error %d\n", - __func__, rdev_p->t3cdev_p, err); - goto err1; - } - err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_PORTS, - &(rdev_p->port_info)); - if (err) { - pr_err("%s t3cdev_p(%p)->ctl returned error %d\n", - __func__, rdev_p->t3cdev_p, err); - goto err1; - } - - /* - * qpshift is the number of bits to shift the qpid left in order - * to get the correct address of the doorbell for that qp. - */ - cxio_init_ucontext(rdev_p, &rdev_p->uctx); - rdev_p->qpshift = PAGE_SHIFT - - ilog2(65536 >> - ilog2(rdev_p->rnic_info.udbell_len >> - PAGE_SHIFT)); - rdev_p->qpnr = rdev_p->rnic_info.udbell_len >> PAGE_SHIFT; - rdev_p->qpmask = (65536 >> ilog2(rdev_p->qpnr)) - 1; - pr_debug("%s rnic %s info: tpt_base 0x%0x tpt_top 0x%0x num stags %d pbl_base 0x%0x pbl_top 0x%0x rqt_base 0x%0x, rqt_top 0x%0x\n", - __func__, rdev_p->dev_name, rdev_p->rnic_info.tpt_base, - rdev_p->rnic_info.tpt_top, cxio_num_stags(rdev_p), - rdev_p->rnic_info.pbl_base, - rdev_p->rnic_info.pbl_top, rdev_p->rnic_info.rqt_base, - rdev_p->rnic_info.rqt_top); - pr_debug("udbell_len 0x%0x udbell_physbase 0x%lx kdb_addr %p qpshift %lu qpnr %d qpmask 0x%x\n", - rdev_p->rnic_info.udbell_len, - rdev_p->rnic_info.udbell_physbase, rdev_p->rnic_info.kdb_addr, - rdev_p->qpshift, rdev_p->qpnr, rdev_p->qpmask); - - err = cxio_hal_init_ctrl_qp(rdev_p); - if (err) { - pr_err("%s error %d initializing ctrl_qp\n", __func__, err); - goto err1; - } - err = cxio_hal_init_resource(rdev_p, cxio_num_stags(rdev_p), 0, - 0, T3_MAX_NUM_QP, T3_MAX_NUM_CQ, - T3_MAX_NUM_PD); - if (err) { - pr_err("%s error %d initializing hal resources\n", - __func__, err); - goto err2; - } - err = cxio_hal_pblpool_create(rdev_p); - if (err) { - pr_err("%s error %d initializing pbl mem pool\n", - __func__, err); - goto err3; - } - err = cxio_hal_rqtpool_create(rdev_p); - if (err) { - pr_err("%s error %d initializing rqt mem pool\n", - __func__, err); - goto err4; - } - return 0; -err4: - cxio_hal_pblpool_destroy(rdev_p); -err3: - cxio_hal_destroy_resource(rdev_p->rscp); -err2: - cxio_hal_destroy_ctrl_qp(rdev_p); -err1: - rdev_p->t3cdev_p->ulp = NULL; - list_del(&rdev_p->entry); - return err; -} - -void cxio_rdev_close(struct cxio_rdev *rdev_p) -{ - if (rdev_p) { - cxio_hal_pblpool_destroy(rdev_p); - cxio_hal_rqtpool_destroy(rdev_p); - list_del(&rdev_p->entry); - cxio_hal_destroy_ctrl_qp(rdev_p); - cxio_hal_destroy_resource(rdev_p->rscp); - rdev_p->t3cdev_p->ulp = NULL; - } -} - -int __init cxio_hal_init(void) -{ - if (cxio_hal_init_rhdl_resource(T3_MAX_NUM_RI)) - return -ENOMEM; - t3_register_cpl_handler(CPL_ASYNC_NOTIF, cxio_hal_ev_handler); - return 0; -} - -void __exit cxio_hal_exit(void) -{ - struct cxio_rdev *rdev, *tmp; - - t3_register_cpl_handler(CPL_ASYNC_NOTIF, NULL); - list_for_each_entry_safe(rdev, tmp, &rdev_list, entry) - cxio_rdev_close(rdev); - cxio_hal_destroy_rhdl_resource(); -} - -static void flush_completed_wrs(struct t3_wq *wq, struct t3_cq *cq) -{ - struct t3_swsq *sqp; - __u32 ptr = wq->sq_rptr; - int count = Q_COUNT(wq->sq_rptr, wq->sq_wptr); - - sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); - while (count--) - if (!sqp->signaled) { - ptr++; - sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); - } else if (sqp->complete) { - - /* - * Insert this completed cqe into the swcq. - */ - pr_debug("%s moving cqe into swcq sq idx %ld cq idx %ld\n", - __func__, Q_PTR2IDX(ptr, wq->sq_size_log2), - Q_PTR2IDX(cq->sw_wptr, cq->size_log2)); - sqp->cqe.header |= htonl(V_CQE_SWCQE(1)); - *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) - = sqp->cqe; - cq->sw_wptr++; - sqp->signaled = 0; - break; - } else - break; -} - -static void create_read_req_cqe(struct t3_wq *wq, struct t3_cqe *hw_cqe, - struct t3_cqe *read_cqe) -{ - read_cqe->u.scqe.wrid_hi = wq->oldest_read->sq_wptr; - read_cqe->len = wq->oldest_read->read_len; - read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(*hw_cqe)) | - V_CQE_SWCQE(SW_CQE(*hw_cqe)) | - V_CQE_OPCODE(T3_READ_REQ) | - V_CQE_TYPE(1)); -} - -/* - * Return a ptr to the next read wr in the SWSQ or NULL. - */ -static void advance_oldest_read(struct t3_wq *wq) -{ - - u32 rptr = wq->oldest_read - wq->sq + 1; - u32 wptr = Q_PTR2IDX(wq->sq_wptr, wq->sq_size_log2); - - while (Q_PTR2IDX(rptr, wq->sq_size_log2) != wptr) { - wq->oldest_read = wq->sq + Q_PTR2IDX(rptr, wq->sq_size_log2); - - if (wq->oldest_read->opcode == T3_READ_REQ) - return; - rptr++; - } - wq->oldest_read = NULL; -} - -/* - * cxio_poll_cq - * - * Caller must: - * check the validity of the first CQE, - * supply the wq assicated with the qpid. - * - * credit: cq credit to return to sge. - * cqe_flushed: 1 iff the CQE is flushed. - * cqe: copy of the polled CQE. - * - * return value: - * 0 CQE returned, - * -1 CQE skipped, try again. - */ -int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe, - u8 *cqe_flushed, u64 *cookie, u32 *credit) -{ - int ret = 0; - struct t3_cqe *hw_cqe, read_cqe; - - *cqe_flushed = 0; - *credit = 0; - hw_cqe = cxio_next_cqe(cq); - - pr_debug("%s CQE OOO %d qpid 0x%0x genbit %d type %d status 0x%0x opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n", - __func__, CQE_OOO(*hw_cqe), CQE_QPID(*hw_cqe), - CQE_GENBIT(*hw_cqe), CQE_TYPE(*hw_cqe), CQE_STATUS(*hw_cqe), - CQE_OPCODE(*hw_cqe), CQE_LEN(*hw_cqe), CQE_WRID_HI(*hw_cqe), - CQE_WRID_LOW(*hw_cqe)); - - /* - * skip cqe's not affiliated with a QP. - */ - if (wq == NULL) { - ret = -1; - goto skip_cqe; - } - - /* - * Gotta tweak READ completions: - * 1) the cqe doesn't contain the sq_wptr from the wr. - * 2) opcode not reflected from the wr. - * 3) read_len not reflected from the wr. - * 4) cq_type is RQ_TYPE not SQ_TYPE. - */ - if (RQ_TYPE(*hw_cqe) && (CQE_OPCODE(*hw_cqe) == T3_READ_RESP)) { - - /* - * If this is an unsolicited read response, then the read - * was generated by the kernel driver as part of peer-2-peer - * connection setup. So ignore the completion. - */ - if (!wq->oldest_read) { - if (CQE_STATUS(*hw_cqe)) - wq->error = 1; - ret = -1; - goto skip_cqe; - } - - /* - * Don't write to the HWCQ, so create a new read req CQE - * in local memory. - */ - create_read_req_cqe(wq, hw_cqe, &read_cqe); - hw_cqe = &read_cqe; - advance_oldest_read(wq); - } - - /* - * T3A: Discard TERMINATE CQEs. - */ - if (CQE_OPCODE(*hw_cqe) == T3_TERMINATE) { - ret = -1; - wq->error = 1; - goto skip_cqe; - } - - if (CQE_STATUS(*hw_cqe) || wq->error) { - *cqe_flushed = wq->error; - wq->error = 1; - - /* - * T3A inserts errors into the CQE. We cannot return - * these as work completions. - */ - /* incoming write failures */ - if ((CQE_OPCODE(*hw_cqe) == T3_RDMA_WRITE) - && RQ_TYPE(*hw_cqe)) { - ret = -1; - goto skip_cqe; - } - /* incoming read request failures */ - if ((CQE_OPCODE(*hw_cqe) == T3_READ_RESP) && SQ_TYPE(*hw_cqe)) { - ret = -1; - goto skip_cqe; - } - - /* incoming SEND with no receive posted failures */ - if (CQE_SEND_OPCODE(*hw_cqe) && RQ_TYPE(*hw_cqe) && - Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) { - ret = -1; - goto skip_cqe; - } - BUG_ON((*cqe_flushed == 0) && !SW_CQE(*hw_cqe)); - goto proc_cqe; - } - - /* - * RECV completion. - */ - if (RQ_TYPE(*hw_cqe)) { - - /* - * HW only validates 4 bits of MSN. So we must validate that - * the MSN in the SEND is the next expected MSN. If its not, - * then we complete this with TPT_ERR_MSN and mark the wq in - * error. - */ - - if (Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) { - wq->error = 1; - ret = -1; - goto skip_cqe; - } - - if (unlikely((CQE_WRID_MSN(*hw_cqe) != (wq->rq_rptr + 1)))) { - wq->error = 1; - hw_cqe->header |= htonl(V_CQE_STATUS(TPT_ERR_MSN)); - goto proc_cqe; - } - goto proc_cqe; - } - - /* - * If we get here its a send completion. - * - * Handle out of order completion. These get stuffed - * in the SW SQ. Then the SW SQ is walked to move any - * now in-order completions into the SW CQ. This handles - * 2 cases: - * 1) reaping unsignaled WRs when the first subsequent - * signaled WR is completed. - * 2) out of order read completions. - */ - if (!SW_CQE(*hw_cqe) && (CQE_WRID_SQ_WPTR(*hw_cqe) != wq->sq_rptr)) { - struct t3_swsq *sqp; - - pr_debug("%s out of order completion going in swsq at idx %ld\n", - __func__, - Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), - wq->sq_size_log2)); - sqp = wq->sq + - Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2); - sqp->cqe = *hw_cqe; - sqp->complete = 1; - ret = -1; - goto flush_wq; - } - -proc_cqe: - *cqe = *hw_cqe; - - /* - * Reap the associated WR(s) that are freed up with this - * completion. - */ - if (SQ_TYPE(*hw_cqe)) { - wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe); - pr_debug("%s completing sq idx %ld\n", __func__, - Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)); - *cookie = wq->sq[Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)].wr_id; - wq->sq_rptr++; - } else { - pr_debug("%s completing rq idx %ld\n", __func__, - Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)); - *cookie = wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].wr_id; - if (wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].pbl_addr) - cxio_hal_pblpool_free(wq->rdev, - wq->rq[Q_PTR2IDX(wq->rq_rptr, - wq->rq_size_log2)].pbl_addr, T3_STAG0_PBL_SIZE); - BUG_ON(Q_EMPTY(wq->rq_rptr, wq->rq_wptr)); - wq->rq_rptr++; - } - -flush_wq: - /* - * Flush any completed cqes that are now in-order. - */ - flush_completed_wrs(wq, cq); - -skip_cqe: - if (SW_CQE(*hw_cqe)) { - pr_debug("%s cq %p cqid 0x%x skip sw cqe sw_rptr 0x%x\n", - __func__, cq, cq->cqid, cq->sw_rptr); - ++cq->sw_rptr; - } else { - pr_debug("%s cq %p cqid 0x%x skip hw cqe rptr 0x%x\n", - __func__, cq, cq->cqid, cq->rptr); - ++cq->rptr; - - /* - * T3A: compute credits. - */ - if (((cq->rptr - cq->wptr) > (1 << (cq->size_log2 - 1))) - || ((cq->rptr - cq->wptr) >= 128)) { - *credit = cq->rptr - cq->wptr; - cq->wptr = cq->rptr; - } - } - return ret; -} diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h deleted file mode 100644 index 40c029ffa425..000000000000 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.h +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __CXIO_HAL_H__ -#define __CXIO_HAL_H__ - -#include -#include -#include - -#include "t3_cpl.h" -#include "t3cdev.h" -#include "cxgb3_ctl_defs.h" -#include "cxio_wr.h" - -#define T3_CTRL_QP_ID FW_RI_SGEEC_START -#define T3_CTL_QP_TID FW_RI_TID_START -#define T3_CTRL_QP_SIZE_LOG2 8 -#define T3_CTRL_CQ_ID 0 - -#define T3_MAX_NUM_RI (1<<15) -#define T3_MAX_NUM_QP (1<<15) -#define T3_MAX_NUM_CQ (1<<15) -#define T3_MAX_NUM_PD (1<<15) -#define T3_MAX_PBL_SIZE 256 -#define T3_MAX_RQ_SIZE 1024 -#define T3_MAX_QP_DEPTH (T3_MAX_RQ_SIZE-1) -#define T3_MAX_CQ_DEPTH 65536 -#define T3_MAX_NUM_STAG (1<<15) -#define T3_MAX_MR_SIZE 0x100000000ULL -#define T3_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */ - -#define T3_STAG_UNSET 0xffffffff - -#define T3_MAX_DEV_NAME_LEN 32 - -#define CXIO_FW_MAJ 7 - -struct cxio_hal_ctrl_qp { - u32 wptr; - u32 rptr; - struct mutex lock; /* for the wtpr, can sleep */ - wait_queue_head_t waitq;/* wait for RspQ/CQE msg */ - union t3_wr *workq; /* the work request queue */ - dma_addr_t dma_addr; /* pci bus address of the workq */ - DEFINE_DMA_UNMAP_ADDR(mapping); - void __iomem *doorbell; -}; - -struct cxio_hal_resource { - struct kfifo tpt_fifo; - spinlock_t tpt_fifo_lock; - struct kfifo qpid_fifo; - spinlock_t qpid_fifo_lock; - struct kfifo cqid_fifo; - spinlock_t cqid_fifo_lock; - struct kfifo pdid_fifo; - spinlock_t pdid_fifo_lock; -}; - -struct cxio_qpid_list { - struct list_head entry; - u32 qpid; -}; - -struct cxio_ucontext { - struct list_head qpids; - struct mutex lock; -}; - -struct cxio_rdev { - char dev_name[T3_MAX_DEV_NAME_LEN]; - struct t3cdev *t3cdev_p; - struct rdma_info rnic_info; - struct adap_ports port_info; - struct cxio_hal_resource *rscp; - struct cxio_hal_ctrl_qp ctrl_qp; - void *ulp; - unsigned long qpshift; - u32 qpnr; - u32 qpmask; - struct cxio_ucontext uctx; - struct gen_pool *pbl_pool; - struct gen_pool *rqt_pool; - struct list_head entry; - struct ch_embedded_info fw_info; - u32 flags; -#define CXIO_ERROR_FATAL 1 -}; - -static inline int cxio_fatal_error(struct cxio_rdev *rdev_p) -{ - return rdev_p->flags & CXIO_ERROR_FATAL; -} - -static inline int cxio_num_stags(struct cxio_rdev *rdev_p) -{ - return min((int)T3_MAX_NUM_STAG, (int)((rdev_p->rnic_info.tpt_top - rdev_p->rnic_info.tpt_base) >> 5)); -} - -typedef void (*cxio_hal_ev_callback_func_t) (struct cxio_rdev * rdev_p, - struct sk_buff * skb); - -#define RSPQ_CQID(rsp) (be32_to_cpu(rsp->cq_ptrid) & 0xffff) -#define RSPQ_CQPTR(rsp) ((be32_to_cpu(rsp->cq_ptrid) >> 16) & 0xffff) -#define RSPQ_GENBIT(rsp) ((be32_to_cpu(rsp->flags) >> 16) & 1) -#define RSPQ_OVERFLOW(rsp) ((be32_to_cpu(rsp->flags) >> 17) & 1) -#define RSPQ_AN(rsp) ((be32_to_cpu(rsp->flags) >> 18) & 1) -#define RSPQ_SE(rsp) ((be32_to_cpu(rsp->flags) >> 19) & 1) -#define RSPQ_NOTIFY(rsp) ((be32_to_cpu(rsp->flags) >> 20) & 1) -#define RSPQ_CQBRANCH(rsp) ((be32_to_cpu(rsp->flags) >> 21) & 1) -#define RSPQ_CREDIT_THRESH(rsp) ((be32_to_cpu(rsp->flags) >> 22) & 1) - -struct respQ_msg_t { - __be32 flags; /* flit 0 */ - __be32 cq_ptrid; - __be64 rsvd; /* flit 1 */ - struct t3_cqe cqe; /* flits 2-3 */ -}; - -enum t3_cq_opcode { - CQ_ARM_AN = 0x2, - CQ_ARM_SE = 0x6, - CQ_FORCE_AN = 0x3, - CQ_CREDIT_UPDATE = 0x7 -}; - -int cxio_rdev_open(struct cxio_rdev *rdev); -void cxio_rdev_close(struct cxio_rdev *rdev); -int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq, - enum t3_cq_opcode op, u32 credit); -int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq, int kernel); -void cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq); -void cxio_release_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx); -void cxio_init_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx); -int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq, - struct cxio_ucontext *uctx); -int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq, - struct cxio_ucontext *uctx); -int cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode); -int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl, - u32 pbl_addr, u32 pbl_size); -int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid, - enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, u32 pbl_size, u32 pbl_addr); -int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid, - enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, u32 pbl_size, u32 pbl_addr); -int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size, - u32 pbl_addr); -int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid); -int cxio_allocate_stag(struct cxio_rdev *rdev, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr); -int cxio_deallocate_window(struct cxio_rdev *rdev, u32 stag); -int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr); -void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb); -void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb); -u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp); -void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid); -int __init cxio_hal_init(void); -void __exit cxio_hal_exit(void); -int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count); -int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count); -void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count); -void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count); -void cxio_flush_hw_cq(struct t3_cq *cq); -int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe, - u8 *cqe_flushed, u64 *cookie, u32 *credit); -int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb); - -#ifdef pr_fmt -#undef pr_fmt -#endif - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#endif diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c deleted file mode 100644 index c6e7bc4420b6..000000000000 --- a/drivers/infiniband/hw/cxgb3/cxio_resource.c +++ /dev/null @@ -1,344 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -/* Crude resource management */ -#include -#include -#include -#include -#include -#include -#include "cxio_resource.h" -#include "cxio_hal.h" - -static struct kfifo rhdl_fifo; -static spinlock_t rhdl_fifo_lock; - -#define RANDOM_SIZE 16 - -static int __cxio_init_resource_fifo(struct kfifo *fifo, - spinlock_t *fifo_lock, - u32 nr, u32 skip_low, - u32 skip_high, - int random) -{ - u32 i, j, entry = 0, idx; - u32 random_bytes; - u32 rarray[16]; - spin_lock_init(fifo_lock); - - if (kfifo_alloc(fifo, nr * sizeof(u32), GFP_KERNEL)) - return -ENOMEM; - - for (i = 0; i < skip_low + skip_high; i++) - kfifo_in(fifo, (unsigned char *) &entry, sizeof(u32)); - if (random) { - j = 0; - random_bytes = prandom_u32(); - for (i = 0; i < RANDOM_SIZE; i++) - rarray[i] = i + skip_low; - for (i = skip_low + RANDOM_SIZE; i < nr - skip_high; i++) { - if (j >= RANDOM_SIZE) { - j = 0; - random_bytes = prandom_u32(); - } - idx = (random_bytes >> (j * 2)) & 0xF; - kfifo_in(fifo, - (unsigned char *) &rarray[idx], - sizeof(u32)); - rarray[idx] = i; - j++; - } - for (i = 0; i < RANDOM_SIZE; i++) - kfifo_in(fifo, - (unsigned char *) &rarray[i], - sizeof(u32)); - } else - for (i = skip_low; i < nr - skip_high; i++) - kfifo_in(fifo, (unsigned char *) &i, sizeof(u32)); - - for (i = 0; i < skip_low + skip_high; i++) - if (kfifo_out_locked(fifo, (unsigned char *) &entry, - sizeof(u32), fifo_lock) != sizeof(u32)) - break; - return 0; -} - -static int cxio_init_resource_fifo(struct kfifo *fifo, spinlock_t * fifo_lock, - u32 nr, u32 skip_low, u32 skip_high) -{ - return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low, - skip_high, 0)); -} - -static int cxio_init_resource_fifo_random(struct kfifo *fifo, - spinlock_t * fifo_lock, - u32 nr, u32 skip_low, u32 skip_high) -{ - - return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low, - skip_high, 1)); -} - -static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p) -{ - u32 i; - - spin_lock_init(&rdev_p->rscp->qpid_fifo_lock); - - if (kfifo_alloc(&rdev_p->rscp->qpid_fifo, T3_MAX_NUM_QP * sizeof(u32), - GFP_KERNEL)) - return -ENOMEM; - - for (i = 16; i < T3_MAX_NUM_QP; i++) - if (!(i & rdev_p->qpmask)) - kfifo_in(&rdev_p->rscp->qpid_fifo, - (unsigned char *) &i, sizeof(u32)); - return 0; -} - -int cxio_hal_init_rhdl_resource(u32 nr_rhdl) -{ - return cxio_init_resource_fifo(&rhdl_fifo, &rhdl_fifo_lock, nr_rhdl, 1, - 0); -} - -void cxio_hal_destroy_rhdl_resource(void) -{ - kfifo_free(&rhdl_fifo); -} - -/* nr_* must be power of 2 */ -int cxio_hal_init_resource(struct cxio_rdev *rdev_p, - u32 nr_tpt, u32 nr_pbl, - u32 nr_rqt, u32 nr_qpid, u32 nr_cqid, u32 nr_pdid) -{ - int err = 0; - struct cxio_hal_resource *rscp; - - rscp = kmalloc(sizeof(*rscp), GFP_KERNEL); - if (!rscp) - return -ENOMEM; - rdev_p->rscp = rscp; - err = cxio_init_resource_fifo_random(&rscp->tpt_fifo, - &rscp->tpt_fifo_lock, - nr_tpt, 1, 0); - if (err) - goto tpt_err; - err = cxio_init_qpid_fifo(rdev_p); - if (err) - goto qpid_err; - err = cxio_init_resource_fifo(&rscp->cqid_fifo, &rscp->cqid_fifo_lock, - nr_cqid, 1, 0); - if (err) - goto cqid_err; - err = cxio_init_resource_fifo(&rscp->pdid_fifo, &rscp->pdid_fifo_lock, - nr_pdid, 1, 0); - if (err) - goto pdid_err; - return 0; -pdid_err: - kfifo_free(&rscp->cqid_fifo); -cqid_err: - kfifo_free(&rscp->qpid_fifo); -qpid_err: - kfifo_free(&rscp->tpt_fifo); -tpt_err: - return -ENOMEM; -} - -/* - * returns 0 if no resource available - */ -static u32 cxio_hal_get_resource(struct kfifo *fifo, spinlock_t * lock) -{ - u32 entry; - if (kfifo_out_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock)) - return entry; - else - return 0; /* fifo emptry */ -} - -static void cxio_hal_put_resource(struct kfifo *fifo, spinlock_t * lock, - u32 entry) -{ - BUG_ON( - kfifo_in_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock) - == 0); -} - -u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp) -{ - return cxio_hal_get_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock); -} - -void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag) -{ - cxio_hal_put_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock, stag); -} - -u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp) -{ - u32 qpid = cxio_hal_get_resource(&rscp->qpid_fifo, - &rscp->qpid_fifo_lock); - pr_debug("%s qpid 0x%x\n", __func__, qpid); - return qpid; -} - -void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid) -{ - pr_debug("%s qpid 0x%x\n", __func__, qpid); - cxio_hal_put_resource(&rscp->qpid_fifo, &rscp->qpid_fifo_lock, qpid); -} - -u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp) -{ - return cxio_hal_get_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock); -} - -void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid) -{ - cxio_hal_put_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock, cqid); -} - -u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp) -{ - return cxio_hal_get_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock); -} - -void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid) -{ - cxio_hal_put_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock, pdid); -} - -void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp) -{ - kfifo_free(&rscp->tpt_fifo); - kfifo_free(&rscp->cqid_fifo); - kfifo_free(&rscp->qpid_fifo); - kfifo_free(&rscp->pdid_fifo); - kfree(rscp); -} - -/* - * PBL Memory Manager. Uses Linux generic allocator. - */ - -#define MIN_PBL_SHIFT 8 /* 256B == min PBL size (32 entries) */ - -u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size) -{ - unsigned long addr = gen_pool_alloc(rdev_p->pbl_pool, size); - pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size); - return (u32)addr; -} - -void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size) -{ - pr_debug("%s addr 0x%x size %d\n", __func__, addr, size); - gen_pool_free(rdev_p->pbl_pool, (unsigned long)addr, size); -} - -int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p) -{ - unsigned pbl_start, pbl_chunk; - - rdev_p->pbl_pool = gen_pool_create(MIN_PBL_SHIFT, -1); - if (!rdev_p->pbl_pool) - return -ENOMEM; - - pbl_start = rdev_p->rnic_info.pbl_base; - pbl_chunk = rdev_p->rnic_info.pbl_top - pbl_start + 1; - - while (pbl_start < rdev_p->rnic_info.pbl_top) { - pbl_chunk = min(rdev_p->rnic_info.pbl_top - pbl_start + 1, - pbl_chunk); - if (gen_pool_add(rdev_p->pbl_pool, pbl_start, pbl_chunk, -1)) { - pr_debug("%s failed to add PBL chunk (%x/%x)\n", - __func__, pbl_start, pbl_chunk); - if (pbl_chunk <= 1024 << MIN_PBL_SHIFT) { - pr_warn("%s: Failed to add all PBL chunks (%x/%x)\n", - __func__, pbl_start, - rdev_p->rnic_info.pbl_top - pbl_start); - return 0; - } - pbl_chunk >>= 1; - } else { - pr_debug("%s added PBL chunk (%x/%x)\n", - __func__, pbl_start, pbl_chunk); - pbl_start += pbl_chunk; - } - } - - return 0; -} - -void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p) -{ - gen_pool_destroy(rdev_p->pbl_pool); -} - -/* - * RQT Memory Manager. Uses Linux generic allocator. - */ - -#define MIN_RQT_SHIFT 10 /* 1KB == mini RQT size (16 entries) */ -#define RQT_CHUNK 2*1024*1024 - -u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size) -{ - unsigned long addr = gen_pool_alloc(rdev_p->rqt_pool, size << 6); - pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6); - return (u32)addr; -} - -void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size) -{ - pr_debug("%s addr 0x%x size %d\n", __func__, addr, size << 6); - gen_pool_free(rdev_p->rqt_pool, (unsigned long)addr, size << 6); -} - -int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p) -{ - unsigned long i; - rdev_p->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1); - if (rdev_p->rqt_pool) - for (i = rdev_p->rnic_info.rqt_base; - i <= rdev_p->rnic_info.rqt_top - RQT_CHUNK + 1; - i += RQT_CHUNK) - gen_pool_add(rdev_p->rqt_pool, i, RQT_CHUNK, -1); - return rdev_p->rqt_pool ? 0 : -ENOMEM; -} - -void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p) -{ - gen_pool_destroy(rdev_p->rqt_pool); -} diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.h b/drivers/infiniband/hw/cxgb3/cxio_resource.h deleted file mode 100644 index a2703a3d882d..000000000000 --- a/drivers/infiniband/hw/cxgb3/cxio_resource.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __CXIO_RESOURCE_H__ -#define __CXIO_RESOURCE_H__ - -#include -#include -#include -#include -#include -#include -#include -#include "cxio_hal.h" - -extern int cxio_hal_init_rhdl_resource(u32 nr_rhdl); -extern void cxio_hal_destroy_rhdl_resource(void); -extern int cxio_hal_init_resource(struct cxio_rdev *rdev_p, - u32 nr_tpt, u32 nr_pbl, - u32 nr_rqt, u32 nr_qpid, u32 nr_cqid, - u32 nr_pdid); -extern u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp); -extern void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag); -extern u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp); -extern void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid); -extern u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp); -extern void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid); -extern void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp); - -#define PBL_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.pbl_base ) -extern int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p); -extern void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p); -extern u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size); -extern void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size); - -#define RQT_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.rqt_base ) -extern int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p); -extern void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p); -extern u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size); -extern void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size); -#endif diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h deleted file mode 100644 index 53aa5c36247a..000000000000 --- a/drivers/infiniband/hw/cxgb3/cxio_wr.h +++ /dev/null @@ -1,802 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __CXIO_WR_H__ -#define __CXIO_WR_H__ - -#include -#include -#include -#include "firmware_exports.h" - -#define T3_MAX_SGE 4 -#define T3_MAX_INLINE 64 -#define T3_STAG0_PBL_SIZE (2 * T3_MAX_SGE << 3) -#define T3_STAG0_MAX_PBE_LEN (128 * 1024 * 1024) -#define T3_STAG0_PAGE_SHIFT 15 - -#define Q_EMPTY(rptr,wptr) ((rptr)==(wptr)) -#define Q_FULL(rptr,wptr,size_log2) ( (((wptr)-(rptr))>>(size_log2)) && \ - ((rptr)!=(wptr)) ) -#define Q_GENBIT(ptr,size_log2) (!(((ptr)>>size_log2)&0x1)) -#define Q_FREECNT(rptr,wptr,size_log2) ((1UL<> S_FW_RIWR_OP)) & M_FW_RIWR_OP) - -#define S_FW_RIWR_SOPEOP 22 -#define M_FW_RIWR_SOPEOP 0x3 -#define V_FW_RIWR_SOPEOP(x) ((x) << S_FW_RIWR_SOPEOP) - -#define S_FW_RIWR_FLAGS 8 -#define M_FW_RIWR_FLAGS 0x3fffff -#define V_FW_RIWR_FLAGS(x) ((x) << S_FW_RIWR_FLAGS) -#define G_FW_RIWR_FLAGS(x) ((((x) >> S_FW_RIWR_FLAGS)) & M_FW_RIWR_FLAGS) - -#define S_FW_RIWR_TID 8 -#define V_FW_RIWR_TID(x) ((x) << S_FW_RIWR_TID) - -#define S_FW_RIWR_LEN 0 -#define V_FW_RIWR_LEN(x) ((x) << S_FW_RIWR_LEN) - -#define S_FW_RIWR_GEN 31 -#define V_FW_RIWR_GEN(x) ((x) << S_FW_RIWR_GEN) - -struct t3_sge { - __be32 stag; - __be32 len; - __be64 to; -}; - -/* If num_sgle is zero, flit 5+ contains immediate data.*/ -struct t3_send_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - - u8 rdmaop; /* 2 */ - u8 reserved[3]; - __be32 rem_stag; - __be32 plen; /* 3 */ - __be32 num_sgle; - struct t3_sge sgl[T3_MAX_SGE]; /* 4+ */ -}; - -#define T3_MAX_FASTREG_DEPTH 10 -#define T3_MAX_FASTREG_FRAG 10 - -struct t3_fastreg_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - __be32 stag; /* 2 */ - __be32 len; - __be32 va_base_hi; /* 3 */ - __be32 va_base_lo_fbo; - __be32 page_type_perms; /* 4 */ - __be32 reserved1; - __be64 pbl_addrs[0]; /* 5+ */ -}; - -/* - * If a fastreg wr spans multiple wqes, then the 2nd fragment look like this. - */ -struct t3_pbl_frag { - struct fw_riwrh wrh; /* 0 */ - __be64 pbl_addrs[14]; /* 1..14 */ -}; - -#define S_FR_PAGE_COUNT 24 -#define M_FR_PAGE_COUNT 0xff -#define V_FR_PAGE_COUNT(x) ((x) << S_FR_PAGE_COUNT) -#define G_FR_PAGE_COUNT(x) ((((x) >> S_FR_PAGE_COUNT)) & M_FR_PAGE_COUNT) - -#define S_FR_PAGE_SIZE 16 -#define M_FR_PAGE_SIZE 0x1f -#define V_FR_PAGE_SIZE(x) ((x) << S_FR_PAGE_SIZE) -#define G_FR_PAGE_SIZE(x) ((((x) >> S_FR_PAGE_SIZE)) & M_FR_PAGE_SIZE) - -#define S_FR_TYPE 8 -#define M_FR_TYPE 0x1 -#define V_FR_TYPE(x) ((x) << S_FR_TYPE) -#define G_FR_TYPE(x) ((((x) >> S_FR_TYPE)) & M_FR_TYPE) - -#define S_FR_PERMS 0 -#define M_FR_PERMS 0xff -#define V_FR_PERMS(x) ((x) << S_FR_PERMS) -#define G_FR_PERMS(x) ((((x) >> S_FR_PERMS)) & M_FR_PERMS) - -struct t3_local_inv_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - __be32 stag; /* 2 */ - __be32 reserved; -}; - -struct t3_rdma_write_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - u8 rdmaop; /* 2 */ - u8 reserved[3]; - __be32 stag_sink; - __be64 to_sink; /* 3 */ - __be32 plen; /* 4 */ - __be32 num_sgle; - struct t3_sge sgl[T3_MAX_SGE]; /* 5+ */ -}; - -struct t3_rdma_read_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - u8 rdmaop; /* 2 */ - u8 local_inv; - u8 reserved[2]; - __be32 rem_stag; - __be64 rem_to; /* 3 */ - __be32 local_stag; /* 4 */ - __be32 local_len; - __be64 local_to; /* 5 */ -}; - -struct t3_bind_mw_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - u16 reserved; /* 2 */ - u8 type; - u8 perms; - __be32 mr_stag; - __be32 mw_stag; /* 3 */ - __be32 mw_len; - __be64 mw_va; /* 4 */ - __be32 mr_pbl_addr; /* 5 */ - u8 reserved2[3]; - u8 mr_pagesz; -}; - -struct t3_receive_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - u8 pagesz[T3_MAX_SGE]; - __be32 num_sgle; /* 2 */ - struct t3_sge sgl[T3_MAX_SGE]; /* 3+ */ - __be32 pbl_addr[T3_MAX_SGE]; -}; - -struct t3_bypass_wr { - struct fw_riwrh wrh; - union t3_wrid wrid; /* 1 */ -}; - -struct t3_modify_qp_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - __be32 flags; /* 2 */ - __be32 quiesce; /* 2 */ - __be32 max_ird; /* 3 */ - __be32 max_ord; /* 3 */ - __be64 sge_cmd; /* 4 */ - __be64 ctx1; /* 5 */ - __be64 ctx0; /* 6 */ -}; - -enum t3_modify_qp_flags { - MODQP_QUIESCE = 0x01, - MODQP_MAX_IRD = 0x02, - MODQP_MAX_ORD = 0x04, - MODQP_WRITE_EC = 0x08, - MODQP_READ_EC = 0x10, -}; - - -enum t3_mpa_attrs { - uP_RI_MPA_RX_MARKER_ENABLE = 0x1, - uP_RI_MPA_TX_MARKER_ENABLE = 0x2, - uP_RI_MPA_CRC_ENABLE = 0x4, - uP_RI_MPA_IETF_ENABLE = 0x8 -} __packed; - -enum t3_qp_caps { - uP_RI_QP_RDMA_READ_ENABLE = 0x01, - uP_RI_QP_RDMA_WRITE_ENABLE = 0x02, - uP_RI_QP_BIND_ENABLE = 0x04, - uP_RI_QP_FAST_REGISTER_ENABLE = 0x08, - uP_RI_QP_STAG0_ENABLE = 0x10 -} __packed; - -enum rdma_init_rtr_types { - RTR_READ = 1, - RTR_WRITE = 2, - RTR_SEND = 3, -}; - -#define S_RTR_TYPE 2 -#define M_RTR_TYPE 0x3 -#define V_RTR_TYPE(x) ((x) << S_RTR_TYPE) -#define G_RTR_TYPE(x) ((((x) >> S_RTR_TYPE)) & M_RTR_TYPE) - -#define S_CHAN 4 -#define M_CHAN 0x3 -#define V_CHAN(x) ((x) << S_CHAN) -#define G_CHAN(x) ((((x) >> S_CHAN)) & M_CHAN) - -struct t3_rdma_init_attr { - u32 tid; - u32 qpid; - u32 pdid; - u32 scqid; - u32 rcqid; - u32 rq_addr; - u32 rq_size; - enum t3_mpa_attrs mpaattrs; - enum t3_qp_caps qpcaps; - u16 tcp_emss; - u32 ord; - u32 ird; - u64 qp_dma_addr; - u32 qp_dma_size; - enum rdma_init_rtr_types rtr_type; - u16 flags; - u16 rqe_count; - u32 irs; - u32 chan; -}; - -struct t3_rdma_init_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - __be32 qpid; /* 2 */ - __be32 pdid; - __be32 scqid; /* 3 */ - __be32 rcqid; - __be32 rq_addr; /* 4 */ - __be32 rq_size; - u8 mpaattrs; /* 5 */ - u8 qpcaps; - __be16 ulpdu_size; - __be16 flags_rtr_type; - __be16 rqe_count; - __be32 ord; /* 6 */ - __be32 ird; - __be64 qp_dma_addr; /* 7 */ - __be32 qp_dma_size; /* 8 */ - __be32 irs; -}; - -struct t3_genbit { - u64 flit[15]; - __be64 genbit; -}; - -struct t3_wq_in_err { - u64 flit[13]; - u64 err; -}; - -enum rdma_init_wr_flags { - MPA_INITIATOR = (1<<0), - PRIV_QP = (1<<1), -}; - -union t3_wr { - struct t3_send_wr send; - struct t3_rdma_write_wr write; - struct t3_rdma_read_wr read; - struct t3_receive_wr recv; - struct t3_fastreg_wr fastreg; - struct t3_pbl_frag pbl_frag; - struct t3_local_inv_wr local_inv; - struct t3_bind_mw_wr bind; - struct t3_bypass_wr bypass; - struct t3_rdma_init_wr init; - struct t3_modify_qp_wr qp_mod; - struct t3_genbit genbit; - struct t3_wq_in_err wq_in_err; - __be64 flit[16]; -}; - -#define T3_SQ_CQE_FLIT 13 -#define T3_SQ_COOKIE_FLIT 14 - -#define T3_RQ_COOKIE_FLIT 13 -#define T3_RQ_CQE_FLIT 14 - -static inline enum t3_wr_opcode fw_riwrh_opcode(struct fw_riwrh *wqe) -{ - return G_FW_RIWR_OP(be32_to_cpu(wqe->op_seop_flags)); -} - -enum t3_wr_hdr_bits { - T3_EOP = 1, - T3_SOP = 2, - T3_SOPEOP = T3_EOP|T3_SOP, -}; - -static inline void build_fw_riwrh(struct fw_riwrh *wqe, enum t3_wr_opcode op, - enum t3_wr_flags flags, u8 genbit, u32 tid, - u8 len, u8 sopeop) -{ - wqe->op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(op) | - V_FW_RIWR_SOPEOP(sopeop) | - V_FW_RIWR_FLAGS(flags)); - wmb(); - wqe->gen_tid_len = cpu_to_be32(V_FW_RIWR_GEN(genbit) | - V_FW_RIWR_TID(tid) | - V_FW_RIWR_LEN(len)); - /* 2nd gen bit... */ - ((union t3_wr *)wqe)->genbit.genbit = cpu_to_be64(genbit); -} - -/* - * T3 ULP2_TX commands - */ -enum t3_utx_mem_op { - T3_UTX_MEM_READ = 2, - T3_UTX_MEM_WRITE = 3 -}; - -/* T3 MC7 RDMA TPT entry format */ - -enum tpt_mem_type { - TPT_NON_SHARED_MR = 0x0, - TPT_SHARED_MR = 0x1, - TPT_MW = 0x2, - TPT_MW_RELAXED_PROTECTION = 0x3 -}; - -enum tpt_addr_type { - TPT_ZBTO = 0, - TPT_VATO = 1 -}; - -enum tpt_mem_perm { - TPT_MW_BIND = 0x10, - TPT_LOCAL_READ = 0x8, - TPT_LOCAL_WRITE = 0x4, - TPT_REMOTE_READ = 0x2, - TPT_REMOTE_WRITE = 0x1 -}; - -struct tpt_entry { - __be32 valid_stag_pdid; - __be32 flags_pagesize_qpid; - - __be32 rsvd_pbl_addr; - __be32 len; - __be32 va_hi; - __be32 va_low_or_fbo; - - __be32 rsvd_bind_cnt_or_pstag; - __be32 rsvd_pbl_size; -}; - -#define S_TPT_VALID 31 -#define V_TPT_VALID(x) ((x) << S_TPT_VALID) -#define F_TPT_VALID V_TPT_VALID(1U) - -#define S_TPT_STAG_KEY 23 -#define M_TPT_STAG_KEY 0xFF -#define V_TPT_STAG_KEY(x) ((x) << S_TPT_STAG_KEY) -#define G_TPT_STAG_KEY(x) (((x) >> S_TPT_STAG_KEY) & M_TPT_STAG_KEY) - -#define S_TPT_STAG_STATE 22 -#define V_TPT_STAG_STATE(x) ((x) << S_TPT_STAG_STATE) -#define F_TPT_STAG_STATE V_TPT_STAG_STATE(1U) - -#define S_TPT_STAG_TYPE 20 -#define M_TPT_STAG_TYPE 0x3 -#define V_TPT_STAG_TYPE(x) ((x) << S_TPT_STAG_TYPE) -#define G_TPT_STAG_TYPE(x) (((x) >> S_TPT_STAG_TYPE) & M_TPT_STAG_TYPE) - -#define S_TPT_PDID 0 -#define M_TPT_PDID 0xFFFFF -#define V_TPT_PDID(x) ((x) << S_TPT_PDID) -#define G_TPT_PDID(x) (((x) >> S_TPT_PDID) & M_TPT_PDID) - -#define S_TPT_PERM 28 -#define M_TPT_PERM 0xF -#define V_TPT_PERM(x) ((x) << S_TPT_PERM) -#define G_TPT_PERM(x) (((x) >> S_TPT_PERM) & M_TPT_PERM) - -#define S_TPT_REM_INV_DIS 27 -#define V_TPT_REM_INV_DIS(x) ((x) << S_TPT_REM_INV_DIS) -#define F_TPT_REM_INV_DIS V_TPT_REM_INV_DIS(1U) - -#define S_TPT_ADDR_TYPE 26 -#define V_TPT_ADDR_TYPE(x) ((x) << S_TPT_ADDR_TYPE) -#define F_TPT_ADDR_TYPE V_TPT_ADDR_TYPE(1U) - -#define S_TPT_MW_BIND_ENABLE 25 -#define V_TPT_MW_BIND_ENABLE(x) ((x) << S_TPT_MW_BIND_ENABLE) -#define F_TPT_MW_BIND_ENABLE V_TPT_MW_BIND_ENABLE(1U) - -#define S_TPT_PAGE_SIZE 20 -#define M_TPT_PAGE_SIZE 0x1F -#define V_TPT_PAGE_SIZE(x) ((x) << S_TPT_PAGE_SIZE) -#define G_TPT_PAGE_SIZE(x) (((x) >> S_TPT_PAGE_SIZE) & M_TPT_PAGE_SIZE) - -#define S_TPT_PBL_ADDR 0 -#define M_TPT_PBL_ADDR 0x1FFFFFFF -#define V_TPT_PBL_ADDR(x) ((x) << S_TPT_PBL_ADDR) -#define G_TPT_PBL_ADDR(x) (((x) >> S_TPT_PBL_ADDR) & M_TPT_PBL_ADDR) - -#define S_TPT_QPID 0 -#define M_TPT_QPID 0xFFFFF -#define V_TPT_QPID(x) ((x) << S_TPT_QPID) -#define G_TPT_QPID(x) (((x) >> S_TPT_QPID) & M_TPT_QPID) - -#define S_TPT_PSTAG 0 -#define M_TPT_PSTAG 0xFFFFFF -#define V_TPT_PSTAG(x) ((x) << S_TPT_PSTAG) -#define G_TPT_PSTAG(x) (((x) >> S_TPT_PSTAG) & M_TPT_PSTAG) - -#define S_TPT_PBL_SIZE 0 -#define M_TPT_PBL_SIZE 0xFFFFF -#define V_TPT_PBL_SIZE(x) ((x) << S_TPT_PBL_SIZE) -#define G_TPT_PBL_SIZE(x) (((x) >> S_TPT_PBL_SIZE) & M_TPT_PBL_SIZE) - -/* - * CQE defs - */ -struct t3_cqe { - __be32 header; - __be32 len; - union { - struct { - __be32 stag; - __be32 msn; - } rcqe; - struct { - u32 wrid_hi; - u32 wrid_low; - } scqe; - } u; -}; - -#define S_CQE_OOO 31 -#define M_CQE_OOO 0x1 -#define G_CQE_OOO(x) ((((x) >> S_CQE_OOO)) & M_CQE_OOO) -#define V_CEQ_OOO(x) ((x)<> S_CQE_QPID)) & M_CQE_QPID) -#define V_CQE_QPID(x) ((x)<> S_CQE_SWCQE)) & M_CQE_SWCQE) -#define V_CQE_SWCQE(x) ((x)<> S_CQE_GENBIT) & M_CQE_GENBIT) -#define V_CQE_GENBIT(x) ((x)<> S_CQE_STATUS)) & M_CQE_STATUS) -#define V_CQE_STATUS(x) ((x)<> S_CQE_TYPE)) & M_CQE_TYPE) -#define V_CQE_TYPE(x) ((x)<> S_CQE_OPCODE)) & M_CQE_OPCODE) -#define V_CQE_OPCODE(x) ((x)<queue[1 << cq->size_log2])->cq_err; -} - -static inline void cxio_set_cq_in_error(struct t3_cq *cq) -{ - ((struct t3_cq_status_page *) - &cq->queue[1 << cq->size_log2])->cq_err = 1; -} - -static inline void cxio_set_wq_in_error(struct t3_wq *wq) -{ - wq->queue->wq_in_err.err |= 1; -} - -static inline void cxio_disable_wq_db(struct t3_wq *wq) -{ - wq->queue->wq_in_err.err |= 2; -} - -static inline void cxio_enable_wq_db(struct t3_wq *wq) -{ - wq->queue->wq_in_err.err &= ~2; -} - -static inline int cxio_wq_db_enabled(struct t3_wq *wq) -{ - return !(wq->queue->wq_in_err.err & 2); -} - -static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq) -{ - struct t3_cqe *cqe; - - cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2)); - if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe)) - return cqe; - return NULL; -} - -static inline struct t3_cqe *cxio_next_sw_cqe(struct t3_cq *cq) -{ - struct t3_cqe *cqe; - - if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) { - cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2)); - return cqe; - } - return NULL; -} - -static inline struct t3_cqe *cxio_next_cqe(struct t3_cq *cq) -{ - struct t3_cqe *cqe; - - if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) { - cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2)); - return cqe; - } - cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2)); - if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe)) - return cqe; - return NULL; -} - -#endif diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c deleted file mode 100644 index 56a8ab6210cf..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch.c +++ /dev/null @@ -1,282 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include -#include - -#include - -#include "cxgb3_offload.h" -#include "iwch_provider.h" -#include -#include "iwch.h" -#include "iwch_cm.h" - -#define DRV_VERSION "1.1" - -MODULE_AUTHOR("Boyd Faulkner, Steve Wise"); -MODULE_DESCRIPTION("Chelsio T3 RDMA Driver"); -MODULE_LICENSE("Dual BSD/GPL"); - -static void open_rnic_dev(struct t3cdev *); -static void close_rnic_dev(struct t3cdev *); -static void iwch_event_handler(struct t3cdev *, u32, u32); - -struct cxgb3_client t3c_client = { - .name = "iw_cxgb3", - .add = open_rnic_dev, - .remove = close_rnic_dev, - .handlers = t3c_handlers, - .redirect = iwch_ep_redirect, - .event_handler = iwch_event_handler -}; - -static LIST_HEAD(dev_list); -static DEFINE_MUTEX(dev_mutex); - -static void disable_dbs(struct iwch_dev *rnicp) -{ - unsigned long index; - struct iwch_qp *qhp; - - xa_lock_irq(&rnicp->qps); - xa_for_each(&rnicp->qps, index, qhp) - cxio_disable_wq_db(&qhp->wq); - xa_unlock_irq(&rnicp->qps); -} - -static void enable_dbs(struct iwch_dev *rnicp, int ring_db) -{ - unsigned long index; - struct iwch_qp *qhp; - - xa_lock_irq(&rnicp->qps); - xa_for_each(&rnicp->qps, index, qhp) { - if (ring_db) - ring_doorbell(qhp->rhp->rdev.ctrl_qp.doorbell, - qhp->wq.qpid); - cxio_enable_wq_db(&qhp->wq); - } - xa_unlock_irq(&rnicp->qps); -} - -static void iwch_db_drop_task(struct work_struct *work) -{ - struct iwch_dev *rnicp = container_of(work, struct iwch_dev, - db_drop_task.work); - enable_dbs(rnicp, 1); -} - -static void rnic_init(struct iwch_dev *rnicp) -{ - pr_debug("%s iwch_dev %p\n", __func__, rnicp); - xa_init_flags(&rnicp->cqs, XA_FLAGS_LOCK_IRQ); - xa_init_flags(&rnicp->qps, XA_FLAGS_LOCK_IRQ); - xa_init_flags(&rnicp->mrs, XA_FLAGS_LOCK_IRQ); - INIT_DELAYED_WORK(&rnicp->db_drop_task, iwch_db_drop_task); - - rnicp->attr.max_qps = T3_MAX_NUM_QP - 32; - rnicp->attr.max_wrs = T3_MAX_QP_DEPTH; - rnicp->attr.max_sge_per_wr = T3_MAX_SGE; - rnicp->attr.max_sge_per_rdma_write_wr = T3_MAX_SGE; - rnicp->attr.max_cqs = T3_MAX_NUM_CQ - 1; - rnicp->attr.max_cqes_per_cq = T3_MAX_CQ_DEPTH; - rnicp->attr.max_mem_regs = cxio_num_stags(&rnicp->rdev); - rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE; - rnicp->attr.max_pds = T3_MAX_NUM_PD - 1; - rnicp->attr.mem_pgsizes_bitmask = T3_PAGESIZE_MASK; - rnicp->attr.max_mr_size = T3_MAX_MR_SIZE; - rnicp->attr.can_resize_wq = 0; - rnicp->attr.max_rdma_reads_per_qp = 8; - rnicp->attr.max_rdma_read_resources = - rnicp->attr.max_rdma_reads_per_qp * rnicp->attr.max_qps; - rnicp->attr.max_rdma_read_qp_depth = 8; /* IRD */ - rnicp->attr.max_rdma_read_depth = - rnicp->attr.max_rdma_read_qp_depth * rnicp->attr.max_qps; - rnicp->attr.rq_overflow_handled = 0; - rnicp->attr.can_modify_ird = 0; - rnicp->attr.can_modify_ord = 0; - rnicp->attr.max_mem_windows = rnicp->attr.max_mem_regs - 1; - rnicp->attr.stag0_value = 1; - rnicp->attr.zbva_support = 1; - rnicp->attr.local_invalidate_fence = 1; - rnicp->attr.cq_overflow_detection = 1; - return; -} - -static void open_rnic_dev(struct t3cdev *tdev) -{ - struct iwch_dev *rnicp; - - pr_debug("%s t3cdev %p\n", __func__, tdev); - pr_info_once("Chelsio T3 RDMA Driver - version %s\n", DRV_VERSION); - rnicp = ib_alloc_device(iwch_dev, ibdev); - if (!rnicp) { - pr_err("Cannot allocate ib device\n"); - return; - } - rnicp->rdev.ulp = rnicp; - rnicp->rdev.t3cdev_p = tdev; - - mutex_lock(&dev_mutex); - - if (cxio_rdev_open(&rnicp->rdev)) { - mutex_unlock(&dev_mutex); - pr_err("Unable to open CXIO rdev\n"); - ib_dealloc_device(&rnicp->ibdev); - return; - } - - rnic_init(rnicp); - - list_add_tail(&rnicp->entry, &dev_list); - mutex_unlock(&dev_mutex); - - if (iwch_register_device(rnicp)) { - pr_err("Unable to register device\n"); - close_rnic_dev(tdev); - } - pr_info("Initialized device %s\n", - pci_name(rnicp->rdev.rnic_info.pdev)); - return; -} - -static void close_rnic_dev(struct t3cdev *tdev) -{ - struct iwch_dev *dev, *tmp; - pr_debug("%s t3cdev %p\n", __func__, tdev); - mutex_lock(&dev_mutex); - list_for_each_entry_safe(dev, tmp, &dev_list, entry) { - if (dev->rdev.t3cdev_p == tdev) { - dev->rdev.flags = CXIO_ERROR_FATAL; - synchronize_net(); - cancel_delayed_work_sync(&dev->db_drop_task); - list_del(&dev->entry); - iwch_unregister_device(dev); - cxio_rdev_close(&dev->rdev); - WARN_ON(!xa_empty(&dev->cqs)); - WARN_ON(!xa_empty(&dev->qps)); - WARN_ON(!xa_empty(&dev->mrs)); - ib_dealloc_device(&dev->ibdev); - break; - } - } - mutex_unlock(&dev_mutex); -} - -static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id) -{ - struct cxio_rdev *rdev = tdev->ulp; - struct iwch_dev *rnicp; - struct ib_event event; - u32 portnum = port_id + 1; - int dispatch = 0; - - if (!rdev) - return; - rnicp = rdev_to_iwch_dev(rdev); - switch (evt) { - case OFFLOAD_STATUS_DOWN: { - rdev->flags = CXIO_ERROR_FATAL; - synchronize_net(); - event.event = IB_EVENT_DEVICE_FATAL; - dispatch = 1; - break; - } - case OFFLOAD_PORT_DOWN: { - event.event = IB_EVENT_PORT_ERR; - dispatch = 1; - break; - } - case OFFLOAD_PORT_UP: { - event.event = IB_EVENT_PORT_ACTIVE; - dispatch = 1; - break; - } - case OFFLOAD_DB_FULL: { - disable_dbs(rnicp); - break; - } - case OFFLOAD_DB_EMPTY: { - enable_dbs(rnicp, 1); - break; - } - case OFFLOAD_DB_DROP: { - unsigned long delay = 1000; - unsigned short r; - - disable_dbs(rnicp); - get_random_bytes(&r, 2); - delay += r & 1023; - - /* - * delay is between 1000-2023 usecs. - */ - schedule_delayed_work(&rnicp->db_drop_task, - usecs_to_jiffies(delay)); - break; - } - } - - if (dispatch) { - event.device = &rnicp->ibdev; - event.element.port_num = portnum; - ib_dispatch_event(&event); - } - - return; -} - -static int __init iwch_init_module(void) -{ - int err; - - err = cxio_hal_init(); - if (err) - return err; - err = iwch_cm_init(); - if (err) - return err; - cxio_register_ev_cb(iwch_ev_dispatch); - cxgb3_register_client(&t3c_client); - return 0; -} - -static void __exit iwch_exit_module(void) -{ - cxgb3_unregister_client(&t3c_client); - cxio_unregister_ev_cb(iwch_ev_dispatch); - iwch_cm_term(); - cxio_hal_exit(); -} - -module_init(iwch_init_module); -module_exit(iwch_exit_module); diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h deleted file mode 100644 index 310a937bffcf..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __IWCH_H__ -#define __IWCH_H__ - -#include -#include -#include -#include -#include - -#include - -#include "cxio_hal.h" -#include "cxgb3_offload.h" - -struct iwch_pd; -struct iwch_cq; -struct iwch_qp; -struct iwch_mr; - -struct iwch_rnic_attributes { - u32 max_qps; - u32 max_wrs; /* Max for any SQ/RQ */ - u32 max_sge_per_wr; - u32 max_sge_per_rdma_write_wr; /* for RDMA Write WR */ - u32 max_cqs; - u32 max_cqes_per_cq; - u32 max_mem_regs; - u32 max_phys_buf_entries; /* for phys buf list */ - u32 max_pds; - - /* - * The memory page sizes supported by this RNIC. - * Bit position i in bitmap indicates page of - * size (4k)^i. Phys block list mode unsupported. - */ - u32 mem_pgsizes_bitmask; - u64 max_mr_size; - u8 can_resize_wq; - - /* - * The maximum number of RDMA Reads that can be outstanding - * per QP with this RNIC as the target. - */ - u32 max_rdma_reads_per_qp; - - /* - * The maximum number of resources used for RDMA Reads - * by this RNIC with this RNIC as the target. - */ - u32 max_rdma_read_resources; - - /* - * The max depth per QP for initiation of RDMA Read - * by this RNIC. - */ - u32 max_rdma_read_qp_depth; - - /* - * The maximum depth for initiation of RDMA Read - * operations by this RNIC on all QPs - */ - u32 max_rdma_read_depth; - u8 rq_overflow_handled; - u32 can_modify_ird; - u32 can_modify_ord; - u32 max_mem_windows; - u32 stag0_value; - u8 zbva_support; - u8 local_invalidate_fence; - u32 cq_overflow_detection; -}; - -struct iwch_dev { - struct ib_device ibdev; - struct cxio_rdev rdev; - u32 device_cap_flags; - struct iwch_rnic_attributes attr; - struct xarray cqs; - struct xarray qps; - struct xarray mrs; - struct list_head entry; - struct delayed_work db_drop_task; -}; - -static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev) -{ - return container_of(ibdev, struct iwch_dev, ibdev); -} - -static inline struct iwch_dev *rdev_to_iwch_dev(struct cxio_rdev *rdev) -{ - return container_of(rdev, struct iwch_dev, rdev); -} - -static inline int t3b_device(const struct iwch_dev *rhp) -{ - return rhp->rdev.t3cdev_p->type == T3B; -} - -static inline int t3a_device(const struct iwch_dev *rhp) -{ - return rhp->rdev.t3cdev_p->type == T3A; -} - -static inline struct iwch_cq *get_chp(struct iwch_dev *rhp, u32 cqid) -{ - return xa_load(&rhp->cqs, cqid); -} - -static inline struct iwch_qp *get_qhp(struct iwch_dev *rhp, u32 qpid) -{ - return xa_load(&rhp->qps, qpid); -} - -static inline struct iwch_mr *get_mhp(struct iwch_dev *rhp, u32 mmid) -{ - return xa_load(&rhp->mrs, mmid); -} - -extern struct cxgb3_client t3c_client; -extern cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS]; -extern void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb); - -#endif diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c deleted file mode 100644 index 0bca72cb4d9a..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ /dev/null @@ -1,2258 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "tcb.h" -#include "cxgb3_offload.h" -#include "iwch.h" -#include "iwch_provider.h" -#include "iwch_cm.h" - -static char *states[] = { - "idle", - "listen", - "connecting", - "mpa_wait_req", - "mpa_req_sent", - "mpa_req_rcvd", - "mpa_rep_sent", - "fpdu_mode", - "aborting", - "closing", - "moribund", - "dead", - NULL, -}; - -int peer2peer = 0; -module_param(peer2peer, int, 0644); -MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)"); - -static int ep_timeout_secs = 60; -module_param(ep_timeout_secs, int, 0644); -MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout " - "in seconds (default=60)"); - -static int mpa_rev = 1; -module_param(mpa_rev, int, 0644); -MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, " - "1 is spec compliant. (default=1)"); - -static int markers_enabled = 0; -module_param(markers_enabled, int, 0644); -MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)"); - -static int crc_enabled = 1; -module_param(crc_enabled, int, 0644); -MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)"); - -static int rcv_win = 256 * 1024; -module_param(rcv_win, int, 0644); -MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256)"); - -static int snd_win = 32 * 1024; -module_param(snd_win, int, 0644); -MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=32KB)"); - -static unsigned int nocong = 0; -module_param(nocong, uint, 0644); -MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)"); - -static unsigned int cong_flavor = 1; -module_param(cong_flavor, uint, 0644); -MODULE_PARM_DESC(cong_flavor, "TCP Congestion control flavor (default=1)"); - -static struct workqueue_struct *workq; - -static struct sk_buff_head rxq; - -static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp); -static void ep_timeout(struct timer_list *t); -static void connect_reply_upcall(struct iwch_ep *ep, int status); - -static void start_ep_timer(struct iwch_ep *ep) -{ - pr_debug("%s ep %p\n", __func__, ep); - if (timer_pending(&ep->timer)) { - pr_debug("%s stopped / restarted timer ep %p\n", __func__, ep); - del_timer_sync(&ep->timer); - } else - get_ep(&ep->com); - ep->timer.expires = jiffies + ep_timeout_secs * HZ; - add_timer(&ep->timer); -} - -static void stop_ep_timer(struct iwch_ep *ep) -{ - pr_debug("%s ep %p\n", __func__, ep); - if (!timer_pending(&ep->timer)) { - WARN(1, "%s timer stopped when its not running! ep %p state %u\n", - __func__, ep, ep->com.state); - return; - } - del_timer_sync(&ep->timer); - put_ep(&ep->com); -} - -static int iwch_l2t_send(struct t3cdev *tdev, struct sk_buff *skb, struct l2t_entry *l2e) -{ - int error = 0; - struct cxio_rdev *rdev; - - rdev = (struct cxio_rdev *)tdev->ulp; - if (cxio_fatal_error(rdev)) { - kfree_skb(skb); - return -EIO; - } - error = l2t_send(tdev, skb, l2e); - if (error < 0) - kfree_skb(skb); - return error < 0 ? error : 0; -} - -int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb) -{ - int error = 0; - struct cxio_rdev *rdev; - - rdev = (struct cxio_rdev *)tdev->ulp; - if (cxio_fatal_error(rdev)) { - kfree_skb(skb); - return -EIO; - } - error = cxgb3_ofld_send(tdev, skb); - if (error < 0) - kfree_skb(skb); - return error < 0 ? error : 0; -} - -static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb) -{ - struct cpl_tid_release *req; - - skb = get_skb(skb, sizeof(*req), GFP_KERNEL); - if (!skb) - return; - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid)); - skb->priority = CPL_PRIORITY_SETUP; - iwch_cxgb3_ofld_send(tdev, skb); - return; -} - -int iwch_quiesce_tid(struct iwch_ep *ep) -{ - struct cpl_set_tcb_field *req; - struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); - - if (!skb) - return -ENOMEM; - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid)); - req->reply = 0; - req->cpu_idx = 0; - req->word = htons(W_TCB_RX_QUIESCE); - req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE); - req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE); - - skb->priority = CPL_PRIORITY_DATA; - return iwch_cxgb3_ofld_send(ep->com.tdev, skb); -} - -int iwch_resume_tid(struct iwch_ep *ep) -{ - struct cpl_set_tcb_field *req; - struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); - - if (!skb) - return -ENOMEM; - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid)); - req->reply = 0; - req->cpu_idx = 0; - req->word = htons(W_TCB_RX_QUIESCE); - req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE); - req->val = 0; - - skb->priority = CPL_PRIORITY_DATA; - return iwch_cxgb3_ofld_send(ep->com.tdev, skb); -} - -static void set_emss(struct iwch_ep *ep, u16 opt) -{ - pr_debug("%s ep %p opt %u\n", __func__, ep, opt); - ep->emss = T3C_DATA(ep->com.tdev)->mtus[G_TCPOPT_MSS(opt)] - 40; - if (G_TCPOPT_TSTAMP(opt)) - ep->emss -= 12; - if (ep->emss < 128) - ep->emss = 128; - pr_debug("emss=%d\n", ep->emss); -} - -static enum iwch_ep_state state_read(struct iwch_ep_common *epc) -{ - unsigned long flags; - enum iwch_ep_state state; - - spin_lock_irqsave(&epc->lock, flags); - state = epc->state; - spin_unlock_irqrestore(&epc->lock, flags); - return state; -} - -static void __state_set(struct iwch_ep_common *epc, enum iwch_ep_state new) -{ - epc->state = new; -} - -static void state_set(struct iwch_ep_common *epc, enum iwch_ep_state new) -{ - unsigned long flags; - - spin_lock_irqsave(&epc->lock, flags); - pr_debug("%s - %s -> %s\n", __func__, states[epc->state], states[new]); - __state_set(epc, new); - spin_unlock_irqrestore(&epc->lock, flags); - return; -} - -static void *alloc_ep(int size, gfp_t gfp) -{ - struct iwch_ep_common *epc; - - epc = kzalloc(size, gfp); - if (epc) { - kref_init(&epc->kref); - spin_lock_init(&epc->lock); - init_waitqueue_head(&epc->waitq); - } - pr_debug("%s alloc ep %p\n", __func__, epc); - return epc; -} - -void __free_ep(struct kref *kref) -{ - struct iwch_ep *ep; - ep = container_of(container_of(kref, struct iwch_ep_common, kref), - struct iwch_ep, com); - pr_debug("%s ep %p state %s\n", - __func__, ep, states[state_read(&ep->com)]); - if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) { - cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid); - dst_release(ep->dst); - l2t_release(ep->com.tdev, ep->l2t); - } - kfree(ep); -} - -static void release_ep_resources(struct iwch_ep *ep) -{ - pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid); - set_bit(RELEASE_RESOURCES, &ep->com.flags); - put_ep(&ep->com); -} - -static int status2errno(int status) -{ - switch (status) { - case CPL_ERR_NONE: - return 0; - case CPL_ERR_CONN_RESET: - return -ECONNRESET; - case CPL_ERR_ARP_MISS: - return -EHOSTUNREACH; - case CPL_ERR_CONN_TIMEDOUT: - return -ETIMEDOUT; - case CPL_ERR_TCAM_FULL: - return -ENOMEM; - case CPL_ERR_CONN_EXIST: - return -EADDRINUSE; - default: - return -EIO; - } -} - -/* - * Try and reuse skbs already allocated... - */ -static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp) -{ - if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) { - skb_trim(skb, 0); - skb_get(skb); - } else { - skb = alloc_skb(len, gfp); - } - return skb; -} - -static struct rtable *find_route(struct t3cdev *dev, __be32 local_ip, - __be32 peer_ip, __be16 local_port, - __be16 peer_port, u8 tos) -{ - struct rtable *rt; - struct flowi4 fl4; - - rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip, - peer_port, local_port, IPPROTO_TCP, - tos, 0); - if (IS_ERR(rt)) - return NULL; - return rt; -} - -static unsigned int find_best_mtu(const struct t3c_data *d, unsigned short mtu) -{ - int i = 0; - - while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu) - ++i; - return i; -} - -static void arp_failure_discard(struct t3cdev *dev, struct sk_buff *skb) -{ - pr_debug("%s t3cdev %p\n", __func__, dev); - kfree_skb(skb); -} - -/* - * Handle an ARP failure for an active open. - */ -static void act_open_req_arp_failure(struct t3cdev *dev, struct sk_buff *skb) -{ - pr_err("ARP failure during connect\n"); - kfree_skb(skb); -} - -/* - * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant - * and send it along. - */ -static void abort_arp_failure(struct t3cdev *dev, struct sk_buff *skb) -{ - struct cpl_abort_req *req = cplhdr(skb); - - pr_debug("%s t3cdev %p\n", __func__, dev); - req->cmd = CPL_ABORT_NO_RST; - iwch_cxgb3_ofld_send(dev, skb); -} - -static int send_halfclose(struct iwch_ep *ep, gfp_t gfp) -{ - struct cpl_close_con_req *req; - struct sk_buff *skb; - - pr_debug("%s ep %p\n", __func__, ep); - skb = get_skb(NULL, sizeof(*req), gfp); - if (!skb) { - pr_err("%s - failed to alloc skb\n", __func__); - return -ENOMEM; - } - skb->priority = CPL_PRIORITY_DATA; - set_arp_failure_handler(skb, arp_failure_discard); - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON)); - req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, ep->hwtid)); - return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); -} - -static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp) -{ - struct cpl_abort_req *req; - - pr_debug("%s ep %p\n", __func__, ep); - skb = get_skb(skb, sizeof(*req), gfp); - if (!skb) { - pr_err("%s - failed to alloc skb\n", __func__); - return -ENOMEM; - } - skb->priority = CPL_PRIORITY_DATA; - set_arp_failure_handler(skb, abort_arp_failure); - req = skb_put_zero(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ)); - req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid)); - req->cmd = CPL_ABORT_SEND_RST; - return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); -} - -static int send_connect(struct iwch_ep *ep) -{ - struct cpl_act_open_req *req; - struct sk_buff *skb; - u32 opt0h, opt0l, opt2; - unsigned int mtu_idx; - int wscale; - - pr_debug("%s ep %p\n", __func__, ep); - - skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); - if (!skb) { - pr_err("%s - failed to alloc skb\n", __func__); - return -ENOMEM; - } - mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst)); - wscale = compute_wscale(rcv_win); - opt0h = V_NAGLE(0) | - V_NO_CONG(nocong) | - V_KEEP_ALIVE(1) | - F_TCAM_BYPASS | - V_WND_SCALE(wscale) | - V_MSS_IDX(mtu_idx) | - V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx); - opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10); - opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) | - V_CONG_CONTROL_FLAVOR(cong_flavor); - skb->priority = CPL_PRIORITY_SETUP; - set_arp_failure_handler(skb, act_open_req_arp_failure); - - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, ep->atid)); - req->local_port = ep->com.local_addr.sin_port; - req->peer_port = ep->com.remote_addr.sin_port; - req->local_ip = ep->com.local_addr.sin_addr.s_addr; - req->peer_ip = ep->com.remote_addr.sin_addr.s_addr; - req->opt0h = htonl(opt0h); - req->opt0l = htonl(opt0l); - req->params = 0; - req->opt2 = htonl(opt2); - return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); -} - -static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb) -{ - int mpalen; - struct tx_data_wr *req; - struct mpa_message *mpa; - int len; - - pr_debug("%s ep %p pd_len %d\n", __func__, ep, ep->plen); - - BUG_ON(skb_cloned(skb)); - - mpalen = sizeof(*mpa) + ep->plen; - if (skb->data + mpalen + sizeof(*req) > skb_end_pointer(skb)) { - kfree_skb(skb); - skb=alloc_skb(mpalen + sizeof(*req), GFP_KERNEL); - if (!skb) { - connect_reply_upcall(ep, -ENOMEM); - return; - } - } - skb_trim(skb, 0); - skb_reserve(skb, sizeof(*req)); - skb_put(skb, mpalen); - skb->priority = CPL_PRIORITY_DATA; - mpa = (struct mpa_message *) skb->data; - memset(mpa, 0, sizeof(*mpa)); - memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)); - mpa->flags = (crc_enabled ? MPA_CRC : 0) | - (markers_enabled ? MPA_MARKERS : 0); - mpa->private_data_size = htons(ep->plen); - mpa->revision = mpa_rev; - - if (ep->plen) - memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen); - - /* - * Reference the mpa skb. This ensures the data area - * will remain in memory until the hw acks the tx. - * Function tx_ack() will deref it. - */ - skb_get(skb); - set_arp_failure_handler(skb, arp_failure_discard); - skb_reset_transport_header(skb); - len = skb->len; - req = skb_push(skb, sizeof(*req)); - req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL); - req->wr_lo = htonl(V_WR_TID(ep->hwtid)); - req->len = htonl(len); - req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | - V_TX_SNDBUF(snd_win>>15)); - req->flags = htonl(F_TX_INIT); - req->sndseq = htonl(ep->snd_seq); - BUG_ON(ep->mpa_skb); - ep->mpa_skb = skb; - iwch_l2t_send(ep->com.tdev, skb, ep->l2t); - start_ep_timer(ep); - state_set(&ep->com, MPA_REQ_SENT); - return; -} - -static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen) -{ - int mpalen; - struct tx_data_wr *req; - struct mpa_message *mpa; - struct sk_buff *skb; - - pr_debug("%s ep %p plen %d\n", __func__, ep, plen); - - mpalen = sizeof(*mpa) + plen; - - skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL); - if (!skb) { - pr_err("%s - cannot alloc skb!\n", __func__); - return -ENOMEM; - } - skb_reserve(skb, sizeof(*req)); - mpa = skb_put(skb, mpalen); - memset(mpa, 0, sizeof(*mpa)); - memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); - mpa->flags = MPA_REJECT; - mpa->revision = mpa_rev; - mpa->private_data_size = htons(plen); - if (plen) - memcpy(mpa->private_data, pdata, plen); - - /* - * Reference the mpa skb again. This ensures the data area - * will remain in memory until the hw acks the tx. - * Function tx_ack() will deref it. - */ - skb_get(skb); - skb->priority = CPL_PRIORITY_DATA; - set_arp_failure_handler(skb, arp_failure_discard); - skb_reset_transport_header(skb); - req = skb_push(skb, sizeof(*req)); - req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL); - req->wr_lo = htonl(V_WR_TID(ep->hwtid)); - req->len = htonl(mpalen); - req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | - V_TX_SNDBUF(snd_win>>15)); - req->flags = htonl(F_TX_INIT); - req->sndseq = htonl(ep->snd_seq); - BUG_ON(ep->mpa_skb); - ep->mpa_skb = skb; - return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); -} - -static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen) -{ - int mpalen; - struct tx_data_wr *req; - struct mpa_message *mpa; - int len; - struct sk_buff *skb; - - pr_debug("%s ep %p plen %d\n", __func__, ep, plen); - - mpalen = sizeof(*mpa) + plen; - - skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL); - if (!skb) { - pr_err("%s - cannot alloc skb!\n", __func__); - return -ENOMEM; - } - skb->priority = CPL_PRIORITY_DATA; - skb_reserve(skb, sizeof(*req)); - mpa = skb_put(skb, mpalen); - memset(mpa, 0, sizeof(*mpa)); - memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); - mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) | - (markers_enabled ? MPA_MARKERS : 0); - mpa->revision = mpa_rev; - mpa->private_data_size = htons(plen); - if (plen) - memcpy(mpa->private_data, pdata, plen); - - /* - * Reference the mpa skb. This ensures the data area - * will remain in memory until the hw acks the tx. - * Function tx_ack() will deref it. - */ - skb_get(skb); - set_arp_failure_handler(skb, arp_failure_discard); - skb_reset_transport_header(skb); - len = skb->len; - req = skb_push(skb, sizeof(*req)); - req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL); - req->wr_lo = htonl(V_WR_TID(ep->hwtid)); - req->len = htonl(len); - req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | - V_TX_SNDBUF(snd_win>>15)); - req->flags = htonl(F_TX_INIT); - req->sndseq = htonl(ep->snd_seq); - ep->mpa_skb = skb; - state_set(&ep->com, MPA_REP_SENT); - return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); -} - -static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct cpl_act_establish *req = cplhdr(skb); - unsigned int tid = GET_TID(req); - - pr_debug("%s ep %p tid %d\n", __func__, ep, tid); - - dst_confirm(ep->dst); - - /* setup the hwtid for this connection */ - ep->hwtid = tid; - cxgb3_insert_tid(ep->com.tdev, &t3c_client, ep, tid); - - ep->snd_seq = ntohl(req->snd_isn); - ep->rcv_seq = ntohl(req->rcv_isn); - - set_emss(ep, ntohs(req->tcp_opt)); - - /* dealloc the atid */ - cxgb3_free_atid(ep->com.tdev, ep->atid); - - /* start MPA negotiation */ - send_mpa_req(ep, skb); - - return 0; -} - -static void abort_connection(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp) -{ - pr_debug("%s ep %p\n", __FILE__, ep); - state_set(&ep->com, ABORTING); - send_abort(ep, skb, gfp); -} - -static void close_complete_upcall(struct iwch_ep *ep) -{ - struct iw_cm_event event; - - pr_debug("%s ep %p\n", __func__, ep); - memset(&event, 0, sizeof(event)); - event.event = IW_CM_EVENT_CLOSE; - if (ep->com.cm_id) { - pr_debug("close complete delivered ep %p cm_id %p tid %d\n", - ep, ep->com.cm_id, ep->hwtid); - ep->com.cm_id->event_handler(ep->com.cm_id, &event); - ep->com.cm_id->rem_ref(ep->com.cm_id); - ep->com.cm_id = NULL; - ep->com.qp = NULL; - } -} - -static void peer_close_upcall(struct iwch_ep *ep) -{ - struct iw_cm_event event; - - pr_debug("%s ep %p\n", __func__, ep); - memset(&event, 0, sizeof(event)); - event.event = IW_CM_EVENT_DISCONNECT; - if (ep->com.cm_id) { - pr_debug("peer close delivered ep %p cm_id %p tid %d\n", - ep, ep->com.cm_id, ep->hwtid); - ep->com.cm_id->event_handler(ep->com.cm_id, &event); - } -} - -static void peer_abort_upcall(struct iwch_ep *ep) -{ - struct iw_cm_event event; - - pr_debug("%s ep %p\n", __func__, ep); - memset(&event, 0, sizeof(event)); - event.event = IW_CM_EVENT_CLOSE; - event.status = -ECONNRESET; - if (ep->com.cm_id) { - pr_debug("abort delivered ep %p cm_id %p tid %d\n", ep, - ep->com.cm_id, ep->hwtid); - ep->com.cm_id->event_handler(ep->com.cm_id, &event); - ep->com.cm_id->rem_ref(ep->com.cm_id); - ep->com.cm_id = NULL; - ep->com.qp = NULL; - } -} - -static void connect_reply_upcall(struct iwch_ep *ep, int status) -{ - struct iw_cm_event event; - - pr_debug("%s ep %p status %d\n", __func__, ep, status); - memset(&event, 0, sizeof(event)); - event.event = IW_CM_EVENT_CONNECT_REPLY; - event.status = status; - memcpy(&event.local_addr, &ep->com.local_addr, - sizeof(ep->com.local_addr)); - memcpy(&event.remote_addr, &ep->com.remote_addr, - sizeof(ep->com.remote_addr)); - - if ((status == 0) || (status == -ECONNREFUSED)) { - event.private_data_len = ep->plen; - event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); - } - if (ep->com.cm_id) { - pr_debug("%s ep %p tid %d status %d\n", __func__, ep, - ep->hwtid, status); - ep->com.cm_id->event_handler(ep->com.cm_id, &event); - } - if (status < 0) { - ep->com.cm_id->rem_ref(ep->com.cm_id); - ep->com.cm_id = NULL; - ep->com.qp = NULL; - } -} - -static void connect_request_upcall(struct iwch_ep *ep) -{ - struct iw_cm_event event; - - pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid); - memset(&event, 0, sizeof(event)); - event.event = IW_CM_EVENT_CONNECT_REQUEST; - memcpy(&event.local_addr, &ep->com.local_addr, - sizeof(ep->com.local_addr)); - memcpy(&event.remote_addr, &ep->com.remote_addr, - sizeof(ep->com.local_addr)); - event.private_data_len = ep->plen; - event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); - event.provider_data = ep; - /* - * Until ird/ord negotiation via MPAv2 support is added, send max - * supported values - */ - event.ird = event.ord = 8; - if (state_read(&ep->parent_ep->com) != DEAD) { - get_ep(&ep->com); - ep->parent_ep->com.cm_id->event_handler( - ep->parent_ep->com.cm_id, - &event); - } - put_ep(&ep->parent_ep->com); - ep->parent_ep = NULL; -} - -static void established_upcall(struct iwch_ep *ep) -{ - struct iw_cm_event event; - - pr_debug("%s ep %p\n", __func__, ep); - memset(&event, 0, sizeof(event)); - event.event = IW_CM_EVENT_ESTABLISHED; - /* - * Until ird/ord negotiation via MPAv2 support is added, send max - * supported values - */ - event.ird = event.ord = 8; - if (ep->com.cm_id) { - pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid); - ep->com.cm_id->event_handler(ep->com.cm_id, &event); - } -} - -static int update_rx_credits(struct iwch_ep *ep, u32 credits) -{ - struct cpl_rx_data_ack *req; - struct sk_buff *skb; - - pr_debug("%s ep %p credits %u\n", __func__, ep, credits); - skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); - if (!skb) { - pr_err("update_rx_credits - cannot alloc skb!\n"); - return 0; - } - - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, ep->hwtid)); - req->credit_dack = htonl(V_RX_CREDITS(credits) | V_RX_FORCE_ACK(1)); - skb->priority = CPL_PRIORITY_ACK; - iwch_cxgb3_ofld_send(ep->com.tdev, skb); - return credits; -} - -static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb) -{ - struct mpa_message *mpa; - u16 plen; - struct iwch_qp_attributes attrs; - enum iwch_qp_attr_mask mask; - int err; - - pr_debug("%s ep %p\n", __func__, ep); - - /* - * Stop mpa timer. If it expired, then the state has - * changed and we bail since ep_timeout already aborted - * the connection. - */ - stop_ep_timer(ep); - if (state_read(&ep->com) != MPA_REQ_SENT) - return; - - /* - * If we get more than the supported amount of private data - * then we must fail this connection. - */ - if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) { - err = -EINVAL; - goto err; - } - - /* - * copy the new data into our accumulation buffer. - */ - skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]), - skb->len); - ep->mpa_pkt_len += skb->len; - - /* - * if we don't even have the mpa message, then bail. - */ - if (ep->mpa_pkt_len < sizeof(*mpa)) - return; - mpa = (struct mpa_message *) ep->mpa_pkt; - - /* Validate MPA header. */ - if (mpa->revision != mpa_rev) { - err = -EPROTO; - goto err; - } - if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) { - err = -EPROTO; - goto err; - } - - plen = ntohs(mpa->private_data_size); - - /* - * Fail if there's too much private data. - */ - if (plen > MPA_MAX_PRIVATE_DATA) { - err = -EPROTO; - goto err; - } - - /* - * If plen does not account for pkt size - */ - if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { - err = -EPROTO; - goto err; - } - - ep->plen = (u8) plen; - - /* - * If we don't have all the pdata yet, then bail. - * We'll continue process when more data arrives. - */ - if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) - return; - - if (mpa->flags & MPA_REJECT) { - err = -ECONNREFUSED; - goto err; - } - - /* - * If we get here we have accumulated the entire mpa - * start reply message including private data. And - * the MPA header is valid. - */ - state_set(&ep->com, FPDU_MODE); - ep->mpa_attr.initiator = 1; - ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; - ep->mpa_attr.recv_marker_enabled = markers_enabled; - ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; - ep->mpa_attr.version = mpa_rev; - pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d\n", - __func__, - ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, - ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version); - - attrs.mpa_attr = ep->mpa_attr; - attrs.max_ird = ep->ird; - attrs.max_ord = ep->ord; - attrs.llp_stream_handle = ep; - attrs.next_state = IWCH_QP_STATE_RTS; - - mask = IWCH_QP_ATTR_NEXT_STATE | - IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR | - IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD; - - /* bind QP and TID with INIT_WR */ - err = iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, mask, &attrs, 1); - if (err) - goto err; - - if (peer2peer && iwch_rqes_posted(ep->com.qp) == 0) { - iwch_post_zb_read(ep); - } - - goto out; -err: - abort_connection(ep, skb, GFP_KERNEL); -out: - connect_reply_upcall(ep, err); - return; -} - -static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb) -{ - struct mpa_message *mpa; - u16 plen; - - pr_debug("%s ep %p\n", __func__, ep); - - /* - * Stop mpa timer. If it expired, then the state has - * changed and we bail since ep_timeout already aborted - * the connection. - */ - stop_ep_timer(ep); - if (state_read(&ep->com) != MPA_REQ_WAIT) - return; - - /* - * If we get more than the supported amount of private data - * then we must fail this connection. - */ - if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) { - abort_connection(ep, skb, GFP_KERNEL); - return; - } - - pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__); - - /* - * Copy the new data into our accumulation buffer. - */ - skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]), - skb->len); - ep->mpa_pkt_len += skb->len; - - /* - * If we don't even have the mpa message, then bail. - * We'll continue process when more data arrives. - */ - if (ep->mpa_pkt_len < sizeof(*mpa)) - return; - pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__); - mpa = (struct mpa_message *) ep->mpa_pkt; - - /* - * Validate MPA Header. - */ - if (mpa->revision != mpa_rev) { - abort_connection(ep, skb, GFP_KERNEL); - return; - } - - if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) { - abort_connection(ep, skb, GFP_KERNEL); - return; - } - - plen = ntohs(mpa->private_data_size); - - /* - * Fail if there's too much private data. - */ - if (plen > MPA_MAX_PRIVATE_DATA) { - abort_connection(ep, skb, GFP_KERNEL); - return; - } - - /* - * If plen does not account for pkt size - */ - if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { - abort_connection(ep, skb, GFP_KERNEL); - return; - } - ep->plen = (u8) plen; - - /* - * If we don't have all the pdata yet, then bail. - */ - if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) - return; - - /* - * If we get here we have accumulated the entire mpa - * start reply message including private data. - */ - ep->mpa_attr.initiator = 0; - ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; - ep->mpa_attr.recv_marker_enabled = markers_enabled; - ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; - ep->mpa_attr.version = mpa_rev; - pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d\n", - __func__, - ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, - ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version); - - state_set(&ep->com, MPA_REQ_RCVD); - - /* drive upcall */ - connect_request_upcall(ep); - return; -} - -static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct cpl_rx_data *hdr = cplhdr(skb); - unsigned int dlen = ntohs(hdr->len); - - pr_debug("%s ep %p dlen %u\n", __func__, ep, dlen); - - skb_pull(skb, sizeof(*hdr)); - skb_trim(skb, dlen); - - ep->rcv_seq += dlen; - BUG_ON(ep->rcv_seq != (ntohl(hdr->seq) + dlen)); - - switch (state_read(&ep->com)) { - case MPA_REQ_SENT: - process_mpa_reply(ep, skb); - break; - case MPA_REQ_WAIT: - process_mpa_request(ep, skb); - break; - case MPA_REP_SENT: - break; - default: - pr_err("%s Unexpected streaming data. ep %p state %d tid %d\n", - __func__, ep, state_read(&ep->com), ep->hwtid); - - /* - * The ep will timeout and inform the ULP of the failure. - * See ep_timeout(). - */ - break; - } - - /* update RX credits */ - update_rx_credits(ep, dlen); - - return CPL_RET_BUF_DONE; -} - -/* - * Upcall from the adapter indicating data has been transmitted. - * For us its just the single MPA request or reply. We can now free - * the skb holding the mpa message. - */ -static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct cpl_wr_ack *hdr = cplhdr(skb); - unsigned int credits = ntohs(hdr->credits); - unsigned long flags; - int post_zb = 0; - - pr_debug("%s ep %p credits %u\n", __func__, ep, credits); - - if (credits == 0) { - pr_debug("%s 0 credit ack ep %p state %u\n", - __func__, ep, state_read(&ep->com)); - return CPL_RET_BUF_DONE; - } - - spin_lock_irqsave(&ep->com.lock, flags); - BUG_ON(credits != 1); - dst_confirm(ep->dst); - if (!ep->mpa_skb) { - pr_debug("%s rdma_init wr_ack ep %p state %u\n", - __func__, ep, ep->com.state); - if (ep->mpa_attr.initiator) { - pr_debug("%s initiator ep %p state %u\n", - __func__, ep, ep->com.state); - if (peer2peer && ep->com.state == FPDU_MODE) - post_zb = 1; - } else { - pr_debug("%s responder ep %p state %u\n", - __func__, ep, ep->com.state); - if (ep->com.state == MPA_REQ_RCVD) { - ep->com.rpl_done = 1; - wake_up(&ep->com.waitq); - } - } - } else { - pr_debug("%s lsm ack ep %p state %u freeing skb\n", - __func__, ep, ep->com.state); - kfree_skb(ep->mpa_skb); - ep->mpa_skb = NULL; - } - spin_unlock_irqrestore(&ep->com.lock, flags); - if (post_zb) - iwch_post_zb_read(ep); - return CPL_RET_BUF_DONE; -} - -static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - unsigned long flags; - int release = 0; - - pr_debug("%s ep %p\n", __func__, ep); - BUG_ON(!ep); - - /* - * We get 2 abort replies from the HW. The first one must - * be ignored except for scribbling that we need one more. - */ - if (!test_and_set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags)) { - return CPL_RET_BUF_DONE; - } - - spin_lock_irqsave(&ep->com.lock, flags); - switch (ep->com.state) { - case ABORTING: - close_complete_upcall(ep); - __state_set(&ep->com, DEAD); - release = 1; - break; - default: - pr_err("%s ep %p state %d\n", __func__, ep, ep->com.state); - break; - } - spin_unlock_irqrestore(&ep->com.lock, flags); - - if (release) - release_ep_resources(ep); - return CPL_RET_BUF_DONE; -} - -/* - * Return whether a failed active open has allocated a TID - */ -static inline int act_open_has_tid(int status) -{ - return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST && - status != CPL_ERR_ARP_MISS; -} - -static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct cpl_act_open_rpl *rpl = cplhdr(skb); - - pr_debug("%s ep %p status %u errno %d\n", __func__, ep, rpl->status, - status2errno(rpl->status)); - connect_reply_upcall(ep, status2errno(rpl->status)); - state_set(&ep->com, DEAD); - if (ep->com.tdev->type != T3A && act_open_has_tid(rpl->status)) - release_tid(ep->com.tdev, GET_TID(rpl), NULL); - cxgb3_free_atid(ep->com.tdev, ep->atid); - dst_release(ep->dst); - l2t_release(ep->com.tdev, ep->l2t); - put_ep(&ep->com); - return CPL_RET_BUF_DONE; -} - -static int listen_start(struct iwch_listen_ep *ep) -{ - struct sk_buff *skb; - struct cpl_pass_open_req *req; - - pr_debug("%s ep %p\n", __func__, ep); - skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); - if (!skb) { - pr_err("t3c_listen_start failed to alloc skb!\n"); - return -ENOMEM; - } - - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, ep->stid)); - req->local_port = ep->com.local_addr.sin_port; - req->local_ip = ep->com.local_addr.sin_addr.s_addr; - req->peer_port = 0; - req->peer_ip = 0; - req->peer_netmask = 0; - req->opt0h = htonl(F_DELACK | F_TCAM_BYPASS); - req->opt0l = htonl(V_RCV_BUFSIZ(rcv_win>>10)); - req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK)); - - skb->priority = 1; - return iwch_cxgb3_ofld_send(ep->com.tdev, skb); -} - -static int pass_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_listen_ep *ep = ctx; - struct cpl_pass_open_rpl *rpl = cplhdr(skb); - - pr_debug("%s ep %p status %d error %d\n", __func__, ep, - rpl->status, status2errno(rpl->status)); - ep->com.rpl_err = status2errno(rpl->status); - ep->com.rpl_done = 1; - wake_up(&ep->com.waitq); - - return CPL_RET_BUF_DONE; -} - -static int listen_stop(struct iwch_listen_ep *ep) -{ - struct sk_buff *skb; - struct cpl_close_listserv_req *req; - - pr_debug("%s ep %p\n", __func__, ep); - skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); - if (!skb) { - pr_err("%s - failed to alloc skb\n", __func__); - return -ENOMEM; - } - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - req->cpu_idx = 0; - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, ep->stid)); - skb->priority = 1; - return iwch_cxgb3_ofld_send(ep->com.tdev, skb); -} - -static int close_listsrv_rpl(struct t3cdev *tdev, struct sk_buff *skb, - void *ctx) -{ - struct iwch_listen_ep *ep = ctx; - struct cpl_close_listserv_rpl *rpl = cplhdr(skb); - - pr_debug("%s ep %p\n", __func__, ep); - ep->com.rpl_err = status2errno(rpl->status); - ep->com.rpl_done = 1; - wake_up(&ep->com.waitq); - return CPL_RET_BUF_DONE; -} - -static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb) -{ - struct cpl_pass_accept_rpl *rpl; - unsigned int mtu_idx; - u32 opt0h, opt0l, opt2; - int wscale; - - pr_debug("%s ep %p\n", __func__, ep); - BUG_ON(skb_cloned(skb)); - skb_trim(skb, sizeof(*rpl)); - skb_get(skb); - mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst)); - wscale = compute_wscale(rcv_win); - opt0h = V_NAGLE(0) | - V_NO_CONG(nocong) | - V_KEEP_ALIVE(1) | - F_TCAM_BYPASS | - V_WND_SCALE(wscale) | - V_MSS_IDX(mtu_idx) | - V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx); - opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10); - opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) | - V_CONG_CONTROL_FLAVOR(cong_flavor); - - rpl = cplhdr(skb); - rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, ep->hwtid)); - rpl->peer_ip = peer_ip; - rpl->opt0h = htonl(opt0h); - rpl->opt0l_status = htonl(opt0l | CPL_PASS_OPEN_ACCEPT); - rpl->opt2 = htonl(opt2); - rpl->rsvd = rpl->opt2; /* workaround for HW bug */ - skb->priority = CPL_PRIORITY_SETUP; - iwch_l2t_send(ep->com.tdev, skb, ep->l2t); - - return; -} - -static void reject_cr(struct t3cdev *tdev, u32 hwtid, __be32 peer_ip, - struct sk_buff *skb) -{ - pr_debug("%s t3cdev %p tid %u peer_ip %x\n", __func__, tdev, hwtid, - peer_ip); - BUG_ON(skb_cloned(skb)); - skb_trim(skb, sizeof(struct cpl_tid_release)); - skb_get(skb); - - if (tdev->type != T3A) - release_tid(tdev, hwtid, skb); - else { - struct cpl_pass_accept_rpl *rpl; - - rpl = cplhdr(skb); - skb->priority = CPL_PRIORITY_SETUP; - rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, - hwtid)); - rpl->peer_ip = peer_ip; - rpl->opt0h = htonl(F_TCAM_BYPASS); - rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT); - rpl->opt2 = 0; - rpl->rsvd = rpl->opt2; - iwch_cxgb3_ofld_send(tdev, skb); - } -} - -static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *child_ep, *parent_ep = ctx; - struct cpl_pass_accept_req *req = cplhdr(skb); - unsigned int hwtid = GET_TID(req); - struct dst_entry *dst; - struct l2t_entry *l2t; - struct rtable *rt; - struct iff_mac tim; - - pr_debug("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid); - - if (state_read(&parent_ep->com) != LISTEN) { - pr_err("%s - listening ep not in LISTEN\n", __func__); - goto reject; - } - - /* - * Find the netdev for this connection request. - */ - tim.mac_addr = req->dst_mac; - tim.vlan_tag = ntohs(req->vlan_tag); - if (tdev->ctl(tdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) { - pr_err("%s bad dst mac %pM\n", __func__, req->dst_mac); - goto reject; - } - - /* Find output route */ - rt = find_route(tdev, - req->local_ip, - req->peer_ip, - req->local_port, - req->peer_port, G_PASS_OPEN_TOS(ntohl(req->tos_tid))); - if (!rt) { - pr_err("%s - failed to find dst entry!\n", __func__); - goto reject; - } - dst = &rt->dst; - l2t = t3_l2t_get(tdev, dst, NULL, &req->peer_ip); - if (!l2t) { - pr_err("%s - failed to allocate l2t entry!\n", __func__); - dst_release(dst); - goto reject; - } - child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL); - if (!child_ep) { - pr_err("%s - failed to allocate ep entry!\n", __func__); - l2t_release(tdev, l2t); - dst_release(dst); - goto reject; - } - state_set(&child_ep->com, CONNECTING); - child_ep->com.tdev = tdev; - child_ep->com.cm_id = NULL; - child_ep->com.local_addr.sin_family = AF_INET; - child_ep->com.local_addr.sin_port = req->local_port; - child_ep->com.local_addr.sin_addr.s_addr = req->local_ip; - child_ep->com.remote_addr.sin_family = AF_INET; - child_ep->com.remote_addr.sin_port = req->peer_port; - child_ep->com.remote_addr.sin_addr.s_addr = req->peer_ip; - get_ep(&parent_ep->com); - child_ep->parent_ep = parent_ep; - child_ep->tos = G_PASS_OPEN_TOS(ntohl(req->tos_tid)); - child_ep->l2t = l2t; - child_ep->dst = dst; - child_ep->hwtid = hwtid; - timer_setup(&child_ep->timer, ep_timeout, 0); - cxgb3_insert_tid(tdev, &t3c_client, child_ep, hwtid); - accept_cr(child_ep, req->peer_ip, skb); - goto out; -reject: - reject_cr(tdev, hwtid, req->peer_ip, skb); -out: - return CPL_RET_BUF_DONE; -} - -static int pass_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct cpl_pass_establish *req = cplhdr(skb); - - pr_debug("%s ep %p\n", __func__, ep); - ep->snd_seq = ntohl(req->snd_isn); - ep->rcv_seq = ntohl(req->rcv_isn); - - set_emss(ep, ntohs(req->tcp_opt)); - - dst_confirm(ep->dst); - state_set(&ep->com, MPA_REQ_WAIT); - start_ep_timer(ep); - - return CPL_RET_BUF_DONE; -} - -static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct iwch_qp_attributes attrs; - unsigned long flags; - int disconnect = 1; - int release = 0; - - pr_debug("%s ep %p\n", __func__, ep); - dst_confirm(ep->dst); - - spin_lock_irqsave(&ep->com.lock, flags); - switch (ep->com.state) { - case MPA_REQ_WAIT: - __state_set(&ep->com, CLOSING); - break; - case MPA_REQ_SENT: - __state_set(&ep->com, CLOSING); - connect_reply_upcall(ep, -ECONNRESET); - break; - case MPA_REQ_RCVD: - - /* - * We're gonna mark this puppy DEAD, but keep - * the reference on it until the ULP accepts or - * rejects the CR. Also wake up anyone waiting - * in rdma connection migration (see iwch_accept_cr()). - */ - __state_set(&ep->com, CLOSING); - ep->com.rpl_done = 1; - ep->com.rpl_err = -ECONNRESET; - pr_debug("waking up ep %p\n", ep); - wake_up(&ep->com.waitq); - break; - case MPA_REP_SENT: - __state_set(&ep->com, CLOSING); - ep->com.rpl_done = 1; - ep->com.rpl_err = -ECONNRESET; - pr_debug("waking up ep %p\n", ep); - wake_up(&ep->com.waitq); - break; - case FPDU_MODE: - start_ep_timer(ep); - __state_set(&ep->com, CLOSING); - attrs.next_state = IWCH_QP_STATE_CLOSING; - iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, - IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); - peer_close_upcall(ep); - break; - case ABORTING: - disconnect = 0; - break; - case CLOSING: - __state_set(&ep->com, MORIBUND); - disconnect = 0; - break; - case MORIBUND: - stop_ep_timer(ep); - if (ep->com.cm_id && ep->com.qp) { - attrs.next_state = IWCH_QP_STATE_IDLE; - iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, - IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); - } - close_complete_upcall(ep); - __state_set(&ep->com, DEAD); - release = 1; - disconnect = 0; - break; - case DEAD: - disconnect = 0; - break; - default: - BUG_ON(1); - } - spin_unlock_irqrestore(&ep->com.lock, flags); - if (disconnect) - iwch_ep_disconnect(ep, 0, GFP_KERNEL); - if (release) - release_ep_resources(ep); - return CPL_RET_BUF_DONE; -} - -/* - * Returns whether an ABORT_REQ_RSS message is a negative advice. - */ -static int is_neg_adv_abort(unsigned int status) -{ - return status == CPL_ERR_RTX_NEG_ADVICE || - status == CPL_ERR_PERSIST_NEG_ADVICE; -} - -static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct cpl_abort_req_rss *req = cplhdr(skb); - struct iwch_ep *ep = ctx; - struct cpl_abort_rpl *rpl; - struct sk_buff *rpl_skb; - struct iwch_qp_attributes attrs; - int ret; - int release = 0; - unsigned long flags; - - if (is_neg_adv_abort(req->status)) { - pr_debug("%s neg_adv_abort ep %p tid %d\n", __func__, ep, - ep->hwtid); - t3_l2t_send_event(ep->com.tdev, ep->l2t); - return CPL_RET_BUF_DONE; - } - - /* - * We get 2 peer aborts from the HW. The first one must - * be ignored except for scribbling that we need one more. - */ - if (!test_and_set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags)) { - return CPL_RET_BUF_DONE; - } - - spin_lock_irqsave(&ep->com.lock, flags); - pr_debug("%s ep %p state %u\n", __func__, ep, ep->com.state); - switch (ep->com.state) { - case CONNECTING: - break; - case MPA_REQ_WAIT: - stop_ep_timer(ep); - break; - case MPA_REQ_SENT: - stop_ep_timer(ep); - connect_reply_upcall(ep, -ECONNRESET); - break; - case MPA_REP_SENT: - ep->com.rpl_done = 1; - ep->com.rpl_err = -ECONNRESET; - pr_debug("waking up ep %p\n", ep); - wake_up(&ep->com.waitq); - break; - case MPA_REQ_RCVD: - - /* - * We're gonna mark this puppy DEAD, but keep - * the reference on it until the ULP accepts or - * rejects the CR. Also wake up anyone waiting - * in rdma connection migration (see iwch_accept_cr()). - */ - ep->com.rpl_done = 1; - ep->com.rpl_err = -ECONNRESET; - pr_debug("waking up ep %p\n", ep); - wake_up(&ep->com.waitq); - break; - case MORIBUND: - case CLOSING: - stop_ep_timer(ep); - /*FALLTHROUGH*/ - case FPDU_MODE: - if (ep->com.cm_id && ep->com.qp) { - attrs.next_state = IWCH_QP_STATE_ERROR; - ret = iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, - &attrs, 1); - if (ret) - pr_err("%s - qp <- error failed!\n", __func__); - } - peer_abort_upcall(ep); - break; - case ABORTING: - break; - case DEAD: - pr_debug("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__); - spin_unlock_irqrestore(&ep->com.lock, flags); - return CPL_RET_BUF_DONE; - default: - BUG_ON(1); - break; - } - dst_confirm(ep->dst); - if (ep->com.state != ABORTING) { - __state_set(&ep->com, DEAD); - release = 1; - } - spin_unlock_irqrestore(&ep->com.lock, flags); - - rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL); - if (!rpl_skb) { - pr_err("%s - cannot allocate skb!\n", __func__); - release = 1; - goto out; - } - rpl_skb->priority = CPL_PRIORITY_DATA; - rpl = skb_put(rpl_skb, sizeof(*rpl)); - rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL)); - rpl->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); - OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid)); - rpl->cmd = CPL_ABORT_NO_RST; - iwch_cxgb3_ofld_send(ep->com.tdev, rpl_skb); -out: - if (release) - release_ep_resources(ep); - return CPL_RET_BUF_DONE; -} - -static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct iwch_qp_attributes attrs; - unsigned long flags; - int release = 0; - - pr_debug("%s ep %p\n", __func__, ep); - BUG_ON(!ep); - - /* The cm_id may be null if we failed to connect */ - spin_lock_irqsave(&ep->com.lock, flags); - switch (ep->com.state) { - case CLOSING: - __state_set(&ep->com, MORIBUND); - break; - case MORIBUND: - stop_ep_timer(ep); - if ((ep->com.cm_id) && (ep->com.qp)) { - attrs.next_state = IWCH_QP_STATE_IDLE; - iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, - IWCH_QP_ATTR_NEXT_STATE, - &attrs, 1); - } - close_complete_upcall(ep); - __state_set(&ep->com, DEAD); - release = 1; - break; - case ABORTING: - case DEAD: - break; - default: - BUG_ON(1); - break; - } - spin_unlock_irqrestore(&ep->com.lock, flags); - if (release) - release_ep_resources(ep); - return CPL_RET_BUF_DONE; -} - -/* - * T3A does 3 things when a TERM is received: - * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet - * 2) generate an async event on the QP with the TERMINATE opcode - * 3) post a TERMINATE opcode cqe into the associated CQ. - * - * For (1), we save the message in the qp for later consumer consumption. - * For (2), we move the QP into TERMINATE, post a QP event and disconnect. - * For (3), we toss the CQE in cxio_poll_cq(). - * - * terminate() handles case (1)... - */ -static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - - if (state_read(&ep->com) != FPDU_MODE) - return CPL_RET_BUF_DONE; - - pr_debug("%s ep %p\n", __func__, ep); - skb_pull(skb, sizeof(struct cpl_rdma_terminate)); - pr_debug("%s saving %d bytes of term msg\n", __func__, skb->len); - skb_copy_from_linear_data(skb, ep->com.qp->attr.terminate_buffer, - skb->len); - ep->com.qp->attr.terminate_msg_len = skb->len; - ep->com.qp->attr.is_terminate_local = 0; - return CPL_RET_BUF_DONE; -} - -static int ec_status(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct cpl_rdma_ec_status *rep = cplhdr(skb); - struct iwch_ep *ep = ctx; - - pr_debug("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid, - rep->status); - if (rep->status) { - struct iwch_qp_attributes attrs; - - pr_err("%s BAD CLOSE - Aborting tid %u\n", - __func__, ep->hwtid); - stop_ep_timer(ep); - attrs.next_state = IWCH_QP_STATE_ERROR; - iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, - &attrs, 1); - abort_connection(ep, NULL, GFP_KERNEL); - } - return CPL_RET_BUF_DONE; -} - -static void ep_timeout(struct timer_list *t) -{ - struct iwch_ep *ep = from_timer(ep, t, timer); - struct iwch_qp_attributes attrs; - unsigned long flags; - int abort = 1; - - spin_lock_irqsave(&ep->com.lock, flags); - pr_debug("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid, - ep->com.state); - switch (ep->com.state) { - case MPA_REQ_SENT: - __state_set(&ep->com, ABORTING); - connect_reply_upcall(ep, -ETIMEDOUT); - break; - case MPA_REQ_WAIT: - __state_set(&ep->com, ABORTING); - break; - case CLOSING: - case MORIBUND: - if (ep->com.cm_id && ep->com.qp) { - attrs.next_state = IWCH_QP_STATE_ERROR; - iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, - &attrs, 1); - } - __state_set(&ep->com, ABORTING); - break; - default: - WARN(1, "%s unexpected state ep %p state %u\n", - __func__, ep, ep->com.state); - abort = 0; - } - spin_unlock_irqrestore(&ep->com.lock, flags); - if (abort) - abort_connection(ep, NULL, GFP_ATOMIC); - put_ep(&ep->com); -} - -int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) -{ - struct iwch_ep *ep = to_ep(cm_id); - - pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - - if (state_read(&ep->com) == DEAD) { - put_ep(&ep->com); - return -ECONNRESET; - } - BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); - if (mpa_rev == 0) - abort_connection(ep, NULL, GFP_KERNEL); - else { - send_mpa_reject(ep, pdata, pdata_len); - iwch_ep_disconnect(ep, 0, GFP_KERNEL); - } - put_ep(&ep->com); - return 0; -} - -int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) -{ - int err; - struct iwch_qp_attributes attrs; - enum iwch_qp_attr_mask mask; - struct iwch_ep *ep = to_ep(cm_id); - struct iwch_dev *h = to_iwch_dev(cm_id->device); - struct iwch_qp *qp = get_qhp(h, conn_param->qpn); - - pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - if (state_read(&ep->com) == DEAD) { - err = -ECONNRESET; - goto err; - } - - BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); - BUG_ON(!qp); - - if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) || - (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) { - abort_connection(ep, NULL, GFP_KERNEL); - err = -EINVAL; - goto err; - } - - cm_id->add_ref(cm_id); - ep->com.cm_id = cm_id; - ep->com.qp = qp; - - ep->ird = conn_param->ird; - ep->ord = conn_param->ord; - - if (peer2peer && ep->ird == 0) - ep->ird = 1; - - pr_debug("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord); - - /* bind QP to EP and move to RTS */ - attrs.mpa_attr = ep->mpa_attr; - attrs.max_ird = ep->ird; - attrs.max_ord = ep->ord; - attrs.llp_stream_handle = ep; - attrs.next_state = IWCH_QP_STATE_RTS; - - /* bind QP and TID with INIT_WR */ - mask = IWCH_QP_ATTR_NEXT_STATE | - IWCH_QP_ATTR_LLP_STREAM_HANDLE | - IWCH_QP_ATTR_MPA_ATTR | - IWCH_QP_ATTR_MAX_IRD | - IWCH_QP_ATTR_MAX_ORD; - - err = iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, mask, &attrs, 1); - if (err) - goto err1; - - /* if needed, wait for wr_ack */ - if (iwch_rqes_posted(qp)) { - wait_event(ep->com.waitq, ep->com.rpl_done); - err = ep->com.rpl_err; - if (err) - goto err1; - } - - err = send_mpa_reply(ep, conn_param->private_data, - conn_param->private_data_len); - if (err) - goto err1; - - - state_set(&ep->com, FPDU_MODE); - established_upcall(ep); - put_ep(&ep->com); - return 0; -err1: - ep->com.cm_id = NULL; - ep->com.qp = NULL; - cm_id->rem_ref(cm_id); -err: - put_ep(&ep->com); - return err; -} - -static int is_loopback_dst(struct iw_cm_id *cm_id) -{ - struct net_device *dev; - struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; - - dev = ip_dev_find(&init_net, raddr->sin_addr.s_addr); - if (!dev) - return 0; - dev_put(dev); - return 1; -} - -int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) -{ - struct iwch_dev *h = to_iwch_dev(cm_id->device); - struct iwch_ep *ep; - struct rtable *rt; - int err = 0; - struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr; - struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; - - if (cm_id->m_remote_addr.ss_family != PF_INET) { - err = -ENOSYS; - goto out; - } - - if (is_loopback_dst(cm_id)) { - err = -ENOSYS; - goto out; - } - - ep = alloc_ep(sizeof(*ep), GFP_KERNEL); - if (!ep) { - pr_err("%s - cannot alloc ep\n", __func__); - err = -ENOMEM; - goto out; - } - timer_setup(&ep->timer, ep_timeout, 0); - ep->plen = conn_param->private_data_len; - if (ep->plen) - memcpy(ep->mpa_pkt + sizeof(struct mpa_message), - conn_param->private_data, ep->plen); - ep->ird = conn_param->ird; - ep->ord = conn_param->ord; - - if (peer2peer && ep->ord == 0) - ep->ord = 1; - - ep->com.tdev = h->rdev.t3cdev_p; - - cm_id->add_ref(cm_id); - ep->com.cm_id = cm_id; - ep->com.qp = get_qhp(h, conn_param->qpn); - BUG_ON(!ep->com.qp); - pr_debug("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn, - ep->com.qp, cm_id); - - /* - * Allocate an active TID to initiate a TCP connection. - */ - ep->atid = cxgb3_alloc_atid(h->rdev.t3cdev_p, &t3c_client, ep); - if (ep->atid == -1) { - pr_err("%s - cannot alloc atid\n", __func__); - err = -ENOMEM; - goto fail2; - } - - /* find a route */ - rt = find_route(h->rdev.t3cdev_p, laddr->sin_addr.s_addr, - raddr->sin_addr.s_addr, laddr->sin_port, - raddr->sin_port, IPTOS_LOWDELAY); - if (!rt) { - pr_err("%s - cannot find route\n", __func__); - err = -EHOSTUNREACH; - goto fail3; - } - ep->dst = &rt->dst; - ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL, - &raddr->sin_addr.s_addr); - if (!ep->l2t) { - pr_err("%s - cannot alloc l2e\n", __func__); - err = -ENOMEM; - goto fail4; - } - - state_set(&ep->com, CONNECTING); - ep->tos = IPTOS_LOWDELAY; - memcpy(&ep->com.local_addr, &cm_id->m_local_addr, - sizeof(ep->com.local_addr)); - memcpy(&ep->com.remote_addr, &cm_id->m_remote_addr, - sizeof(ep->com.remote_addr)); - - /* send connect request to rnic */ - err = send_connect(ep); - if (!err) - goto out; - - l2t_release(h->rdev.t3cdev_p, ep->l2t); -fail4: - dst_release(ep->dst); -fail3: - cxgb3_free_atid(ep->com.tdev, ep->atid); -fail2: - cm_id->rem_ref(cm_id); - put_ep(&ep->com); -out: - return err; -} - -int iwch_create_listen(struct iw_cm_id *cm_id, int backlog) -{ - int err = 0; - struct iwch_dev *h = to_iwch_dev(cm_id->device); - struct iwch_listen_ep *ep; - - - might_sleep(); - - if (cm_id->m_local_addr.ss_family != PF_INET) { - err = -ENOSYS; - goto fail1; - } - - ep = alloc_ep(sizeof(*ep), GFP_KERNEL); - if (!ep) { - pr_err("%s - cannot alloc ep\n", __func__); - err = -ENOMEM; - goto fail1; - } - pr_debug("%s ep %p\n", __func__, ep); - ep->com.tdev = h->rdev.t3cdev_p; - cm_id->add_ref(cm_id); - ep->com.cm_id = cm_id; - ep->backlog = backlog; - memcpy(&ep->com.local_addr, &cm_id->m_local_addr, - sizeof(ep->com.local_addr)); - - /* - * Allocate a server TID. - */ - ep->stid = cxgb3_alloc_stid(h->rdev.t3cdev_p, &t3c_client, ep); - if (ep->stid == -1) { - pr_err("%s - cannot alloc atid\n", __func__); - err = -ENOMEM; - goto fail2; - } - - state_set(&ep->com, LISTEN); - err = listen_start(ep); - if (err) - goto fail3; - - /* wait for pass_open_rpl */ - wait_event(ep->com.waitq, ep->com.rpl_done); - err = ep->com.rpl_err; - if (!err) { - cm_id->provider_data = ep; - goto out; - } -fail3: - cxgb3_free_stid(ep->com.tdev, ep->stid); -fail2: - cm_id->rem_ref(cm_id); - put_ep(&ep->com); -fail1: -out: - return err; -} - -int iwch_destroy_listen(struct iw_cm_id *cm_id) -{ - int err; - struct iwch_listen_ep *ep = to_listen_ep(cm_id); - - pr_debug("%s ep %p\n", __func__, ep); - - might_sleep(); - state_set(&ep->com, DEAD); - ep->com.rpl_done = 0; - ep->com.rpl_err = 0; - err = listen_stop(ep); - if (err) - goto done; - wait_event(ep->com.waitq, ep->com.rpl_done); - cxgb3_free_stid(ep->com.tdev, ep->stid); -done: - err = ep->com.rpl_err; - cm_id->rem_ref(cm_id); - put_ep(&ep->com); - return err; -} - -int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp) -{ - int ret=0; - unsigned long flags; - int close = 0; - int fatal = 0; - struct t3cdev *tdev; - struct cxio_rdev *rdev; - - spin_lock_irqsave(&ep->com.lock, flags); - - pr_debug("%s ep %p state %s, abrupt %d\n", __func__, ep, - states[ep->com.state], abrupt); - - tdev = (struct t3cdev *)ep->com.tdev; - rdev = (struct cxio_rdev *)tdev->ulp; - if (cxio_fatal_error(rdev)) { - fatal = 1; - close_complete_upcall(ep); - ep->com.state = DEAD; - } - switch (ep->com.state) { - case MPA_REQ_WAIT: - case MPA_REQ_SENT: - case MPA_REQ_RCVD: - case MPA_REP_SENT: - case FPDU_MODE: - close = 1; - if (abrupt) - ep->com.state = ABORTING; - else { - ep->com.state = CLOSING; - start_ep_timer(ep); - } - set_bit(CLOSE_SENT, &ep->com.flags); - break; - case CLOSING: - if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) { - close = 1; - if (abrupt) { - stop_ep_timer(ep); - ep->com.state = ABORTING; - } else - ep->com.state = MORIBUND; - } - break; - case MORIBUND: - case ABORTING: - case DEAD: - pr_debug("%s ignoring disconnect ep %p state %u\n", - __func__, ep, ep->com.state); - break; - default: - BUG(); - break; - } - - spin_unlock_irqrestore(&ep->com.lock, flags); - if (close) { - if (abrupt) - ret = send_abort(ep, NULL, gfp); - else - ret = send_halfclose(ep, gfp); - if (ret) - fatal = 1; - } - if (fatal) - release_ep_resources(ep); - return ret; -} - -int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, - struct l2t_entry *l2t) -{ - struct iwch_ep *ep = ctx; - - if (ep->dst != old) - return 0; - - pr_debug("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new, - l2t); - dst_hold(new); - l2t_release(ep->com.tdev, ep->l2t); - ep->l2t = l2t; - dst_release(old); - ep->dst = new; - return 1; -} - -/* - * All the CM events are handled on a work queue to have a safe context. - * These are the real handlers that are called from the work queue. - */ -static const cxgb3_cpl_handler_func work_handlers[NUM_CPL_CMDS] = { - [CPL_ACT_ESTABLISH] = act_establish, - [CPL_ACT_OPEN_RPL] = act_open_rpl, - [CPL_RX_DATA] = rx_data, - [CPL_TX_DMA_ACK] = tx_ack, - [CPL_ABORT_RPL_RSS] = abort_rpl, - [CPL_ABORT_RPL] = abort_rpl, - [CPL_PASS_OPEN_RPL] = pass_open_rpl, - [CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl, - [CPL_PASS_ACCEPT_REQ] = pass_accept_req, - [CPL_PASS_ESTABLISH] = pass_establish, - [CPL_PEER_CLOSE] = peer_close, - [CPL_ABORT_REQ_RSS] = peer_abort, - [CPL_CLOSE_CON_RPL] = close_con_rpl, - [CPL_RDMA_TERMINATE] = terminate, - [CPL_RDMA_EC_STATUS] = ec_status, -}; - -static void process_work(struct work_struct *work) -{ - struct sk_buff *skb = NULL; - void *ep; - struct t3cdev *tdev; - int ret; - - while ((skb = skb_dequeue(&rxq))) { - ep = *((void **) (skb->cb)); - tdev = *((struct t3cdev **) (skb->cb + sizeof(void *))); - ret = work_handlers[G_OPCODE(ntohl((__force __be32)skb->csum))](tdev, skb, ep); - if (ret & CPL_RET_BUF_DONE) - kfree_skb(skb); - - /* - * ep was referenced in sched(), and is freed here. - */ - put_ep((struct iwch_ep_common *)ep); - } -} - -static DECLARE_WORK(skb_work, process_work); - -static int sched(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep_common *epc = ctx; - - get_ep(epc); - - /* - * Save ctx and tdev in the skb->cb area. - */ - *((void **) skb->cb) = ctx; - *((struct t3cdev **) (skb->cb + sizeof(void *))) = tdev; - - /* - * Queue the skb and schedule the worker thread. - */ - skb_queue_tail(&rxq, skb); - queue_work(workq, &skb_work); - return 0; -} - -static int set_tcb_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct cpl_set_tcb_rpl *rpl = cplhdr(skb); - - if (rpl->status != CPL_ERR_NONE) { - pr_err("Unexpected SET_TCB_RPL status %u for tid %u\n", - rpl->status, GET_TID(rpl)); - } - return CPL_RET_BUF_DONE; -} - -/* - * All upcalls from the T3 Core go to sched() to schedule the - * processing on a work queue. - */ -cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS] = { - [CPL_ACT_ESTABLISH] = sched, - [CPL_ACT_OPEN_RPL] = sched, - [CPL_RX_DATA] = sched, - [CPL_TX_DMA_ACK] = sched, - [CPL_ABORT_RPL_RSS] = sched, - [CPL_ABORT_RPL] = sched, - [CPL_PASS_OPEN_RPL] = sched, - [CPL_CLOSE_LISTSRV_RPL] = sched, - [CPL_PASS_ACCEPT_REQ] = sched, - [CPL_PASS_ESTABLISH] = sched, - [CPL_PEER_CLOSE] = sched, - [CPL_CLOSE_CON_RPL] = sched, - [CPL_ABORT_REQ_RSS] = sched, - [CPL_RDMA_TERMINATE] = sched, - [CPL_RDMA_EC_STATUS] = sched, - [CPL_SET_TCB_RPL] = set_tcb_rpl, -}; - -int __init iwch_cm_init(void) -{ - skb_queue_head_init(&rxq); - - workq = alloc_ordered_workqueue("iw_cxgb3", WQ_MEM_RECLAIM); - if (!workq) - return -ENOMEM; - - return 0; -} - -void __exit iwch_cm_term(void) -{ - flush_workqueue(workq); - destroy_workqueue(workq); -} diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h deleted file mode 100644 index cc7fe644d260..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.h +++ /dev/null @@ -1,233 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef _IWCH_CM_H_ -#define _IWCH_CM_H_ - -#include -#include -#include -#include - -#include -#include - -#include "cxgb3_offload.h" -#include "iwch_provider.h" - -#define MPA_KEY_REQ "MPA ID Req Frame" -#define MPA_KEY_REP "MPA ID Rep Frame" - -#define MPA_MAX_PRIVATE_DATA 256 -#define MPA_REV 0 /* XXX - amso1100 uses rev 0 ! */ -#define MPA_REJECT 0x20 -#define MPA_CRC 0x40 -#define MPA_MARKERS 0x80 -#define MPA_FLAGS_MASK 0xE0 - -#define put_ep(ep) { \ - pr_debug("put_ep (via %s:%u) ep %p refcnt %d\n", \ - __func__, __LINE__, ep, kref_read(&((ep)->kref))); \ - WARN_ON(kref_read(&((ep)->kref)) < 1); \ - kref_put(&((ep)->kref), __free_ep); \ -} - -#define get_ep(ep) { \ - pr_debug("get_ep (via %s:%u) ep %p, refcnt %d\n", \ - __func__, __LINE__, ep, kref_read(&((ep)->kref))); \ - kref_get(&((ep)->kref)); \ -} - -struct mpa_message { - u8 key[16]; - u8 flags; - u8 revision; - __be16 private_data_size; - u8 private_data[0]; -}; - -struct terminate_message { - u8 layer_etype; - u8 ecode; - __be16 hdrct_rsvd; - u8 len_hdrs[0]; -}; - -#define TERM_MAX_LENGTH (sizeof(struct terminate_message) + 2 + 18 + 28) - -enum iwch_layers_types { - LAYER_RDMAP = 0x00, - LAYER_DDP = 0x10, - LAYER_MPA = 0x20, - RDMAP_LOCAL_CATA = 0x00, - RDMAP_REMOTE_PROT = 0x01, - RDMAP_REMOTE_OP = 0x02, - DDP_LOCAL_CATA = 0x00, - DDP_TAGGED_ERR = 0x01, - DDP_UNTAGGED_ERR = 0x02, - DDP_LLP = 0x03 -}; - -enum iwch_rdma_ecodes { - RDMAP_INV_STAG = 0x00, - RDMAP_BASE_BOUNDS = 0x01, - RDMAP_ACC_VIOL = 0x02, - RDMAP_STAG_NOT_ASSOC = 0x03, - RDMAP_TO_WRAP = 0x04, - RDMAP_INV_VERS = 0x05, - RDMAP_INV_OPCODE = 0x06, - RDMAP_STREAM_CATA = 0x07, - RDMAP_GLOBAL_CATA = 0x08, - RDMAP_CANT_INV_STAG = 0x09, - RDMAP_UNSPECIFIED = 0xff -}; - -enum iwch_ddp_ecodes { - DDPT_INV_STAG = 0x00, - DDPT_BASE_BOUNDS = 0x01, - DDPT_STAG_NOT_ASSOC = 0x02, - DDPT_TO_WRAP = 0x03, - DDPT_INV_VERS = 0x04, - DDPU_INV_QN = 0x01, - DDPU_INV_MSN_NOBUF = 0x02, - DDPU_INV_MSN_RANGE = 0x03, - DDPU_INV_MO = 0x04, - DDPU_MSG_TOOBIG = 0x05, - DDPU_INV_VERS = 0x06 -}; - -enum iwch_mpa_ecodes { - MPA_CRC_ERR = 0x02, - MPA_MARKER_ERR = 0x03 -}; - -enum iwch_ep_state { - IDLE = 0, - LISTEN, - CONNECTING, - MPA_REQ_WAIT, - MPA_REQ_SENT, - MPA_REQ_RCVD, - MPA_REP_SENT, - FPDU_MODE, - ABORTING, - CLOSING, - MORIBUND, - DEAD, -}; - -enum iwch_ep_flags { - PEER_ABORT_IN_PROGRESS = 0, - ABORT_REQ_IN_PROGRESS = 1, - RELEASE_RESOURCES = 2, - CLOSE_SENT = 3, -}; - -struct iwch_ep_common { - struct iw_cm_id *cm_id; - struct iwch_qp *qp; - struct t3cdev *tdev; - enum iwch_ep_state state; - struct kref kref; - spinlock_t lock; - struct sockaddr_in local_addr; - struct sockaddr_in remote_addr; - wait_queue_head_t waitq; - int rpl_done; - int rpl_err; - unsigned long flags; -}; - -struct iwch_listen_ep { - struct iwch_ep_common com; - unsigned int stid; - int backlog; -}; - -struct iwch_ep { - struct iwch_ep_common com; - struct iwch_ep *parent_ep; - struct timer_list timer; - unsigned int atid; - u32 hwtid; - u32 snd_seq; - u32 rcv_seq; - struct l2t_entry *l2t; - struct dst_entry *dst; - struct sk_buff *mpa_skb; - struct iwch_mpa_attributes mpa_attr; - unsigned int mpa_pkt_len; - u8 mpa_pkt[sizeof(struct mpa_message) + MPA_MAX_PRIVATE_DATA]; - u8 tos; - u16 emss; - u16 plen; - u32 ird; - u32 ord; -}; - -static inline struct iwch_ep *to_ep(struct iw_cm_id *cm_id) -{ - return cm_id->provider_data; -} - -static inline struct iwch_listen_ep *to_listen_ep(struct iw_cm_id *cm_id) -{ - return cm_id->provider_data; -} - -static inline int compute_wscale(int win) -{ - int wscale = 0; - - while (wscale < 14 && (65535<wq : NULL; - struct t3_cqe cqe; - u32 credit = 0; - u8 cqe_flushed; - u64 cookie; - int ret = 1; - - ret = cxio_poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, - &credit); - if (t3a_device(chp->rhp) && credit) { - pr_debug("%s updating %d cq credits on id %d\n", __func__, - credit, chp->cq.cqid); - cxio_hal_cq_op(&rhp->rdev, &chp->cq, CQ_CREDIT_UPDATE, credit); - } - - if (ret) { - ret = -EAGAIN; - goto out; - } - ret = 1; - - wc->wr_id = cookie; - wc->qp = qhp ? &qhp->ibqp : NULL; - wc->vendor_err = CQE_STATUS(cqe); - wc->wc_flags = 0; - - pr_debug("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x lo 0x%x cookie 0x%llx\n", - __func__, - CQE_QPID(cqe), CQE_TYPE(cqe), - CQE_OPCODE(cqe), CQE_STATUS(cqe), CQE_WRID_HI(cqe), - CQE_WRID_LOW(cqe), (unsigned long long)cookie); - - if (CQE_TYPE(cqe) == 0) { - if (!CQE_STATUS(cqe)) - wc->byte_len = CQE_LEN(cqe); - else - wc->byte_len = 0; - wc->opcode = IB_WC_RECV; - if (CQE_OPCODE(cqe) == T3_SEND_WITH_INV || - CQE_OPCODE(cqe) == T3_SEND_WITH_SE_INV) { - wc->ex.invalidate_rkey = CQE_WRID_STAG(cqe); - wc->wc_flags |= IB_WC_WITH_INVALIDATE; - } - } else { - switch (CQE_OPCODE(cqe)) { - case T3_RDMA_WRITE: - wc->opcode = IB_WC_RDMA_WRITE; - break; - case T3_READ_REQ: - wc->opcode = IB_WC_RDMA_READ; - wc->byte_len = CQE_LEN(cqe); - break; - case T3_SEND: - case T3_SEND_WITH_SE: - case T3_SEND_WITH_INV: - case T3_SEND_WITH_SE_INV: - wc->opcode = IB_WC_SEND; - break; - case T3_LOCAL_INV: - wc->opcode = IB_WC_LOCAL_INV; - break; - case T3_FAST_REGISTER: - wc->opcode = IB_WC_REG_MR; - break; - default: - pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n", - CQE_OPCODE(cqe), CQE_QPID(cqe)); - ret = -EINVAL; - goto out; - } - } - - if (cqe_flushed) - wc->status = IB_WC_WR_FLUSH_ERR; - else { - - switch (CQE_STATUS(cqe)) { - case TPT_ERR_SUCCESS: - wc->status = IB_WC_SUCCESS; - break; - case TPT_ERR_STAG: - wc->status = IB_WC_LOC_ACCESS_ERR; - break; - case TPT_ERR_PDID: - wc->status = IB_WC_LOC_PROT_ERR; - break; - case TPT_ERR_QPID: - case TPT_ERR_ACCESS: - wc->status = IB_WC_LOC_ACCESS_ERR; - break; - case TPT_ERR_WRAP: - wc->status = IB_WC_GENERAL_ERR; - break; - case TPT_ERR_BOUND: - wc->status = IB_WC_LOC_LEN_ERR; - break; - case TPT_ERR_INVALIDATE_SHARED_MR: - case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND: - wc->status = IB_WC_MW_BIND_ERR; - break; - case TPT_ERR_CRC: - case TPT_ERR_MARKER: - case TPT_ERR_PDU_LEN_ERR: - case TPT_ERR_OUT_OF_RQE: - case TPT_ERR_DDP_VERSION: - case TPT_ERR_RDMA_VERSION: - case TPT_ERR_DDP_QUEUE_NUM: - case TPT_ERR_MSN: - case TPT_ERR_TBIT: - case TPT_ERR_MO: - case TPT_ERR_MSN_RANGE: - case TPT_ERR_IRD_OVERFLOW: - case TPT_ERR_OPCODE: - wc->status = IB_WC_FATAL_ERR; - break; - case TPT_ERR_SWFLUSH: - wc->status = IB_WC_WR_FLUSH_ERR; - break; - default: - pr_err("Unexpected cqe_status 0x%x for QPID=0x%0x\n", - CQE_STATUS(cqe), CQE_QPID(cqe)); - ret = -EINVAL; - } - } -out: - return ret; -} - -/* - * Get one cq entry from cxio and map it to openib. - * - * Returns: - * 0 EMPTY; - * 1 cqe returned - * -EAGAIN caller must try again - * any other -errno fatal error - */ -static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, - struct ib_wc *wc) -{ - struct iwch_qp *qhp; - struct t3_cqe *rd_cqe; - int ret; - - rd_cqe = cxio_next_cqe(&chp->cq); - - if (!rd_cqe) - return 0; - - qhp = get_qhp(rhp, CQE_QPID(*rd_cqe)); - if (qhp) { - spin_lock(&qhp->lock); - ret = __iwch_poll_cq_one(rhp, chp, qhp, wc); - spin_unlock(&qhp->lock); - } else { - ret = __iwch_poll_cq_one(rhp, chp, NULL, wc); - } - return ret; -} - -int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) -{ - struct iwch_dev *rhp; - struct iwch_cq *chp; - unsigned long flags; - int npolled; - int err = 0; - - chp = to_iwch_cq(ibcq); - rhp = chp->rhp; - - spin_lock_irqsave(&chp->lock, flags); - for (npolled = 0; npolled < num_entries; ++npolled) { - - /* - * Because T3 can post CQEs that are _not_ associated - * with a WR, we might have to poll again after removing - * one of these. - */ - do { - err = iwch_poll_cq_one(rhp, chp, wc + npolled); - } while (err == -EAGAIN); - if (err <= 0) - break; - } - spin_unlock_irqrestore(&chp->lock, flags); - - if (err < 0) - return err; - else { - return npolled; - } -} diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c deleted file mode 100644 index 9d356c1301c7..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_ev.c +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include -#include -#include -#include "iwch_provider.h" -#include "iwch.h" -#include "iwch_cm.h" -#include "cxio_hal.h" -#include "cxio_wr.h" - -static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp, - struct respQ_msg_t *rsp_msg, - enum ib_event_type ib_event, - int send_term) -{ - struct ib_event event; - struct iwch_qp_attributes attrs; - struct iwch_qp *qhp; - unsigned long flag; - - xa_lock(&rnicp->qps); - qhp = xa_load(&rnicp->qps, CQE_QPID(rsp_msg->cqe)); - - if (!qhp) { - pr_err("%s unaffiliated error 0x%x qpid 0x%x\n", - __func__, CQE_STATUS(rsp_msg->cqe), - CQE_QPID(rsp_msg->cqe)); - xa_unlock(&rnicp->qps); - return; - } - - if ((qhp->attr.state == IWCH_QP_STATE_ERROR) || - (qhp->attr.state == IWCH_QP_STATE_TERMINATE)) { - pr_debug("%s AE received after RTS - qp state %d qpid 0x%x status 0x%x\n", - __func__, - qhp->attr.state, qhp->wq.qpid, - CQE_STATUS(rsp_msg->cqe)); - xa_unlock(&rnicp->qps); - return; - } - - pr_err("%s - AE qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n", - __func__, - CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe), - CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe), - CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe)); - - atomic_inc(&qhp->refcnt); - xa_unlock(&rnicp->qps); - - if (qhp->attr.state == IWCH_QP_STATE_RTS) { - attrs.next_state = IWCH_QP_STATE_TERMINATE; - iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, - &attrs, 1); - if (send_term) - iwch_post_terminate(qhp, rsp_msg); - } - - event.event = ib_event; - event.device = chp->ibcq.device; - if (ib_event == IB_EVENT_CQ_ERR) - event.element.cq = &chp->ibcq; - else - event.element.qp = &qhp->ibqp; - - if (qhp->ibqp.event_handler) - (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context); - - spin_lock_irqsave(&chp->comp_handler_lock, flag); - (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); - spin_unlock_irqrestore(&chp->comp_handler_lock, flag); - - if (atomic_dec_and_test(&qhp->refcnt)) - wake_up(&qhp->wait); -} - -void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb) -{ - struct iwch_dev *rnicp; - struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data; - struct iwch_cq *chp; - struct iwch_qp *qhp; - u32 cqid = RSPQ_CQID(rsp_msg); - unsigned long flag; - - rnicp = (struct iwch_dev *) rdev_p->ulp; - xa_lock(&rnicp->qps); - chp = get_chp(rnicp, cqid); - qhp = xa_load(&rnicp->qps, CQE_QPID(rsp_msg->cqe)); - if (!chp || !qhp) { - pr_err("BAD AE cqid 0x%x qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n", - cqid, CQE_QPID(rsp_msg->cqe), - CQE_OPCODE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe), - CQE_TYPE(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe), - CQE_WRID_LOW(rsp_msg->cqe)); - xa_unlock(&rnicp->qps); - goto out; - } - iwch_qp_add_ref(&qhp->ibqp); - atomic_inc(&chp->refcnt); - xa_unlock(&rnicp->qps); - - /* - * 1) completion of our sending a TERMINATE. - * 2) incoming TERMINATE message. - */ - if ((CQE_OPCODE(rsp_msg->cqe) == T3_TERMINATE) && - (CQE_STATUS(rsp_msg->cqe) == 0)) { - if (SQ_TYPE(rsp_msg->cqe)) { - pr_debug("%s QPID 0x%x ep %p disconnecting\n", - __func__, qhp->wq.qpid, qhp->ep); - iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC); - } else { - pr_debug("%s post REQ_ERR AE QPID 0x%x\n", __func__, - qhp->wq.qpid); - post_qp_event(rnicp, chp, rsp_msg, - IB_EVENT_QP_REQ_ERR, 0); - iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC); - } - goto done; - } - - /* Bad incoming Read request */ - if (SQ_TYPE(rsp_msg->cqe) && - (CQE_OPCODE(rsp_msg->cqe) == T3_READ_RESP)) { - post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1); - goto done; - } - - /* Bad incoming write */ - if (RQ_TYPE(rsp_msg->cqe) && - (CQE_OPCODE(rsp_msg->cqe) == T3_RDMA_WRITE)) { - post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1); - goto done; - } - - switch (CQE_STATUS(rsp_msg->cqe)) { - - /* Completion Events */ - case TPT_ERR_SUCCESS: - - /* - * Confirm the destination entry if this is a RECV completion. - */ - if (qhp->ep && SQ_TYPE(rsp_msg->cqe)) - dst_confirm(qhp->ep->dst); - spin_lock_irqsave(&chp->comp_handler_lock, flag); - (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); - spin_unlock_irqrestore(&chp->comp_handler_lock, flag); - break; - - case TPT_ERR_STAG: - case TPT_ERR_PDID: - case TPT_ERR_QPID: - case TPT_ERR_ACCESS: - case TPT_ERR_WRAP: - case TPT_ERR_BOUND: - case TPT_ERR_INVALIDATE_SHARED_MR: - case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND: - post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_ACCESS_ERR, 1); - break; - - /* Device Fatal Errors */ - case TPT_ERR_ECC: - case TPT_ERR_ECC_PSTAG: - case TPT_ERR_INTERNAL_ERR: - post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_DEVICE_FATAL, 1); - break; - - /* QP Fatal Errors */ - case TPT_ERR_OUT_OF_RQE: - case TPT_ERR_PBL_ADDR_BOUND: - case TPT_ERR_CRC: - case TPT_ERR_MARKER: - case TPT_ERR_PDU_LEN_ERR: - case TPT_ERR_DDP_VERSION: - case TPT_ERR_RDMA_VERSION: - case TPT_ERR_OPCODE: - case TPT_ERR_DDP_QUEUE_NUM: - case TPT_ERR_MSN: - case TPT_ERR_TBIT: - case TPT_ERR_MO: - case TPT_ERR_MSN_GAP: - case TPT_ERR_MSN_RANGE: - case TPT_ERR_RQE_ADDR_BOUND: - case TPT_ERR_IRD_OVERFLOW: - post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1); - break; - - default: - pr_err("Unknown T3 status 0x%x QPID 0x%x\n", - CQE_STATUS(rsp_msg->cqe), qhp->wq.qpid); - post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1); - break; - } -done: - if (atomic_dec_and_test(&chp->refcnt)) - wake_up(&chp->wait); - iwch_qp_rem_ref(&qhp->ibqp); -out: - dev_kfree_skb_irq(skb); -} diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c deleted file mode 100644 index ce0f2741821d..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_mem.c +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include -#include - -#include -#include - -#include "cxio_hal.h" -#include "cxio_resource.h" -#include "iwch.h" -#include "iwch_provider.h" - -static int iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag) -{ - u32 mmid; - - mhp->attr.state = 1; - mhp->attr.stag = stag; - mmid = stag >> 8; - mhp->ibmr.rkey = mhp->ibmr.lkey = stag; - pr_debug("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp); - return xa_insert_irq(&mhp->rhp->mrs, mmid, mhp, GFP_KERNEL); -} - -int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, - struct iwch_mr *mhp, int shift) -{ - u32 stag; - int ret; - - if (cxio_register_phys_mem(&rhp->rdev, - &stag, mhp->attr.pdid, - mhp->attr.perms, - mhp->attr.zbva, - mhp->attr.va_fbo, - mhp->attr.len, - shift - 12, - mhp->attr.pbl_size, mhp->attr.pbl_addr)) - return -ENOMEM; - - ret = iwch_finish_mem_reg(mhp, stag); - if (ret) - cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr); - return ret; -} - -int iwch_alloc_pbl(struct iwch_mr *mhp, int npages) -{ - mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev, - npages << 3); - - if (!mhp->attr.pbl_addr) - return -ENOMEM; - - mhp->attr.pbl_size = npages; - - return 0; -} - -void iwch_free_pbl(struct iwch_mr *mhp) -{ - cxio_hal_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, - mhp->attr.pbl_size << 3); -} - -int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset) -{ - return cxio_write_pbl(&mhp->rhp->rdev, pages, - mhp->attr.pbl_addr + (offset << 3), npages); -} diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c deleted file mode 100644 index dcf02ec02810..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ /dev/null @@ -1,1321 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "cxio_hal.h" -#include "iwch.h" -#include "iwch_provider.h" -#include "iwch_cm.h" -#include -#include "common.h" - -static void iwch_dealloc_ucontext(struct ib_ucontext *context) -{ - struct iwch_dev *rhp = to_iwch_dev(context->device); - struct iwch_ucontext *ucontext = to_iwch_ucontext(context); - struct iwch_mm_entry *mm, *tmp; - - pr_debug("%s context %p\n", __func__, context); - list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry) - kfree(mm); - cxio_release_ucontext(&rhp->rdev, &ucontext->uctx); -} - -static int iwch_alloc_ucontext(struct ib_ucontext *ucontext, - struct ib_udata *udata) -{ - struct ib_device *ibdev = ucontext->device; - struct iwch_ucontext *context = to_iwch_ucontext(ucontext); - struct iwch_dev *rhp = to_iwch_dev(ibdev); - - pr_debug("%s ibdev %p\n", __func__, ibdev); - cxio_init_ucontext(&rhp->rdev, &context->uctx); - INIT_LIST_HEAD(&context->mmaps); - spin_lock_init(&context->mmap_lock); - return 0; -} - -static void iwch_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) -{ - struct iwch_cq *chp; - - pr_debug("%s ib_cq %p\n", __func__, ib_cq); - chp = to_iwch_cq(ib_cq); - - xa_erase_irq(&chp->rhp->cqs, chp->cq.cqid); - atomic_dec(&chp->refcnt); - wait_event(chp->wait, !atomic_read(&chp->refcnt)); - - cxio_destroy_cq(&chp->rhp->rdev, &chp->cq); -} - -static int iwch_create_cq(struct ib_cq *ibcq, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata) -{ - struct ib_device *ibdev = ibcq->device; - int entries = attr->cqe; - struct iwch_dev *rhp = to_iwch_dev(ibcq->device); - struct iwch_cq *chp = to_iwch_cq(ibcq); - struct iwch_create_cq_resp uresp; - struct iwch_create_cq_req ureq; - static int warned; - size_t resplen; - - pr_debug("%s ib_dev %p entries %d\n", __func__, ibdev, entries); - if (attr->flags) - return -EINVAL; - - if (udata) { - if (!t3a_device(rhp)) { - if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) - return -EFAULT; - - chp->user_rptr_addr = (u32 __user *)(unsigned long)ureq.user_rptr_addr; - } - } - - if (t3a_device(rhp)) { - - /* - * T3A: Add some fluff to handle extra CQEs inserted - * for various errors. - * Additional CQE possibilities: - * TERMINATE, - * incoming RDMA WRITE Failures - * incoming RDMA READ REQUEST FAILUREs - * NOTE: We cannot ensure the CQ won't overflow. - */ - entries += 16; - } - entries = roundup_pow_of_two(entries); - chp->cq.size_log2 = ilog2(entries); - - if (cxio_create_cq(&rhp->rdev, &chp->cq, !udata)) - return -ENOMEM; - - chp->rhp = rhp; - chp->ibcq.cqe = 1 << chp->cq.size_log2; - spin_lock_init(&chp->lock); - spin_lock_init(&chp->comp_handler_lock); - atomic_set(&chp->refcnt, 1); - init_waitqueue_head(&chp->wait); - if (xa_store_irq(&rhp->cqs, chp->cq.cqid, chp, GFP_KERNEL)) { - cxio_destroy_cq(&chp->rhp->rdev, &chp->cq); - return -ENOMEM; - } - - if (udata) { - struct iwch_mm_entry *mm; - struct iwch_ucontext *ucontext = rdma_udata_to_drv_context( - udata, struct iwch_ucontext, ibucontext); - - mm = kmalloc(sizeof(*mm), GFP_KERNEL); - if (!mm) { - iwch_destroy_cq(&chp->ibcq, udata); - return -ENOMEM; - } - uresp.cqid = chp->cq.cqid; - uresp.size_log2 = chp->cq.size_log2; - spin_lock(&ucontext->mmap_lock); - uresp.key = ucontext->key; - ucontext->key += PAGE_SIZE; - spin_unlock(&ucontext->mmap_lock); - mm->key = uresp.key; - mm->addr = virt_to_phys(chp->cq.queue); - if (udata->outlen < sizeof(uresp)) { - if (!warned++) - pr_warn("Warning - downlevel libcxgb3 (non-fatal)\n"); - mm->len = PAGE_ALIGN((1UL << uresp.size_log2) * - sizeof(struct t3_cqe)); - resplen = sizeof(struct iwch_create_cq_resp_v0); - } else { - mm->len = PAGE_ALIGN(((1UL << uresp.size_log2) + 1) * - sizeof(struct t3_cqe)); - uresp.memsize = mm->len; - uresp.reserved = 0; - resplen = sizeof(uresp); - } - if (ib_copy_to_udata(udata, &uresp, resplen)) { - kfree(mm); - iwch_destroy_cq(&chp->ibcq, udata); - return -EFAULT; - } - insert_mmap(ucontext, mm); - } - pr_debug("created cqid 0x%0x chp %p size 0x%0x, dma_addr %pad\n", - chp->cq.cqid, chp, (1 << chp->cq.size_log2), - &chp->cq.dma_addr); - return 0; -} - -static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) -{ - struct iwch_dev *rhp; - struct iwch_cq *chp; - enum t3_cq_opcode cq_op; - int err; - unsigned long flag; - u32 rptr; - - chp = to_iwch_cq(ibcq); - rhp = chp->rhp; - if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED) - cq_op = CQ_ARM_SE; - else - cq_op = CQ_ARM_AN; - if (chp->user_rptr_addr) { - if (get_user(rptr, chp->user_rptr_addr)) - return -EFAULT; - spin_lock_irqsave(&chp->lock, flag); - chp->cq.rptr = rptr; - } else - spin_lock_irqsave(&chp->lock, flag); - pr_debug("%s rptr 0x%x\n", __func__, chp->cq.rptr); - err = cxio_hal_cq_op(&rhp->rdev, &chp->cq, cq_op, 0); - spin_unlock_irqrestore(&chp->lock, flag); - if (err < 0) - pr_err("Error %d rearming CQID 0x%x\n", err, chp->cq.cqid); - if (err > 0 && !(flags & IB_CQ_REPORT_MISSED_EVENTS)) - err = 0; - return err; -} - -static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) -{ - int len = vma->vm_end - vma->vm_start; - u32 key = vma->vm_pgoff << PAGE_SHIFT; - struct cxio_rdev *rdev_p; - int ret = 0; - struct iwch_mm_entry *mm; - struct iwch_ucontext *ucontext; - u64 addr; - - pr_debug("%s pgoff 0x%lx key 0x%x len %d\n", __func__, vma->vm_pgoff, - key, len); - - if (vma->vm_start & (PAGE_SIZE-1)) { - return -EINVAL; - } - - rdev_p = &(to_iwch_dev(context->device)->rdev); - ucontext = to_iwch_ucontext(context); - - mm = remove_mmap(ucontext, key, len); - if (!mm) - return -EINVAL; - addr = mm->addr; - kfree(mm); - - if ((addr >= rdev_p->rnic_info.udbell_physbase) && - (addr < (rdev_p->rnic_info.udbell_physbase + - rdev_p->rnic_info.udbell_len))) { - - /* - * Map T3 DB register. - */ - if (vma->vm_flags & VM_READ) { - return -EPERM; - } - - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; - vma->vm_flags &= ~VM_MAYREAD; - ret = io_remap_pfn_range(vma, vma->vm_start, - addr >> PAGE_SHIFT, - len, vma->vm_page_prot); - } else { - - /* - * Map WQ or CQ contig dma memory... - */ - ret = remap_pfn_range(vma, vma->vm_start, - addr >> PAGE_SHIFT, - len, vma->vm_page_prot); - } - - return ret; -} - -static void iwch_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_pd *php; - - php = to_iwch_pd(pd); - rhp = php->rhp; - pr_debug("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid); - cxio_hal_put_pdid(rhp->rdev.rscp, php->pdid); -} - -static int iwch_allocate_pd(struct ib_pd *pd, struct ib_udata *udata) -{ - struct iwch_pd *php = to_iwch_pd(pd); - struct ib_device *ibdev = pd->device; - u32 pdid; - struct iwch_dev *rhp; - - pr_debug("%s ibdev %p\n", __func__, ibdev); - rhp = (struct iwch_dev *) ibdev; - pdid = cxio_hal_get_pdid(rhp->rdev.rscp); - if (!pdid) - return -EINVAL; - - php->pdid = pdid; - php->rhp = rhp; - if (udata) { - struct iwch_alloc_pd_resp resp = {.pdid = php->pdid}; - - if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { - iwch_deallocate_pd(&php->ibpd, udata); - return -EFAULT; - } - } - pr_debug("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php); - return 0; -} - -static int iwch_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_mr *mhp; - u32 mmid; - - pr_debug("%s ib_mr %p\n", __func__, ib_mr); - - mhp = to_iwch_mr(ib_mr); - kfree(mhp->pages); - rhp = mhp->rhp; - mmid = mhp->attr.stag >> 8; - cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr); - iwch_free_pbl(mhp); - xa_erase_irq(&rhp->mrs, mmid); - if (mhp->kva) - kfree((void *) (unsigned long) mhp->kva); - ib_umem_release(mhp->umem); - pr_debug("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp); - kfree(mhp); - return 0; -} - -static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc) -{ - const u64 total_size = 0xffffffff; - const u64 mask = (total_size + PAGE_SIZE - 1) & PAGE_MASK; - struct iwch_pd *php = to_iwch_pd(pd); - struct iwch_dev *rhp = php->rhp; - struct iwch_mr *mhp; - __be64 *page_list; - int shift = 26, npages, ret, i; - - pr_debug("%s ib_pd %p\n", __func__, pd); - - /* - * T3 only supports 32 bits of size. - */ - if (sizeof(phys_addr_t) > 4) { - pr_warn_once("Cannot support dma_mrs on this platform\n"); - return ERR_PTR(-ENOTSUPP); - } - - mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); - if (!mhp) - return ERR_PTR(-ENOMEM); - - mhp->rhp = rhp; - - npages = (total_size + (1ULL << shift) - 1) >> shift; - if (!npages) { - ret = -EINVAL; - goto err; - } - - page_list = kmalloc_array(npages, sizeof(u64), GFP_KERNEL); - if (!page_list) { - ret = -ENOMEM; - goto err; - } - - for (i = 0; i < npages; i++) - page_list[i] = cpu_to_be64((u64)i << shift); - - pr_debug("%s mask 0x%llx shift %d len %lld pbl_size %d\n", - __func__, mask, shift, total_size, npages); - - ret = iwch_alloc_pbl(mhp, npages); - if (ret) { - kfree(page_list); - goto err_pbl; - } - - ret = iwch_write_pbl(mhp, page_list, npages, 0); - kfree(page_list); - if (ret) - goto err_pbl; - - mhp->attr.pdid = php->pdid; - mhp->attr.zbva = 0; - - mhp->attr.perms = iwch_ib_to_tpt_access(acc); - mhp->attr.va_fbo = 0; - mhp->attr.page_size = shift - 12; - - mhp->attr.len = (u32) total_size; - mhp->attr.pbl_size = npages; - ret = iwch_register_mem(rhp, php, mhp, shift); - if (ret) - goto err_pbl; - - return &mhp->ibmr; - -err_pbl: - iwch_free_pbl(mhp); - -err: - kfree(mhp); - return ERR_PTR(ret); -} - -static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, - u64 virt, int acc, struct ib_udata *udata) -{ - __be64 *pages; - int shift, n, i; - int err = 0; - struct iwch_dev *rhp; - struct iwch_pd *php; - struct iwch_mr *mhp; - struct iwch_reg_user_mr_resp uresp; - struct sg_dma_page_iter sg_iter; - pr_debug("%s ib_pd %p\n", __func__, pd); - - php = to_iwch_pd(pd); - rhp = php->rhp; - mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); - if (!mhp) - return ERR_PTR(-ENOMEM); - - mhp->rhp = rhp; - - mhp->umem = ib_umem_get(udata, start, length, acc, 0); - if (IS_ERR(mhp->umem)) { - err = PTR_ERR(mhp->umem); - kfree(mhp); - return ERR_PTR(err); - } - - shift = PAGE_SHIFT; - - n = ib_umem_num_pages(mhp->umem); - - err = iwch_alloc_pbl(mhp, n); - if (err) - goto err; - - pages = (__be64 *) __get_free_page(GFP_KERNEL); - if (!pages) { - err = -ENOMEM; - goto err_pbl; - } - - i = n = 0; - - for_each_sg_dma_page(mhp->umem->sg_head.sgl, &sg_iter, mhp->umem->nmap, 0) { - pages[i++] = cpu_to_be64(sg_page_iter_dma_address(&sg_iter)); - if (i == PAGE_SIZE / sizeof(*pages)) { - err = iwch_write_pbl(mhp, pages, i, n); - if (err) - goto pbl_done; - n += i; - i = 0; - } - } - - if (i) - err = iwch_write_pbl(mhp, pages, i, n); - -pbl_done: - free_page((unsigned long) pages); - if (err) - goto err_pbl; - - mhp->attr.pdid = php->pdid; - mhp->attr.zbva = 0; - mhp->attr.perms = iwch_ib_to_tpt_access(acc); - mhp->attr.va_fbo = virt; - mhp->attr.page_size = shift - 12; - mhp->attr.len = (u32) length; - - err = iwch_register_mem(rhp, php, mhp, shift); - if (err) - goto err_pbl; - - if (udata && !t3a_device(rhp)) { - uresp.pbl_addr = (mhp->attr.pbl_addr - - rhp->rdev.rnic_info.pbl_base) >> 3; - pr_debug("%s user resp pbl_addr 0x%x\n", __func__, - uresp.pbl_addr); - - if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { - iwch_dereg_mr(&mhp->ibmr, udata); - err = -EFAULT; - goto err; - } - } - - return &mhp->ibmr; - -err_pbl: - iwch_free_pbl(mhp); - -err: - ib_umem_release(mhp->umem); - kfree(mhp); - return ERR_PTR(err); -} - -static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, - struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_pd *php; - struct iwch_mw *mhp; - u32 mmid; - u32 stag = 0; - int ret; - - if (type != IB_MW_TYPE_1) - return ERR_PTR(-EINVAL); - - php = to_iwch_pd(pd); - rhp = php->rhp; - mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); - if (!mhp) - return ERR_PTR(-ENOMEM); - ret = cxio_allocate_window(&rhp->rdev, &stag, php->pdid); - if (ret) { - kfree(mhp); - return ERR_PTR(ret); - } - mhp->rhp = rhp; - mhp->attr.pdid = php->pdid; - mhp->attr.type = TPT_MW; - mhp->attr.stag = stag; - mmid = (stag) >> 8; - mhp->ibmw.rkey = stag; - if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) { - cxio_deallocate_window(&rhp->rdev, mhp->attr.stag); - kfree(mhp); - return ERR_PTR(-ENOMEM); - } - pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); - return &(mhp->ibmw); -} - -static int iwch_dealloc_mw(struct ib_mw *mw) -{ - struct iwch_dev *rhp; - struct iwch_mw *mhp; - u32 mmid; - - mhp = to_iwch_mw(mw); - rhp = mhp->rhp; - mmid = (mw->rkey) >> 8; - cxio_deallocate_window(&rhp->rdev, mhp->attr.stag); - xa_erase_irq(&rhp->mrs, mmid); - pr_debug("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp); - kfree(mhp); - return 0; -} - -static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_pd *php; - struct iwch_mr *mhp; - u32 mmid; - u32 stag = 0; - int ret = -ENOMEM; - - if (mr_type != IB_MR_TYPE_MEM_REG || - max_num_sg > T3_MAX_FASTREG_DEPTH) - return ERR_PTR(-EINVAL); - - php = to_iwch_pd(pd); - rhp = php->rhp; - mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); - if (!mhp) - goto err; - - mhp->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL); - if (!mhp->pages) - goto pl_err; - - mhp->rhp = rhp; - ret = iwch_alloc_pbl(mhp, max_num_sg); - if (ret) - goto err1; - mhp->attr.pbl_size = max_num_sg; - ret = cxio_allocate_stag(&rhp->rdev, &stag, php->pdid, - mhp->attr.pbl_size, mhp->attr.pbl_addr); - if (ret) - goto err2; - mhp->attr.pdid = php->pdid; - mhp->attr.type = TPT_NON_SHARED_MR; - mhp->attr.stag = stag; - mhp->attr.state = 1; - mmid = (stag) >> 8; - mhp->ibmr.rkey = mhp->ibmr.lkey = stag; - ret = xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL); - if (ret) - goto err3; - - pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); - return &(mhp->ibmr); -err3: - cxio_dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr); -err2: - iwch_free_pbl(mhp); -err1: - kfree(mhp->pages); -pl_err: - kfree(mhp); -err: - return ERR_PTR(ret); -} - -static int iwch_set_page(struct ib_mr *ibmr, u64 addr) -{ - struct iwch_mr *mhp = to_iwch_mr(ibmr); - - if (unlikely(mhp->npages == mhp->attr.pbl_size)) - return -ENOMEM; - - mhp->pages[mhp->npages++] = addr; - - return 0; -} - -static int iwch_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, - int sg_nents, unsigned int *sg_offset) -{ - struct iwch_mr *mhp = to_iwch_mr(ibmr); - - mhp->npages = 0; - - return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, iwch_set_page); -} - -static int iwch_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_qp *qhp; - struct iwch_qp_attributes attrs; - struct iwch_ucontext *ucontext; - - qhp = to_iwch_qp(ib_qp); - rhp = qhp->rhp; - - attrs.next_state = IWCH_QP_STATE_ERROR; - iwch_modify_qp(rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 0); - wait_event(qhp->wait, !qhp->ep); - - xa_erase_irq(&rhp->qps, qhp->wq.qpid); - - atomic_dec(&qhp->refcnt); - wait_event(qhp->wait, !atomic_read(&qhp->refcnt)); - - ucontext = rdma_udata_to_drv_context(udata, struct iwch_ucontext, - ibucontext); - cxio_destroy_qp(&rhp->rdev, &qhp->wq, - ucontext ? &ucontext->uctx : &rhp->rdev.uctx); - - pr_debug("%s ib_qp %p qpid 0x%0x qhp %p\n", __func__, - ib_qp, qhp->wq.qpid, qhp); - kfree(qhp); - return 0; -} - -static struct ib_qp *iwch_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *attrs, - struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_qp *qhp; - struct iwch_pd *php; - struct iwch_cq *schp; - struct iwch_cq *rchp; - struct iwch_create_qp_resp uresp; - int wqsize, sqsize, rqsize; - struct iwch_ucontext *ucontext; - - pr_debug("%s ib_pd %p\n", __func__, pd); - if (attrs->qp_type != IB_QPT_RC) - return ERR_PTR(-EINVAL); - php = to_iwch_pd(pd); - rhp = php->rhp; - schp = get_chp(rhp, ((struct iwch_cq *) attrs->send_cq)->cq.cqid); - rchp = get_chp(rhp, ((struct iwch_cq *) attrs->recv_cq)->cq.cqid); - if (!schp || !rchp) - return ERR_PTR(-EINVAL); - - /* The RQT size must be # of entries + 1 rounded up to a power of two */ - rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr); - if (rqsize == attrs->cap.max_recv_wr) - rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr+1); - - /* T3 doesn't support RQT depth < 16 */ - if (rqsize < 16) - rqsize = 16; - - if (rqsize > T3_MAX_RQ_SIZE) - return ERR_PTR(-EINVAL); - - if (attrs->cap.max_inline_data > T3_MAX_INLINE) - return ERR_PTR(-EINVAL); - - /* - * NOTE: The SQ and total WQ sizes don't need to be - * a power of two. However, all the code assumes - * they are. EG: Q_FREECNT() and friends. - */ - sqsize = roundup_pow_of_two(attrs->cap.max_send_wr); - wqsize = roundup_pow_of_two(rqsize + sqsize); - - /* - * Kernel users need more wq space for fastreg WRs which can take - * 2 WR fragments. - */ - ucontext = rdma_udata_to_drv_context(udata, struct iwch_ucontext, - ibucontext); - if (!ucontext && wqsize < (rqsize + (2 * sqsize))) - wqsize = roundup_pow_of_two(rqsize + - roundup_pow_of_two(attrs->cap.max_send_wr * 2)); - pr_debug("%s wqsize %d sqsize %d rqsize %d\n", __func__, - wqsize, sqsize, rqsize); - qhp = kzalloc(sizeof(*qhp), GFP_KERNEL); - if (!qhp) - return ERR_PTR(-ENOMEM); - qhp->wq.size_log2 = ilog2(wqsize); - qhp->wq.rq_size_log2 = ilog2(rqsize); - qhp->wq.sq_size_log2 = ilog2(sqsize); - if (cxio_create_qp(&rhp->rdev, !udata, &qhp->wq, - ucontext ? &ucontext->uctx : &rhp->rdev.uctx)) { - kfree(qhp); - return ERR_PTR(-ENOMEM); - } - - attrs->cap.max_recv_wr = rqsize - 1; - attrs->cap.max_send_wr = sqsize; - attrs->cap.max_inline_data = T3_MAX_INLINE; - - qhp->rhp = rhp; - qhp->attr.pd = php->pdid; - qhp->attr.scq = ((struct iwch_cq *) attrs->send_cq)->cq.cqid; - qhp->attr.rcq = ((struct iwch_cq *) attrs->recv_cq)->cq.cqid; - qhp->attr.sq_num_entries = attrs->cap.max_send_wr; - qhp->attr.rq_num_entries = attrs->cap.max_recv_wr; - qhp->attr.sq_max_sges = attrs->cap.max_send_sge; - qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge; - qhp->attr.rq_max_sges = attrs->cap.max_recv_sge; - qhp->attr.state = IWCH_QP_STATE_IDLE; - qhp->attr.next_state = IWCH_QP_STATE_IDLE; - - /* - * XXX - These don't get passed in from the openib user - * at create time. The CM sets them via a QP modify. - * Need to fix... I think the CM should - */ - qhp->attr.enable_rdma_read = 1; - qhp->attr.enable_rdma_write = 1; - qhp->attr.enable_bind = 1; - qhp->attr.max_ord = 1; - qhp->attr.max_ird = 1; - - spin_lock_init(&qhp->lock); - init_waitqueue_head(&qhp->wait); - atomic_set(&qhp->refcnt, 1); - - if (xa_store_irq(&rhp->qps, qhp->wq.qpid, qhp, GFP_KERNEL)) { - cxio_destroy_qp(&rhp->rdev, &qhp->wq, - ucontext ? &ucontext->uctx : &rhp->rdev.uctx); - kfree(qhp); - return ERR_PTR(-ENOMEM); - } - - if (udata) { - - struct iwch_mm_entry *mm1, *mm2; - - mm1 = kmalloc(sizeof(*mm1), GFP_KERNEL); - if (!mm1) { - iwch_destroy_qp(&qhp->ibqp, udata); - return ERR_PTR(-ENOMEM); - } - - mm2 = kmalloc(sizeof(*mm2), GFP_KERNEL); - if (!mm2) { - kfree(mm1); - iwch_destroy_qp(&qhp->ibqp, udata); - return ERR_PTR(-ENOMEM); - } - - uresp.qpid = qhp->wq.qpid; - uresp.size_log2 = qhp->wq.size_log2; - uresp.sq_size_log2 = qhp->wq.sq_size_log2; - uresp.rq_size_log2 = qhp->wq.rq_size_log2; - spin_lock(&ucontext->mmap_lock); - uresp.key = ucontext->key; - ucontext->key += PAGE_SIZE; - uresp.db_key = ucontext->key; - ucontext->key += PAGE_SIZE; - spin_unlock(&ucontext->mmap_lock); - if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { - kfree(mm1); - kfree(mm2); - iwch_destroy_qp(&qhp->ibqp, udata); - return ERR_PTR(-EFAULT); - } - mm1->key = uresp.key; - mm1->addr = virt_to_phys(qhp->wq.queue); - mm1->len = PAGE_ALIGN(wqsize * sizeof(union t3_wr)); - insert_mmap(ucontext, mm1); - mm2->key = uresp.db_key; - mm2->addr = qhp->wq.udb & PAGE_MASK; - mm2->len = PAGE_SIZE; - insert_mmap(ucontext, mm2); - } - qhp->ibqp.qp_num = qhp->wq.qpid; - pr_debug( - "%s sq_num_entries %d, rq_num_entries %d qpid 0x%0x qhp %p dma_addr %pad size %d rq_addr 0x%x\n", - __func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, - qhp->wq.qpid, qhp, &qhp->wq.dma_addr, 1 << qhp->wq.size_log2, - qhp->wq.rq_addr); - return &qhp->ibqp; -} - -static int iwch_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask, struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_qp *qhp; - enum iwch_qp_attr_mask mask = 0; - struct iwch_qp_attributes attrs = {}; - - pr_debug("%s ib_qp %p\n", __func__, ibqp); - - /* iwarp does not support the RTR state */ - if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR)) - attr_mask &= ~IB_QP_STATE; - - /* Make sure we still have something left to do */ - if (!attr_mask) - return 0; - - qhp = to_iwch_qp(ibqp); - rhp = qhp->rhp; - - attrs.next_state = iwch_convert_state(attr->qp_state); - attrs.enable_rdma_read = (attr->qp_access_flags & - IB_ACCESS_REMOTE_READ) ? 1 : 0; - attrs.enable_rdma_write = (attr->qp_access_flags & - IB_ACCESS_REMOTE_WRITE) ? 1 : 0; - attrs.enable_bind = (attr->qp_access_flags & IB_ACCESS_MW_BIND) ? 1 : 0; - - - mask |= (attr_mask & IB_QP_STATE) ? IWCH_QP_ATTR_NEXT_STATE : 0; - mask |= (attr_mask & IB_QP_ACCESS_FLAGS) ? - (IWCH_QP_ATTR_ENABLE_RDMA_READ | - IWCH_QP_ATTR_ENABLE_RDMA_WRITE | - IWCH_QP_ATTR_ENABLE_RDMA_BIND) : 0; - - return iwch_modify_qp(rhp, qhp, mask, &attrs, 0); -} - -void iwch_qp_add_ref(struct ib_qp *qp) -{ - pr_debug("%s ib_qp %p\n", __func__, qp); - atomic_inc(&(to_iwch_qp(qp)->refcnt)); -} - -void iwch_qp_rem_ref(struct ib_qp *qp) -{ - pr_debug("%s ib_qp %p\n", __func__, qp); - if (atomic_dec_and_test(&(to_iwch_qp(qp)->refcnt))) - wake_up(&(to_iwch_qp(qp)->wait)); -} - -static struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn) -{ - pr_debug("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn); - return (struct ib_qp *)get_qhp(to_iwch_dev(dev), qpn); -} - - -static int iwch_query_pkey(struct ib_device *ibdev, - u8 port, u16 index, u16 * pkey) -{ - pr_debug("%s ibdev %p\n", __func__, ibdev); - *pkey = 0; - return 0; -} - -static int iwch_query_gid(struct ib_device *ibdev, u8 port, - int index, union ib_gid *gid) -{ - struct iwch_dev *dev; - - pr_debug("%s ibdev %p, port %d, index %d, gid %p\n", - __func__, ibdev, port, index, gid); - dev = to_iwch_dev(ibdev); - BUG_ON(port == 0 || port > 2); - memset(&(gid->raw[0]), 0, sizeof(gid->raw)); - memcpy(&(gid->raw[0]), dev->rdev.port_info.lldevs[port-1]->dev_addr, 6); - return 0; -} - -static u64 fw_vers_string_to_u64(struct iwch_dev *iwch_dev) -{ - struct ethtool_drvinfo info; - struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; - char *cp, *next; - unsigned fw_maj, fw_min, fw_mic; - - lldev->ethtool_ops->get_drvinfo(lldev, &info); - - next = info.fw_version + 1; - cp = strsep(&next, "."); - sscanf(cp, "%i", &fw_maj); - cp = strsep(&next, "."); - sscanf(cp, "%i", &fw_min); - cp = strsep(&next, "."); - sscanf(cp, "%i", &fw_mic); - - return (((u64)fw_maj & 0xffff) << 32) | ((fw_min & 0xffff) << 16) | - (fw_mic & 0xffff); -} - -static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *props, - struct ib_udata *uhw) -{ - - struct iwch_dev *dev; - - pr_debug("%s ibdev %p\n", __func__, ibdev); - - if (uhw->inlen || uhw->outlen) - return -EINVAL; - - dev = to_iwch_dev(ibdev); - memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); - props->hw_ver = dev->rdev.t3cdev_p->type; - props->fw_ver = fw_vers_string_to_u64(dev); - props->device_cap_flags = dev->device_cap_flags; - props->page_size_cap = dev->attr.mem_pgsizes_bitmask; - props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor; - props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device; - props->max_mr_size = dev->attr.max_mr_size; - props->max_qp = dev->attr.max_qps; - props->max_qp_wr = dev->attr.max_wrs; - props->max_send_sge = dev->attr.max_sge_per_wr; - props->max_recv_sge = dev->attr.max_sge_per_wr; - props->max_sge_rd = 1; - props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp; - props->max_qp_init_rd_atom = dev->attr.max_rdma_reads_per_qp; - props->max_cq = dev->attr.max_cqs; - props->max_cqe = dev->attr.max_cqes_per_cq; - props->max_mr = dev->attr.max_mem_regs; - props->max_pd = dev->attr.max_pds; - props->local_ca_ack_delay = 0; - props->max_fast_reg_page_list_len = T3_MAX_FASTREG_DEPTH; - - return 0; -} - -static int iwch_query_port(struct ib_device *ibdev, - u8 port, struct ib_port_attr *props) -{ - pr_debug("%s ibdev %p\n", __func__, ibdev); - - props->port_cap_flags = - IB_PORT_CM_SUP | - IB_PORT_SNMP_TUNNEL_SUP | - IB_PORT_REINIT_SUP | - IB_PORT_DEVICE_MGMT_SUP | - IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; - props->gid_tbl_len = 1; - props->pkey_tbl_len = 1; - props->active_width = 2; - props->active_speed = IB_SPEED_DDR; - props->max_msg_sz = -1; - - return 0; -} - -static ssize_t hw_rev_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iwch_dev *iwch_dev = - rdma_device_to_drv_device(dev, struct iwch_dev, ibdev); - - pr_debug("%s dev 0x%p\n", __func__, dev); - return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type); -} -static DEVICE_ATTR_RO(hw_rev); - -static ssize_t hca_type_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iwch_dev *iwch_dev = - rdma_device_to_drv_device(dev, struct iwch_dev, ibdev); - struct ethtool_drvinfo info; - struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; - - pr_debug("%s dev 0x%p\n", __func__, dev); - lldev->ethtool_ops->get_drvinfo(lldev, &info); - return sprintf(buf, "%s\n", info.driver); -} -static DEVICE_ATTR_RO(hca_type); - -static ssize_t board_id_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iwch_dev *iwch_dev = - rdma_device_to_drv_device(dev, struct iwch_dev, ibdev); - - pr_debug("%s dev 0x%p\n", __func__, dev); - return sprintf(buf, "%x.%x\n", iwch_dev->rdev.rnic_info.pdev->vendor, - iwch_dev->rdev.rnic_info.pdev->device); -} -static DEVICE_ATTR_RO(board_id); - -enum counters { - IPINRECEIVES, - IPINHDRERRORS, - IPINADDRERRORS, - IPINUNKNOWNPROTOS, - IPINDISCARDS, - IPINDELIVERS, - IPOUTREQUESTS, - IPOUTDISCARDS, - IPOUTNOROUTES, - IPREASMTIMEOUT, - IPREASMREQDS, - IPREASMOKS, - IPREASMFAILS, - TCPACTIVEOPENS, - TCPPASSIVEOPENS, - TCPATTEMPTFAILS, - TCPESTABRESETS, - TCPCURRESTAB, - TCPINSEGS, - TCPOUTSEGS, - TCPRETRANSSEGS, - TCPINERRS, - TCPOUTRSTS, - TCPRTOMIN, - TCPRTOMAX, - NR_COUNTERS -}; - -static const char * const names[] = { - [IPINRECEIVES] = "ipInReceives", - [IPINHDRERRORS] = "ipInHdrErrors", - [IPINADDRERRORS] = "ipInAddrErrors", - [IPINUNKNOWNPROTOS] = "ipInUnknownProtos", - [IPINDISCARDS] = "ipInDiscards", - [IPINDELIVERS] = "ipInDelivers", - [IPOUTREQUESTS] = "ipOutRequests", - [IPOUTDISCARDS] = "ipOutDiscards", - [IPOUTNOROUTES] = "ipOutNoRoutes", - [IPREASMTIMEOUT] = "ipReasmTimeout", - [IPREASMREQDS] = "ipReasmReqds", - [IPREASMOKS] = "ipReasmOKs", - [IPREASMFAILS] = "ipReasmFails", - [TCPACTIVEOPENS] = "tcpActiveOpens", - [TCPPASSIVEOPENS] = "tcpPassiveOpens", - [TCPATTEMPTFAILS] = "tcpAttemptFails", - [TCPESTABRESETS] = "tcpEstabResets", - [TCPCURRESTAB] = "tcpCurrEstab", - [TCPINSEGS] = "tcpInSegs", - [TCPOUTSEGS] = "tcpOutSegs", - [TCPRETRANSSEGS] = "tcpRetransSegs", - [TCPINERRS] = "tcpInErrs", - [TCPOUTRSTS] = "tcpOutRsts", - [TCPRTOMIN] = "tcpRtoMin", - [TCPRTOMAX] = "tcpRtoMax", -}; - -static struct rdma_hw_stats *iwch_alloc_stats(struct ib_device *ibdev, - u8 port_num) -{ - BUILD_BUG_ON(ARRAY_SIZE(names) != NR_COUNTERS); - - /* Our driver only supports device level stats */ - if (port_num != 0) - return NULL; - - return rdma_alloc_hw_stats_struct(names, NR_COUNTERS, - RDMA_HW_STATS_DEFAULT_LIFESPAN); -} - -static int iwch_get_mib(struct ib_device *ibdev, struct rdma_hw_stats *stats, - u8 port, int index) -{ - struct iwch_dev *dev; - struct tp_mib_stats m; - int ret; - - if (port != 0 || !stats) - return -ENOSYS; - - pr_debug("%s ibdev %p\n", __func__, ibdev); - dev = to_iwch_dev(ibdev); - ret = dev->rdev.t3cdev_p->ctl(dev->rdev.t3cdev_p, RDMA_GET_MIB, &m); - if (ret) - return -ENOSYS; - - stats->value[IPINRECEIVES] = ((u64)m.ipInReceive_hi << 32) + m.ipInReceive_lo; - stats->value[IPINHDRERRORS] = ((u64)m.ipInHdrErrors_hi << 32) + m.ipInHdrErrors_lo; - stats->value[IPINADDRERRORS] = ((u64)m.ipInAddrErrors_hi << 32) + m.ipInAddrErrors_lo; - stats->value[IPINUNKNOWNPROTOS] = ((u64)m.ipInUnknownProtos_hi << 32) + m.ipInUnknownProtos_lo; - stats->value[IPINDISCARDS] = ((u64)m.ipInDiscards_hi << 32) + m.ipInDiscards_lo; - stats->value[IPINDELIVERS] = ((u64)m.ipInDelivers_hi << 32) + m.ipInDelivers_lo; - stats->value[IPOUTREQUESTS] = ((u64)m.ipOutRequests_hi << 32) + m.ipOutRequests_lo; - stats->value[IPOUTDISCARDS] = ((u64)m.ipOutDiscards_hi << 32) + m.ipOutDiscards_lo; - stats->value[IPOUTNOROUTES] = ((u64)m.ipOutNoRoutes_hi << 32) + m.ipOutNoRoutes_lo; - stats->value[IPREASMTIMEOUT] = m.ipReasmTimeout; - stats->value[IPREASMREQDS] = m.ipReasmReqds; - stats->value[IPREASMOKS] = m.ipReasmOKs; - stats->value[IPREASMFAILS] = m.ipReasmFails; - stats->value[TCPACTIVEOPENS] = m.tcpActiveOpens; - stats->value[TCPPASSIVEOPENS] = m.tcpPassiveOpens; - stats->value[TCPATTEMPTFAILS] = m.tcpAttemptFails; - stats->value[TCPESTABRESETS] = m.tcpEstabResets; - stats->value[TCPCURRESTAB] = m.tcpOutRsts; - stats->value[TCPINSEGS] = m.tcpCurrEstab; - stats->value[TCPOUTSEGS] = ((u64)m.tcpInSegs_hi << 32) + m.tcpInSegs_lo; - stats->value[TCPRETRANSSEGS] = ((u64)m.tcpOutSegs_hi << 32) + m.tcpOutSegs_lo; - stats->value[TCPINERRS] = ((u64)m.tcpRetransSeg_hi << 32) + m.tcpRetransSeg_lo, - stats->value[TCPOUTRSTS] = ((u64)m.tcpInErrs_hi << 32) + m.tcpInErrs_lo; - stats->value[TCPRTOMIN] = m.tcpRtoMin; - stats->value[TCPRTOMAX] = m.tcpRtoMax; - - return stats->num_counters; -} - -static struct attribute *iwch_class_attributes[] = { - &dev_attr_hw_rev.attr, - &dev_attr_hca_type.attr, - &dev_attr_board_id.attr, - NULL -}; - -static const struct attribute_group iwch_attr_group = { - .attrs = iwch_class_attributes, -}; - -static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num, - struct ib_port_immutable *immutable) -{ - struct ib_port_attr attr; - int err; - - immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; - - err = ib_query_port(ibdev, port_num, &attr); - if (err) - return err; - - immutable->pkey_tbl_len = attr.pkey_tbl_len; - immutable->gid_tbl_len = attr.gid_tbl_len; - - return 0; -} - -static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str) -{ - struct iwch_dev *iwch_dev = to_iwch_dev(ibdev); - struct ethtool_drvinfo info; - struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; - - pr_debug("%s dev 0x%p\n", __func__, iwch_dev); - lldev->ethtool_ops->get_drvinfo(lldev, &info); - snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", info.fw_version); -} - -static const struct ib_device_ops iwch_dev_ops = { - .owner = THIS_MODULE, - .driver_id = RDMA_DRIVER_CXGB3, - .uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION, - .uverbs_no_driver_id_binding = 1, - - .alloc_hw_stats = iwch_alloc_stats, - .alloc_mr = iwch_alloc_mr, - .alloc_mw = iwch_alloc_mw, - .alloc_pd = iwch_allocate_pd, - .alloc_ucontext = iwch_alloc_ucontext, - .create_cq = iwch_create_cq, - .create_qp = iwch_create_qp, - .dealloc_mw = iwch_dealloc_mw, - .dealloc_pd = iwch_deallocate_pd, - .dealloc_ucontext = iwch_dealloc_ucontext, - .dereg_mr = iwch_dereg_mr, - .destroy_cq = iwch_destroy_cq, - .destroy_qp = iwch_destroy_qp, - .get_dev_fw_str = get_dev_fw_ver_str, - .get_dma_mr = iwch_get_dma_mr, - .get_hw_stats = iwch_get_mib, - .get_port_immutable = iwch_port_immutable, - .iw_accept = iwch_accept_cr, - .iw_add_ref = iwch_qp_add_ref, - .iw_connect = iwch_connect, - .iw_create_listen = iwch_create_listen, - .iw_destroy_listen = iwch_destroy_listen, - .iw_get_qp = iwch_get_qp, - .iw_reject = iwch_reject_cr, - .iw_rem_ref = iwch_qp_rem_ref, - .map_mr_sg = iwch_map_mr_sg, - .mmap = iwch_mmap, - .modify_qp = iwch_ib_modify_qp, - .poll_cq = iwch_poll_cq, - .post_recv = iwch_post_receive, - .post_send = iwch_post_send, - .query_device = iwch_query_device, - .query_gid = iwch_query_gid, - .query_pkey = iwch_query_pkey, - .query_port = iwch_query_port, - .reg_user_mr = iwch_reg_user_mr, - .req_notify_cq = iwch_arm_cq, - INIT_RDMA_OBJ_SIZE(ib_pd, iwch_pd, ibpd), - INIT_RDMA_OBJ_SIZE(ib_cq, iwch_cq, ibcq), - INIT_RDMA_OBJ_SIZE(ib_ucontext, iwch_ucontext, ibucontext), -}; - -static int set_netdevs(struct ib_device *ib_dev, struct cxio_rdev *rdev) -{ - int ret; - int i; - - for (i = 0; i < rdev->port_info.nports; i++) { - ret = ib_device_set_netdev(ib_dev, rdev->port_info.lldevs[i], - i + 1); - if (ret) - return ret; - } - return 0; -} - -int iwch_register_device(struct iwch_dev *dev) -{ - int err; - - pr_debug("%s iwch_dev %p\n", __func__, dev); - memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); - memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); - dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | - IB_DEVICE_MEM_WINDOW | - IB_DEVICE_MEM_MGT_EXTENSIONS; - - /* cxgb3 supports STag 0. */ - dev->ibdev.local_dma_lkey = 0; - - dev->ibdev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_POST_SEND) | - (1ull << IB_USER_VERBS_CMD_POST_RECV); - dev->ibdev.node_type = RDMA_NODE_RNIC; - BUILD_BUG_ON(sizeof(IWCH_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); - memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC)); - dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports; - dev->ibdev.num_comp_vectors = 1; - dev->ibdev.dev.parent = &dev->rdev.rnic_info.pdev->dev; - - memcpy(dev->ibdev.iw_ifname, dev->rdev.t3cdev_p->lldev->name, - sizeof(dev->ibdev.iw_ifname)); - - rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group); - ib_set_device_ops(&dev->ibdev, &iwch_dev_ops); - err = set_netdevs(&dev->ibdev, &dev->rdev); - if (err) - return err; - - return ib_register_device(&dev->ibdev, "cxgb3_%d"); -} - -void iwch_unregister_device(struct iwch_dev *dev) -{ - pr_debug("%s iwch_dev %p\n", __func__, dev); - ib_unregister_device(&dev->ibdev); - return; -} diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h deleted file mode 100644 index 8adbe9658935..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.h +++ /dev/null @@ -1,347 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __IWCH_PROVIDER_H__ -#define __IWCH_PROVIDER_H__ - -#include -#include -#include -#include -#include "t3cdev.h" -#include "iwch.h" -#include "cxio_wr.h" -#include "cxio_hal.h" - -struct iwch_pd { - struct ib_pd ibpd; - u32 pdid; - struct iwch_dev *rhp; -}; - -static inline struct iwch_pd *to_iwch_pd(struct ib_pd *ibpd) -{ - return container_of(ibpd, struct iwch_pd, ibpd); -} - -struct tpt_attributes { - u32 stag; - u32 state:1; - u32 type:2; - u32 rsvd:1; - enum tpt_mem_perm perms; - u32 remote_invaliate_disable:1; - u32 zbva:1; - u32 mw_bind_enable:1; - u32 page_size:5; - - u32 pdid; - u32 qpid; - u32 pbl_addr; - u32 len; - u64 va_fbo; - u32 pbl_size; -}; - -struct iwch_mr { - struct ib_mr ibmr; - struct ib_umem *umem; - struct iwch_dev *rhp; - u64 kva; - struct tpt_attributes attr; - u64 *pages; - u32 npages; -}; - -typedef struct iwch_mw iwch_mw_handle; - -static inline struct iwch_mr *to_iwch_mr(struct ib_mr *ibmr) -{ - return container_of(ibmr, struct iwch_mr, ibmr); -} - -struct iwch_mw { - struct ib_mw ibmw; - struct iwch_dev *rhp; - u64 kva; - struct tpt_attributes attr; -}; - -static inline struct iwch_mw *to_iwch_mw(struct ib_mw *ibmw) -{ - return container_of(ibmw, struct iwch_mw, ibmw); -} - -struct iwch_cq { - struct ib_cq ibcq; - struct iwch_dev *rhp; - struct t3_cq cq; - spinlock_t lock; - spinlock_t comp_handler_lock; - atomic_t refcnt; - wait_queue_head_t wait; - u32 __user *user_rptr_addr; -}; - -static inline struct iwch_cq *to_iwch_cq(struct ib_cq *ibcq) -{ - return container_of(ibcq, struct iwch_cq, ibcq); -} - -enum IWCH_QP_FLAGS { - QP_QUIESCED = 0x01 -}; - -struct iwch_mpa_attributes { - u8 initiator; - u8 recv_marker_enabled; - u8 xmit_marker_enabled; /* iWARP: enable inbound Read Resp. */ - u8 crc_enabled; - u8 version; /* 0 or 1 */ -}; - -struct iwch_qp_attributes { - u32 scq; - u32 rcq; - u32 sq_num_entries; - u32 rq_num_entries; - u32 sq_max_sges; - u32 sq_max_sges_rdma_write; - u32 rq_max_sges; - u32 state; - u8 enable_rdma_read; - u8 enable_rdma_write; /* enable inbound Read Resp. */ - u8 enable_bind; - u8 enable_mmid0_fastreg; /* Enable STAG0 + Fast-register */ - /* - * Next QP state. If specify the current state, only the - * QP attributes will be modified. - */ - u32 max_ord; - u32 max_ird; - u32 pd; /* IN */ - u32 next_state; - char terminate_buffer[52]; - u32 terminate_msg_len; - u8 is_terminate_local; - struct iwch_mpa_attributes mpa_attr; /* IN-OUT */ - struct iwch_ep *llp_stream_handle; - char *stream_msg_buf; /* Last stream msg. before Idle -> RTS */ - u32 stream_msg_buf_len; /* Only on Idle -> RTS */ -}; - -struct iwch_qp { - struct ib_qp ibqp; - struct iwch_dev *rhp; - struct iwch_ep *ep; - struct iwch_qp_attributes attr; - struct t3_wq wq; - spinlock_t lock; - atomic_t refcnt; - wait_queue_head_t wait; - enum IWCH_QP_FLAGS flags; -}; - -static inline int qp_quiesced(struct iwch_qp *qhp) -{ - return qhp->flags & QP_QUIESCED; -} - -static inline struct iwch_qp *to_iwch_qp(struct ib_qp *ibqp) -{ - return container_of(ibqp, struct iwch_qp, ibqp); -} - -void iwch_qp_add_ref(struct ib_qp *qp); -void iwch_qp_rem_ref(struct ib_qp *qp); - -struct iwch_ucontext { - struct ib_ucontext ibucontext; - struct cxio_ucontext uctx; - u32 key; - spinlock_t mmap_lock; - struct list_head mmaps; -}; - -static inline struct iwch_ucontext *to_iwch_ucontext(struct ib_ucontext *c) -{ - return container_of(c, struct iwch_ucontext, ibucontext); -} - -struct iwch_mm_entry { - struct list_head entry; - u64 addr; - u32 key; - unsigned len; -}; - -static inline struct iwch_mm_entry *remove_mmap(struct iwch_ucontext *ucontext, - u32 key, unsigned len) -{ - struct list_head *pos, *nxt; - struct iwch_mm_entry *mm; - - spin_lock(&ucontext->mmap_lock); - list_for_each_safe(pos, nxt, &ucontext->mmaps) { - - mm = list_entry(pos, struct iwch_mm_entry, entry); - if (mm->key == key && mm->len == len) { - list_del_init(&mm->entry); - spin_unlock(&ucontext->mmap_lock); - pr_debug("%s key 0x%x addr 0x%llx len %d\n", - __func__, key, - (unsigned long long)mm->addr, mm->len); - return mm; - } - } - spin_unlock(&ucontext->mmap_lock); - return NULL; -} - -static inline void insert_mmap(struct iwch_ucontext *ucontext, - struct iwch_mm_entry *mm) -{ - spin_lock(&ucontext->mmap_lock); - pr_debug("%s key 0x%x addr 0x%llx len %d\n", - __func__, mm->key, (unsigned long long)mm->addr, mm->len); - list_add_tail(&mm->entry, &ucontext->mmaps); - spin_unlock(&ucontext->mmap_lock); -} - -enum iwch_qp_attr_mask { - IWCH_QP_ATTR_NEXT_STATE = 1 << 0, - IWCH_QP_ATTR_ENABLE_RDMA_READ = 1 << 7, - IWCH_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8, - IWCH_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9, - IWCH_QP_ATTR_MAX_ORD = 1 << 11, - IWCH_QP_ATTR_MAX_IRD = 1 << 12, - IWCH_QP_ATTR_LLP_STREAM_HANDLE = 1 << 22, - IWCH_QP_ATTR_STREAM_MSG_BUFFER = 1 << 23, - IWCH_QP_ATTR_MPA_ATTR = 1 << 24, - IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE = 1 << 25, - IWCH_QP_ATTR_VALID_MODIFY = (IWCH_QP_ATTR_ENABLE_RDMA_READ | - IWCH_QP_ATTR_ENABLE_RDMA_WRITE | - IWCH_QP_ATTR_MAX_ORD | - IWCH_QP_ATTR_MAX_IRD | - IWCH_QP_ATTR_LLP_STREAM_HANDLE | - IWCH_QP_ATTR_STREAM_MSG_BUFFER | - IWCH_QP_ATTR_MPA_ATTR | - IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE) -}; - -int iwch_modify_qp(struct iwch_dev *rhp, - struct iwch_qp *qhp, - enum iwch_qp_attr_mask mask, - struct iwch_qp_attributes *attrs, - int internal); - -enum iwch_qp_state { - IWCH_QP_STATE_IDLE, - IWCH_QP_STATE_RTS, - IWCH_QP_STATE_ERROR, - IWCH_QP_STATE_TERMINATE, - IWCH_QP_STATE_CLOSING, - IWCH_QP_STATE_TOT -}; - -static inline int iwch_convert_state(enum ib_qp_state ib_state) -{ - switch (ib_state) { - case IB_QPS_RESET: - case IB_QPS_INIT: - return IWCH_QP_STATE_IDLE; - case IB_QPS_RTS: - return IWCH_QP_STATE_RTS; - case IB_QPS_SQD: - return IWCH_QP_STATE_CLOSING; - case IB_QPS_SQE: - return IWCH_QP_STATE_TERMINATE; - case IB_QPS_ERR: - return IWCH_QP_STATE_ERROR; - default: - return -1; - } -} - -static inline u32 iwch_ib_to_tpt_access(int acc) -{ - return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) | - (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0) | - (acc & IB_ACCESS_LOCAL_WRITE ? TPT_LOCAL_WRITE : 0) | - (acc & IB_ACCESS_MW_BIND ? TPT_MW_BIND : 0) | - TPT_LOCAL_READ; -} - -static inline u32 iwch_ib_to_tpt_bind_access(int acc) -{ - return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) | - (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0); -} - -enum iwch_mmid_state { - IWCH_STAG_STATE_VALID, - IWCH_STAG_STATE_INVALID -}; - -enum iwch_qp_query_flags { - IWCH_QP_QUERY_CONTEXT_NONE = 0x0, /* No ctx; Only attrs */ - IWCH_QP_QUERY_CONTEXT_GET = 0x1, /* Get ctx + attrs */ - IWCH_QP_QUERY_CONTEXT_SUSPEND = 0x2, /* Not Supported */ - - /* - * Quiesce QP context; Consumer - * will NOT replay outstanding WR - */ - IWCH_QP_QUERY_CONTEXT_QUIESCE = 0x4, - IWCH_QP_QUERY_CONTEXT_REMOVE = 0x8, - IWCH_QP_QUERY_TEST_USERWRITE = 0x32 /* Test special */ -}; - -u16 iwch_rqes_posted(struct iwch_qp *qhp); -int iwch_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, - const struct ib_send_wr **bad_wr); -int iwch_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, - const struct ib_recv_wr **bad_wr); -int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); -int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg); -int iwch_post_zb_read(struct iwch_ep *ep); -int iwch_register_device(struct iwch_dev *dev); -void iwch_unregister_device(struct iwch_dev *dev); -void stop_read_rep_timer(struct iwch_qp *qhp); -int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, - struct iwch_mr *mhp, int shift); -int iwch_alloc_pbl(struct iwch_mr *mhp, int npages); -void iwch_free_pbl(struct iwch_mr *mhp); -int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset); - -#define IWCH_NODE_DESC "cxgb3 Chelsio Communications" - -#endif diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c deleted file mode 100644 index c649faad63f9..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ /dev/null @@ -1,1082 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include -#include -#include "iwch_provider.h" -#include "iwch.h" -#include "iwch_cm.h" -#include "cxio_hal.h" -#include "cxio_resource.h" - -#define NO_SUPPORT -1 - -static int build_rdma_send(union t3_wr *wqe, const struct ib_send_wr *wr, - u8 *flit_cnt) -{ - int i; - u32 plen; - - switch (wr->opcode) { - case IB_WR_SEND: - if (wr->send_flags & IB_SEND_SOLICITED) - wqe->send.rdmaop = T3_SEND_WITH_SE; - else - wqe->send.rdmaop = T3_SEND; - wqe->send.rem_stag = 0; - break; - case IB_WR_SEND_WITH_INV: - if (wr->send_flags & IB_SEND_SOLICITED) - wqe->send.rdmaop = T3_SEND_WITH_SE_INV; - else - wqe->send.rdmaop = T3_SEND_WITH_INV; - wqe->send.rem_stag = cpu_to_be32(wr->ex.invalidate_rkey); - break; - default: - return -EINVAL; - } - if (wr->num_sge > T3_MAX_SGE) - return -EINVAL; - wqe->send.reserved[0] = 0; - wqe->send.reserved[1] = 0; - wqe->send.reserved[2] = 0; - plen = 0; - for (i = 0; i < wr->num_sge; i++) { - if ((plen + wr->sg_list[i].length) < plen) - return -EMSGSIZE; - - plen += wr->sg_list[i].length; - wqe->send.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey); - wqe->send.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); - wqe->send.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr); - } - wqe->send.num_sgle = cpu_to_be32(wr->num_sge); - *flit_cnt = 4 + ((wr->num_sge) << 1); - wqe->send.plen = cpu_to_be32(plen); - return 0; -} - -static int build_rdma_write(union t3_wr *wqe, const struct ib_send_wr *wr, - u8 *flit_cnt) -{ - int i; - u32 plen; - if (wr->num_sge > T3_MAX_SGE) - return -EINVAL; - wqe->write.rdmaop = T3_RDMA_WRITE; - wqe->write.reserved[0] = 0; - wqe->write.reserved[1] = 0; - wqe->write.reserved[2] = 0; - wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey); - wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr); - - if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { - plen = 4; - wqe->write.sgl[0].stag = wr->ex.imm_data; - wqe->write.sgl[0].len = cpu_to_be32(0); - wqe->write.num_sgle = cpu_to_be32(0); - *flit_cnt = 6; - } else { - plen = 0; - for (i = 0; i < wr->num_sge; i++) { - if ((plen + wr->sg_list[i].length) < plen) { - return -EMSGSIZE; - } - plen += wr->sg_list[i].length; - wqe->write.sgl[i].stag = - cpu_to_be32(wr->sg_list[i].lkey); - wqe->write.sgl[i].len = - cpu_to_be32(wr->sg_list[i].length); - wqe->write.sgl[i].to = - cpu_to_be64(wr->sg_list[i].addr); - } - wqe->write.num_sgle = cpu_to_be32(wr->num_sge); - *flit_cnt = 5 + ((wr->num_sge) << 1); - } - wqe->write.plen = cpu_to_be32(plen); - return 0; -} - -static int build_rdma_read(union t3_wr *wqe, const struct ib_send_wr *wr, - u8 *flit_cnt) -{ - if (wr->num_sge > 1) - return -EINVAL; - wqe->read.rdmaop = T3_READ_REQ; - if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) - wqe->read.local_inv = 1; - else - wqe->read.local_inv = 0; - wqe->read.reserved[0] = 0; - wqe->read.reserved[1] = 0; - wqe->read.rem_stag = cpu_to_be32(rdma_wr(wr)->rkey); - wqe->read.rem_to = cpu_to_be64(rdma_wr(wr)->remote_addr); - wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey); - wqe->read.local_len = cpu_to_be32(wr->sg_list[0].length); - wqe->read.local_to = cpu_to_be64(wr->sg_list[0].addr); - *flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3; - return 0; -} - -static int build_memreg(union t3_wr *wqe, const struct ib_reg_wr *wr, - u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq) -{ - struct iwch_mr *mhp = to_iwch_mr(wr->mr); - int i; - __be64 *p; - - if (mhp->npages > T3_MAX_FASTREG_DEPTH) - return -EINVAL; - *wr_cnt = 1; - wqe->fastreg.stag = cpu_to_be32(wr->key); - wqe->fastreg.len = cpu_to_be32(mhp->ibmr.length); - wqe->fastreg.va_base_hi = cpu_to_be32(mhp->ibmr.iova >> 32); - wqe->fastreg.va_base_lo_fbo = - cpu_to_be32(mhp->ibmr.iova & 0xffffffff); - wqe->fastreg.page_type_perms = cpu_to_be32( - V_FR_PAGE_COUNT(mhp->npages) | - V_FR_PAGE_SIZE(ilog2(wr->mr->page_size) - 12) | - V_FR_TYPE(TPT_VATO) | - V_FR_PERMS(iwch_ib_to_tpt_access(wr->access))); - p = &wqe->fastreg.pbl_addrs[0]; - for (i = 0; i < mhp->npages; i++, p++) { - - /* If we need a 2nd WR, then set it up */ - if (i == T3_MAX_FASTREG_FRAG) { - *wr_cnt = 2; - wqe = (union t3_wr *)(wq->queue + - Q_PTR2IDX((wq->wptr+1), wq->size_log2)); - build_fw_riwrh((void *)wqe, T3_WR_FASTREG, 0, - Q_GENBIT(wq->wptr + 1, wq->size_log2), - 0, 1 + mhp->npages - T3_MAX_FASTREG_FRAG, - T3_EOP); - - p = &wqe->pbl_frag.pbl_addrs[0]; - } - *p = cpu_to_be64((u64)mhp->pages[i]); - } - *flit_cnt = 5 + mhp->npages; - if (*flit_cnt > 15) - *flit_cnt = 15; - return 0; -} - -static int build_inv_stag(union t3_wr *wqe, const struct ib_send_wr *wr, - u8 *flit_cnt) -{ - wqe->local_inv.stag = cpu_to_be32(wr->ex.invalidate_rkey); - wqe->local_inv.reserved = 0; - *flit_cnt = sizeof(struct t3_local_inv_wr) >> 3; - return 0; -} - -static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list, - u32 num_sgle, u32 * pbl_addr, u8 * page_size) -{ - int i; - struct iwch_mr *mhp; - u64 offset; - for (i = 0; i < num_sgle; i++) { - - mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8); - if (!mhp) { - pr_debug("%s %d\n", __func__, __LINE__); - return -EIO; - } - if (!mhp->attr.state) { - pr_debug("%s %d\n", __func__, __LINE__); - return -EIO; - } - if (mhp->attr.zbva) { - pr_debug("%s %d\n", __func__, __LINE__); - return -EIO; - } - - if (sg_list[i].addr < mhp->attr.va_fbo) { - pr_debug("%s %d\n", __func__, __LINE__); - return -EINVAL; - } - if (sg_list[i].addr + ((u64) sg_list[i].length) < - sg_list[i].addr) { - pr_debug("%s %d\n", __func__, __LINE__); - return -EINVAL; - } - if (sg_list[i].addr + ((u64) sg_list[i].length) > - mhp->attr.va_fbo + ((u64) mhp->attr.len)) { - pr_debug("%s %d\n", __func__, __LINE__); - return -EINVAL; - } - offset = sg_list[i].addr - mhp->attr.va_fbo; - offset += mhp->attr.va_fbo & - ((1UL << (12 + mhp->attr.page_size)) - 1); - pbl_addr[i] = ((mhp->attr.pbl_addr - - rhp->rdev.rnic_info.pbl_base) >> 3) + - (offset >> (12 + mhp->attr.page_size)); - page_size[i] = mhp->attr.page_size; - } - return 0; -} - -static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe, - const struct ib_recv_wr *wr) -{ - int i, err = 0; - u32 pbl_addr[T3_MAX_SGE]; - u8 page_size[T3_MAX_SGE]; - - err = iwch_sgl2pbl_map(qhp->rhp, wr->sg_list, wr->num_sge, pbl_addr, - page_size); - if (err) - return err; - wqe->recv.pagesz[0] = page_size[0]; - wqe->recv.pagesz[1] = page_size[1]; - wqe->recv.pagesz[2] = page_size[2]; - wqe->recv.pagesz[3] = page_size[3]; - wqe->recv.num_sgle = cpu_to_be32(wr->num_sge); - for (i = 0; i < wr->num_sge; i++) { - wqe->recv.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey); - wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); - - /* to in the WQE == the offset into the page */ - wqe->recv.sgl[i].to = cpu_to_be64(((u32)wr->sg_list[i].addr) & - ((1UL << (12 + page_size[i])) - 1)); - - /* pbl_addr is the adapters address in the PBL */ - wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]); - } - for (; i < T3_MAX_SGE; i++) { - wqe->recv.sgl[i].stag = 0; - wqe->recv.sgl[i].len = 0; - wqe->recv.sgl[i].to = 0; - wqe->recv.pbl_addr[i] = 0; - } - qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, - qhp->wq.rq_size_log2)].wr_id = wr->wr_id; - qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, - qhp->wq.rq_size_log2)].pbl_addr = 0; - return 0; -} - -static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe, - const struct ib_recv_wr *wr) -{ - int i; - u32 pbl_addr; - u32 pbl_offset; - - - /* - * The T3 HW requires the PBL in the HW recv descriptor to reference - * a PBL entry. So we allocate the max needed PBL memory here and pass - * it to the uP in the recv WR. The uP will build the PBL and setup - * the HW recv descriptor. - */ - pbl_addr = cxio_hal_pblpool_alloc(&qhp->rhp->rdev, T3_STAG0_PBL_SIZE); - if (!pbl_addr) - return -ENOMEM; - - /* - * Compute the 8B aligned offset. - */ - pbl_offset = (pbl_addr - qhp->rhp->rdev.rnic_info.pbl_base) >> 3; - - wqe->recv.num_sgle = cpu_to_be32(wr->num_sge); - - for (i = 0; i < wr->num_sge; i++) { - - /* - * Use a 128MB page size. This and an imposed 128MB - * sge length limit allows us to require only a 2-entry HW - * PBL for each SGE. This restriction is acceptable since - * since it is not possible to allocate 128MB of contiguous - * DMA coherent memory! - */ - if (wr->sg_list[i].length > T3_STAG0_MAX_PBE_LEN) - return -EINVAL; - wqe->recv.pagesz[i] = T3_STAG0_PAGE_SHIFT; - - /* - * T3 restricts a recv to all zero-stag or all non-zero-stag. - */ - if (wr->sg_list[i].lkey != 0) - return -EINVAL; - wqe->recv.sgl[i].stag = 0; - wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); - wqe->recv.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr); - wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_offset); - pbl_offset += 2; - } - for (; i < T3_MAX_SGE; i++) { - wqe->recv.pagesz[i] = 0; - wqe->recv.sgl[i].stag = 0; - wqe->recv.sgl[i].len = 0; - wqe->recv.sgl[i].to = 0; - wqe->recv.pbl_addr[i] = 0; - } - qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, - qhp->wq.rq_size_log2)].wr_id = wr->wr_id; - qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, - qhp->wq.rq_size_log2)].pbl_addr = pbl_addr; - return 0; -} - -int iwch_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, - const struct ib_send_wr **bad_wr) -{ - int err = 0; - u8 uninitialized_var(t3_wr_flit_cnt); - enum t3_wr_opcode t3_wr_opcode = 0; - enum t3_wr_flags t3_wr_flags; - struct iwch_qp *qhp; - u32 idx; - union t3_wr *wqe; - u32 num_wrs; - unsigned long flag; - struct t3_swsq *sqp; - int wr_cnt = 1; - - qhp = to_iwch_qp(ibqp); - spin_lock_irqsave(&qhp->lock, flag); - if (qhp->attr.state > IWCH_QP_STATE_RTS) { - spin_unlock_irqrestore(&qhp->lock, flag); - err = -EINVAL; - goto out; - } - num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr, - qhp->wq.sq_size_log2); - if (num_wrs == 0) { - spin_unlock_irqrestore(&qhp->lock, flag); - err = -ENOMEM; - goto out; - } - while (wr) { - if (num_wrs == 0) { - err = -ENOMEM; - break; - } - idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); - wqe = (union t3_wr *) (qhp->wq.queue + idx); - t3_wr_flags = 0; - if (wr->send_flags & IB_SEND_SOLICITED) - t3_wr_flags |= T3_SOLICITED_EVENT_FLAG; - if (wr->send_flags & IB_SEND_SIGNALED) - t3_wr_flags |= T3_COMPLETION_FLAG; - sqp = qhp->wq.sq + - Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2); - switch (wr->opcode) { - case IB_WR_SEND: - case IB_WR_SEND_WITH_INV: - if (wr->send_flags & IB_SEND_FENCE) - t3_wr_flags |= T3_READ_FENCE_FLAG; - t3_wr_opcode = T3_WR_SEND; - err = build_rdma_send(wqe, wr, &t3_wr_flit_cnt); - break; - case IB_WR_RDMA_WRITE: - case IB_WR_RDMA_WRITE_WITH_IMM: - t3_wr_opcode = T3_WR_WRITE; - err = build_rdma_write(wqe, wr, &t3_wr_flit_cnt); - break; - case IB_WR_RDMA_READ: - case IB_WR_RDMA_READ_WITH_INV: - t3_wr_opcode = T3_WR_READ; - t3_wr_flags = 0; /* T3 reads are always signaled */ - err = build_rdma_read(wqe, wr, &t3_wr_flit_cnt); - if (err) - break; - sqp->read_len = wqe->read.local_len; - if (!qhp->wq.oldest_read) - qhp->wq.oldest_read = sqp; - break; - case IB_WR_REG_MR: - t3_wr_opcode = T3_WR_FASTREG; - err = build_memreg(wqe, reg_wr(wr), &t3_wr_flit_cnt, - &wr_cnt, &qhp->wq); - break; - case IB_WR_LOCAL_INV: - if (wr->send_flags & IB_SEND_FENCE) - t3_wr_flags |= T3_LOCAL_FENCE_FLAG; - t3_wr_opcode = T3_WR_INV_STAG; - err = build_inv_stag(wqe, wr, &t3_wr_flit_cnt); - break; - default: - pr_debug("%s post of type=%d TBD!\n", __func__, - wr->opcode); - err = -EINVAL; - } - if (err) - break; - wqe->send.wrid.id0.hi = qhp->wq.sq_wptr; - sqp->wr_id = wr->wr_id; - sqp->opcode = wr2opcode(t3_wr_opcode); - sqp->sq_wptr = qhp->wq.sq_wptr; - sqp->complete = 0; - sqp->signaled = (wr->send_flags & IB_SEND_SIGNALED); - - build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags, - Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), - 0, t3_wr_flit_cnt, - (wr_cnt == 1) ? T3_SOPEOP : T3_SOP); - pr_debug("%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d\n", - __func__, (unsigned long long)wr->wr_id, idx, - Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2), - sqp->opcode); - wr = wr->next; - num_wrs--; - qhp->wq.wptr += wr_cnt; - ++(qhp->wq.sq_wptr); - } - spin_unlock_irqrestore(&qhp->lock, flag); - if (cxio_wq_db_enabled(&qhp->wq)) - ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); - -out: - if (err) - *bad_wr = wr; - return err; -} - -int iwch_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, - const struct ib_recv_wr **bad_wr) -{ - int err = 0; - struct iwch_qp *qhp; - u32 idx; - union t3_wr *wqe; - u32 num_wrs; - unsigned long flag; - - qhp = to_iwch_qp(ibqp); - spin_lock_irqsave(&qhp->lock, flag); - if (qhp->attr.state > IWCH_QP_STATE_RTS) { - spin_unlock_irqrestore(&qhp->lock, flag); - err = -EINVAL; - goto out; - } - num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr, - qhp->wq.rq_size_log2) - 1; - if (!wr) { - spin_unlock_irqrestore(&qhp->lock, flag); - err = -ENOMEM; - goto out; - } - while (wr) { - if (wr->num_sge > T3_MAX_SGE) { - err = -EINVAL; - break; - } - idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); - wqe = (union t3_wr *) (qhp->wq.queue + idx); - if (num_wrs) - if (wr->sg_list[0].lkey) - err = build_rdma_recv(qhp, wqe, wr); - else - err = build_zero_stag_recv(qhp, wqe, wr); - else - err = -ENOMEM; - - if (err) - break; - - build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG, - Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), - 0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP); - pr_debug("%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rw_rptr 0x%x wqe %p\n", - __func__, (unsigned long long)wr->wr_id, - idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe); - ++(qhp->wq.rq_wptr); - ++(qhp->wq.wptr); - wr = wr->next; - num_wrs--; - } - spin_unlock_irqrestore(&qhp->lock, flag); - if (cxio_wq_db_enabled(&qhp->wq)) - ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); - -out: - if (err) - *bad_wr = wr; - return err; -} - -static inline void build_term_codes(struct respQ_msg_t *rsp_msg, - u8 *layer_type, u8 *ecode) -{ - int status = TPT_ERR_INTERNAL_ERR; - int tagged = 0; - int opcode = -1; - int rqtype = 0; - int send_inv = 0; - - if (rsp_msg) { - status = CQE_STATUS(rsp_msg->cqe); - opcode = CQE_OPCODE(rsp_msg->cqe); - rqtype = RQ_TYPE(rsp_msg->cqe); - send_inv = (opcode == T3_SEND_WITH_INV) || - (opcode == T3_SEND_WITH_SE_INV); - tagged = (opcode == T3_RDMA_WRITE) || - (rqtype && (opcode == T3_READ_RESP)); - } - - switch (status) { - case TPT_ERR_STAG: - if (send_inv) { - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; - *ecode = RDMAP_CANT_INV_STAG; - } else { - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - *ecode = RDMAP_INV_STAG; - } - break; - case TPT_ERR_PDID: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - if ((opcode == T3_SEND_WITH_INV) || - (opcode == T3_SEND_WITH_SE_INV)) - *ecode = RDMAP_CANT_INV_STAG; - else - *ecode = RDMAP_STAG_NOT_ASSOC; - break; - case TPT_ERR_QPID: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - *ecode = RDMAP_STAG_NOT_ASSOC; - break; - case TPT_ERR_ACCESS: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - *ecode = RDMAP_ACC_VIOL; - break; - case TPT_ERR_WRAP: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - *ecode = RDMAP_TO_WRAP; - break; - case TPT_ERR_BOUND: - if (tagged) { - *layer_type = LAYER_DDP|DDP_TAGGED_ERR; - *ecode = DDPT_BASE_BOUNDS; - } else { - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - *ecode = RDMAP_BASE_BOUNDS; - } - break; - case TPT_ERR_INVALIDATE_SHARED_MR: - case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; - *ecode = RDMAP_CANT_INV_STAG; - break; - case TPT_ERR_ECC: - case TPT_ERR_ECC_PSTAG: - case TPT_ERR_INTERNAL_ERR: - *layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA; - *ecode = 0; - break; - case TPT_ERR_OUT_OF_RQE: - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_INV_MSN_NOBUF; - break; - case TPT_ERR_PBL_ADDR_BOUND: - *layer_type = LAYER_DDP|DDP_TAGGED_ERR; - *ecode = DDPT_BASE_BOUNDS; - break; - case TPT_ERR_CRC: - *layer_type = LAYER_MPA|DDP_LLP; - *ecode = MPA_CRC_ERR; - break; - case TPT_ERR_MARKER: - *layer_type = LAYER_MPA|DDP_LLP; - *ecode = MPA_MARKER_ERR; - break; - case TPT_ERR_PDU_LEN_ERR: - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_MSG_TOOBIG; - break; - case TPT_ERR_DDP_VERSION: - if (tagged) { - *layer_type = LAYER_DDP|DDP_TAGGED_ERR; - *ecode = DDPT_INV_VERS; - } else { - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_INV_VERS; - } - break; - case TPT_ERR_RDMA_VERSION: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; - *ecode = RDMAP_INV_VERS; - break; - case TPT_ERR_OPCODE: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; - *ecode = RDMAP_INV_OPCODE; - break; - case TPT_ERR_DDP_QUEUE_NUM: - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_INV_QN; - break; - case TPT_ERR_MSN: - case TPT_ERR_MSN_GAP: - case TPT_ERR_MSN_RANGE: - case TPT_ERR_IRD_OVERFLOW: - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_INV_MSN_RANGE; - break; - case TPT_ERR_TBIT: - *layer_type = LAYER_DDP|DDP_LOCAL_CATA; - *ecode = 0; - break; - case TPT_ERR_MO: - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_INV_MO; - break; - default: - *layer_type = LAYER_RDMAP|DDP_LOCAL_CATA; - *ecode = 0; - break; - } -} - -int iwch_post_zb_read(struct iwch_ep *ep) -{ - union t3_wr *wqe; - struct sk_buff *skb; - u8 flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3; - - pr_debug("%s enter\n", __func__); - skb = alloc_skb(40, GFP_KERNEL); - if (!skb) { - pr_err("%s cannot send zb_read!!\n", __func__); - return -ENOMEM; - } - wqe = skb_put_zero(skb, sizeof(struct t3_rdma_read_wr)); - wqe->read.rdmaop = T3_READ_REQ; - wqe->read.reserved[0] = 0; - wqe->read.reserved[1] = 0; - wqe->read.rem_stag = cpu_to_be32(1); - wqe->read.rem_to = cpu_to_be64(1); - wqe->read.local_stag = cpu_to_be32(1); - wqe->read.local_len = cpu_to_be32(0); - wqe->read.local_to = cpu_to_be64(1); - wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_READ)); - wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(ep->hwtid)| - V_FW_RIWR_LEN(flit_cnt)); - skb->priority = CPL_PRIORITY_DATA; - return iwch_cxgb3_ofld_send(ep->com.qp->rhp->rdev.t3cdev_p, skb); -} - -/* - * This posts a TERMINATE with layer=RDMA, type=catastrophic. - */ -int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg) -{ - union t3_wr *wqe; - struct terminate_message *term; - struct sk_buff *skb; - - pr_debug("%s %d\n", __func__, __LINE__); - skb = alloc_skb(40, GFP_ATOMIC); - if (!skb) { - pr_err("%s cannot send TERMINATE!\n", __func__); - return -ENOMEM; - } - wqe = skb_put_zero(skb, 40); - wqe->send.rdmaop = T3_TERMINATE; - - /* immediate data length */ - wqe->send.plen = htonl(4); - - /* immediate data starts here. */ - term = (struct terminate_message *)wqe->send.sgl; - build_term_codes(rsp_msg, &term->layer_etype, &term->ecode); - wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_SEND) | - V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG)); - wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid)); - skb->priority = CPL_PRIORITY_DATA; - return iwch_cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb); -} - -/* - * Assumes qhp lock is held. - */ -static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp, - struct iwch_cq *schp) - __releases(&qhp->lock) - __acquires(&qhp->lock) -{ - int count; - int flushed; - - lockdep_assert_held(&qhp->lock); - - pr_debug("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp); - /* take a ref on the qhp since we must release the lock */ - atomic_inc(&qhp->refcnt); - spin_unlock(&qhp->lock); - - /* locking hierarchy: cq lock first, then qp lock. */ - spin_lock(&rchp->lock); - spin_lock(&qhp->lock); - cxio_flush_hw_cq(&rchp->cq); - cxio_count_rcqes(&rchp->cq, &qhp->wq, &count); - flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count); - spin_unlock(&qhp->lock); - spin_unlock(&rchp->lock); - if (flushed) { - spin_lock(&rchp->comp_handler_lock); - (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); - spin_unlock(&rchp->comp_handler_lock); - } - - /* locking hierarchy: cq lock first, then qp lock. */ - spin_lock(&schp->lock); - spin_lock(&qhp->lock); - cxio_flush_hw_cq(&schp->cq); - cxio_count_scqes(&schp->cq, &qhp->wq, &count); - flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count); - spin_unlock(&qhp->lock); - spin_unlock(&schp->lock); - if (flushed) { - spin_lock(&schp->comp_handler_lock); - (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); - spin_unlock(&schp->comp_handler_lock); - } - - /* deref */ - if (atomic_dec_and_test(&qhp->refcnt)) - wake_up(&qhp->wait); - - spin_lock(&qhp->lock); -} - -static void flush_qp(struct iwch_qp *qhp) -{ - struct iwch_cq *rchp, *schp; - - rchp = get_chp(qhp->rhp, qhp->attr.rcq); - schp = get_chp(qhp->rhp, qhp->attr.scq); - - if (qhp->ibqp.uobject) { - cxio_set_wq_in_error(&qhp->wq); - cxio_set_cq_in_error(&rchp->cq); - spin_lock(&rchp->comp_handler_lock); - (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); - spin_unlock(&rchp->comp_handler_lock); - if (schp != rchp) { - cxio_set_cq_in_error(&schp->cq); - spin_lock(&schp->comp_handler_lock); - (*schp->ibcq.comp_handler)(&schp->ibcq, - schp->ibcq.cq_context); - spin_unlock(&schp->comp_handler_lock); - } - return; - } - __flush_qp(qhp, rchp, schp); -} - - -/* - * Return count of RECV WRs posted - */ -u16 iwch_rqes_posted(struct iwch_qp *qhp) -{ - union t3_wr *wqe = qhp->wq.queue; - u16 count = 0; - - while (count < USHRT_MAX && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) { - count++; - wqe++; - } - pr_debug("%s qhp %p count %u\n", __func__, qhp, count); - return count; -} - -static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp, - enum iwch_qp_attr_mask mask, - struct iwch_qp_attributes *attrs) -{ - struct t3_rdma_init_attr init_attr; - int ret; - - init_attr.tid = qhp->ep->hwtid; - init_attr.qpid = qhp->wq.qpid; - init_attr.pdid = qhp->attr.pd; - init_attr.scqid = qhp->attr.scq; - init_attr.rcqid = qhp->attr.rcq; - init_attr.rq_addr = qhp->wq.rq_addr; - init_attr.rq_size = 1 << qhp->wq.rq_size_log2; - init_attr.mpaattrs = uP_RI_MPA_IETF_ENABLE | - qhp->attr.mpa_attr.recv_marker_enabled | - (qhp->attr.mpa_attr.xmit_marker_enabled << 1) | - (qhp->attr.mpa_attr.crc_enabled << 2); - - init_attr.qpcaps = uP_RI_QP_RDMA_READ_ENABLE | - uP_RI_QP_RDMA_WRITE_ENABLE | - uP_RI_QP_BIND_ENABLE; - if (!qhp->ibqp.uobject) - init_attr.qpcaps |= uP_RI_QP_STAG0_ENABLE | - uP_RI_QP_FAST_REGISTER_ENABLE; - - init_attr.tcp_emss = qhp->ep->emss; - init_attr.ord = qhp->attr.max_ord; - init_attr.ird = qhp->attr.max_ird; - init_attr.qp_dma_addr = qhp->wq.dma_addr; - init_attr.qp_dma_size = (1UL << qhp->wq.size_log2); - init_attr.rqe_count = iwch_rqes_posted(qhp); - init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0; - init_attr.chan = qhp->ep->l2t->smt_idx; - if (peer2peer) { - init_attr.rtr_type = RTR_READ; - if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator) - init_attr.ord = 1; - if (init_attr.ird == 0 && !qhp->attr.mpa_attr.initiator) - init_attr.ird = 1; - } else - init_attr.rtr_type = 0; - init_attr.irs = qhp->ep->rcv_seq; - pr_debug("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d flags 0x%x qpcaps 0x%x\n", - __func__, - init_attr.rq_addr, init_attr.rq_size, - init_attr.flags, init_attr.qpcaps); - ret = cxio_rdma_init(&rhp->rdev, &init_attr); - pr_debug("%s ret %d\n", __func__, ret); - return ret; -} - -int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, - enum iwch_qp_attr_mask mask, - struct iwch_qp_attributes *attrs, - int internal) -{ - int ret = 0; - struct iwch_qp_attributes newattr = qhp->attr; - unsigned long flag; - int disconnect = 0; - int terminate = 0; - int abort = 0; - int free = 0; - struct iwch_ep *ep = NULL; - - pr_debug("%s qhp %p qpid 0x%x ep %p state %d -> %d\n", __func__, - qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state, - (mask & IWCH_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1); - - spin_lock_irqsave(&qhp->lock, flag); - - /* Process attr changes if in IDLE */ - if (mask & IWCH_QP_ATTR_VALID_MODIFY) { - if (qhp->attr.state != IWCH_QP_STATE_IDLE) { - ret = -EIO; - goto out; - } - if (mask & IWCH_QP_ATTR_ENABLE_RDMA_READ) - newattr.enable_rdma_read = attrs->enable_rdma_read; - if (mask & IWCH_QP_ATTR_ENABLE_RDMA_WRITE) - newattr.enable_rdma_write = attrs->enable_rdma_write; - if (mask & IWCH_QP_ATTR_ENABLE_RDMA_BIND) - newattr.enable_bind = attrs->enable_bind; - if (mask & IWCH_QP_ATTR_MAX_ORD) { - if (attrs->max_ord > - rhp->attr.max_rdma_read_qp_depth) { - ret = -EINVAL; - goto out; - } - newattr.max_ord = attrs->max_ord; - } - if (mask & IWCH_QP_ATTR_MAX_IRD) { - if (attrs->max_ird > - rhp->attr.max_rdma_reads_per_qp) { - ret = -EINVAL; - goto out; - } - newattr.max_ird = attrs->max_ird; - } - qhp->attr = newattr; - } - - if (!(mask & IWCH_QP_ATTR_NEXT_STATE)) - goto out; - if (qhp->attr.state == attrs->next_state) - goto out; - - switch (qhp->attr.state) { - case IWCH_QP_STATE_IDLE: - switch (attrs->next_state) { - case IWCH_QP_STATE_RTS: - if (!(mask & IWCH_QP_ATTR_LLP_STREAM_HANDLE)) { - ret = -EINVAL; - goto out; - } - if (!(mask & IWCH_QP_ATTR_MPA_ATTR)) { - ret = -EINVAL; - goto out; - } - qhp->attr.mpa_attr = attrs->mpa_attr; - qhp->attr.llp_stream_handle = attrs->llp_stream_handle; - qhp->ep = qhp->attr.llp_stream_handle; - qhp->attr.state = IWCH_QP_STATE_RTS; - - /* - * Ref the endpoint here and deref when we - * disassociate the endpoint from the QP. This - * happens in CLOSING->IDLE transition or *->ERROR - * transition. - */ - get_ep(&qhp->ep->com); - spin_unlock_irqrestore(&qhp->lock, flag); - ret = rdma_init(rhp, qhp, mask, attrs); - spin_lock_irqsave(&qhp->lock, flag); - if (ret) - goto err; - break; - case IWCH_QP_STATE_ERROR: - qhp->attr.state = IWCH_QP_STATE_ERROR; - flush_qp(qhp); - break; - default: - ret = -EINVAL; - goto out; - } - break; - case IWCH_QP_STATE_RTS: - switch (attrs->next_state) { - case IWCH_QP_STATE_CLOSING: - BUG_ON(kref_read(&qhp->ep->com.kref) < 2); - qhp->attr.state = IWCH_QP_STATE_CLOSING; - if (!internal) { - abort=0; - disconnect = 1; - ep = qhp->ep; - get_ep(&ep->com); - } - break; - case IWCH_QP_STATE_TERMINATE: - qhp->attr.state = IWCH_QP_STATE_TERMINATE; - if (qhp->ibqp.uobject) - cxio_set_wq_in_error(&qhp->wq); - if (!internal) - terminate = 1; - break; - case IWCH_QP_STATE_ERROR: - qhp->attr.state = IWCH_QP_STATE_ERROR; - if (!internal) { - abort=1; - disconnect = 1; - ep = qhp->ep; - get_ep(&ep->com); - } - goto err; - break; - default: - ret = -EINVAL; - goto out; - } - break; - case IWCH_QP_STATE_CLOSING: - if (!internal) { - ret = -EINVAL; - goto out; - } - switch (attrs->next_state) { - case IWCH_QP_STATE_IDLE: - flush_qp(qhp); - qhp->attr.state = IWCH_QP_STATE_IDLE; - qhp->attr.llp_stream_handle = NULL; - put_ep(&qhp->ep->com); - qhp->ep = NULL; - wake_up(&qhp->wait); - break; - case IWCH_QP_STATE_ERROR: - goto err; - default: - ret = -EINVAL; - goto err; - } - break; - case IWCH_QP_STATE_ERROR: - if (attrs->next_state != IWCH_QP_STATE_IDLE) { - ret = -EINVAL; - goto out; - } - - if (!Q_EMPTY(qhp->wq.sq_rptr, qhp->wq.sq_wptr) || - !Q_EMPTY(qhp->wq.rq_rptr, qhp->wq.rq_wptr)) { - ret = -EINVAL; - goto out; - } - qhp->attr.state = IWCH_QP_STATE_IDLE; - break; - case IWCH_QP_STATE_TERMINATE: - if (!internal) { - ret = -EINVAL; - goto out; - } - goto err; - break; - default: - pr_err("%s in a bad state %d\n", __func__, qhp->attr.state); - ret = -EINVAL; - goto err; - break; - } - goto out; -err: - pr_debug("%s disassociating ep %p qpid 0x%x\n", __func__, qhp->ep, - qhp->wq.qpid); - - /* disassociate the LLP connection */ - qhp->attr.llp_stream_handle = NULL; - ep = qhp->ep; - qhp->ep = NULL; - qhp->attr.state = IWCH_QP_STATE_ERROR; - free=1; - wake_up(&qhp->wait); - BUG_ON(!ep); - flush_qp(qhp); -out: - spin_unlock_irqrestore(&qhp->lock, flag); - - if (terminate) - iwch_post_terminate(qhp, NULL); - - /* - * If disconnect is 1, then we need to initiate a disconnect - * on the EP. This can be a normal close (RTS->CLOSING) or - * an abnormal close (RTS/CLOSING->ERROR). - */ - if (disconnect) { - iwch_ep_disconnect(ep, abort, GFP_KERNEL); - put_ep(&ep->com); - } - - /* - * If free is 1, then we've disassociated the EP from the QP - * and we need to dereference the EP. - */ - if (free) - put_ep(&ep->com); - - pr_debug("%s exit state %d\n", __func__, qhp->attr.state); - return ret; -} diff --git a/drivers/infiniband/hw/cxgb3/tcb.h b/drivers/infiniband/hw/cxgb3/tcb.h deleted file mode 100644 index c702dc199e18..000000000000 --- a/drivers/infiniband/hw/cxgb3/tcb.h +++ /dev/null @@ -1,632 +0,0 @@ -/* - * Copyright (c) 2007 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef _TCB_DEFS_H -#define _TCB_DEFS_H - -#define W_TCB_T_STATE 0 -#define S_TCB_T_STATE 0 -#define M_TCB_T_STATE 0xfULL -#define V_TCB_T_STATE(x) ((x) << S_TCB_T_STATE) - -#define W_TCB_TIMER 0 -#define S_TCB_TIMER 4 -#define M_TCB_TIMER 0x1ULL -#define V_TCB_TIMER(x) ((x) << S_TCB_TIMER) - -#define W_TCB_DACK_TIMER 0 -#define S_TCB_DACK_TIMER 5 -#define M_TCB_DACK_TIMER 0x1ULL -#define V_TCB_DACK_TIMER(x) ((x) << S_TCB_DACK_TIMER) - -#define W_TCB_DEL_FLAG 0 -#define S_TCB_DEL_FLAG 6 -#define M_TCB_DEL_FLAG 0x1ULL -#define V_TCB_DEL_FLAG(x) ((x) << S_TCB_DEL_FLAG) - -#define W_TCB_L2T_IX 0 -#define S_TCB_L2T_IX 7 -#define M_TCB_L2T_IX 0x7ffULL -#define V_TCB_L2T_IX(x) ((x) << S_TCB_L2T_IX) - -#define W_TCB_SMAC_SEL 0 -#define S_TCB_SMAC_SEL 18 -#define M_TCB_SMAC_SEL 0x3ULL -#define V_TCB_SMAC_SEL(x) ((x) << S_TCB_SMAC_SEL) - -#define W_TCB_TOS 0 -#define S_TCB_TOS 20 -#define M_TCB_TOS 0x3fULL -#define V_TCB_TOS(x) ((x) << S_TCB_TOS) - -#define W_TCB_MAX_RT 0 -#define S_TCB_MAX_RT 26 -#define M_TCB_MAX_RT 0xfULL -#define V_TCB_MAX_RT(x) ((x) << S_TCB_MAX_RT) - -#define W_TCB_T_RXTSHIFT 0 -#define S_TCB_T_RXTSHIFT 30 -#define M_TCB_T_RXTSHIFT 0xfULL -#define V_TCB_T_RXTSHIFT(x) ((x) << S_TCB_T_RXTSHIFT) - -#define W_TCB_T_DUPACKS 1 -#define S_TCB_T_DUPACKS 2 -#define M_TCB_T_DUPACKS 0xfULL -#define V_TCB_T_DUPACKS(x) ((x) << S_TCB_T_DUPACKS) - -#define W_TCB_T_MAXSEG 1 -#define S_TCB_T_MAXSEG 6 -#define M_TCB_T_MAXSEG 0xfULL -#define V_TCB_T_MAXSEG(x) ((x) << S_TCB_T_MAXSEG) - -#define W_TCB_T_FLAGS1 1 -#define S_TCB_T_FLAGS1 10 -#define M_TCB_T_FLAGS1 0xffffffffULL -#define V_TCB_T_FLAGS1(x) ((x) << S_TCB_T_FLAGS1) - -#define W_TCB_T_MIGRATION 1 -#define S_TCB_T_MIGRATION 20 -#define M_TCB_T_MIGRATION 0x1ULL -#define V_TCB_T_MIGRATION(x) ((x) << S_TCB_T_MIGRATION) - -#define W_TCB_T_FLAGS2 2 -#define S_TCB_T_FLAGS2 10 -#define M_TCB_T_FLAGS2 0x7fULL -#define V_TCB_T_FLAGS2(x) ((x) << S_TCB_T_FLAGS2) - -#define W_TCB_SND_SCALE 2 -#define S_TCB_SND_SCALE 17 -#define M_TCB_SND_SCALE 0xfULL -#define V_TCB_SND_SCALE(x) ((x) << S_TCB_SND_SCALE) - -#define W_TCB_RCV_SCALE 2 -#define S_TCB_RCV_SCALE 21 -#define M_TCB_RCV_SCALE 0xfULL -#define V_TCB_RCV_SCALE(x) ((x) << S_TCB_RCV_SCALE) - -#define W_TCB_SND_UNA_RAW 2 -#define S_TCB_SND_UNA_RAW 25 -#define M_TCB_SND_UNA_RAW 0x7ffffffULL -#define V_TCB_SND_UNA_RAW(x) ((x) << S_TCB_SND_UNA_RAW) - -#define W_TCB_SND_NXT_RAW 3 -#define S_TCB_SND_NXT_RAW 20 -#define M_TCB_SND_NXT_RAW 0x7ffffffULL -#define V_TCB_SND_NXT_RAW(x) ((x) << S_TCB_SND_NXT_RAW) - -#define W_TCB_RCV_NXT 4 -#define S_TCB_RCV_NXT 15 -#define M_TCB_RCV_NXT 0xffffffffULL -#define V_TCB_RCV_NXT(x) ((x) << S_TCB_RCV_NXT) - -#define W_TCB_RCV_ADV 5 -#define S_TCB_RCV_ADV 15 -#define M_TCB_RCV_ADV 0xffffULL -#define V_TCB_RCV_ADV(x) ((x) << S_TCB_RCV_ADV) - -#define W_TCB_SND_MAX_RAW 5 -#define S_TCB_SND_MAX_RAW 31 -#define M_TCB_SND_MAX_RAW 0x7ffffffULL -#define V_TCB_SND_MAX_RAW(x) ((x) << S_TCB_SND_MAX_RAW) - -#define W_TCB_SND_CWND 6 -#define S_TCB_SND_CWND 26 -#define M_TCB_SND_CWND 0x7ffffffULL -#define V_TCB_SND_CWND(x) ((x) << S_TCB_SND_CWND) - -#define W_TCB_SND_SSTHRESH 7 -#define S_TCB_SND_SSTHRESH 21 -#define M_TCB_SND_SSTHRESH 0x7ffffffULL -#define V_TCB_SND_SSTHRESH(x) ((x) << S_TCB_SND_SSTHRESH) - -#define W_TCB_T_RTT_TS_RECENT_AGE 8 -#define S_TCB_T_RTT_TS_RECENT_AGE 16 -#define M_TCB_T_RTT_TS_RECENT_AGE 0xffffffffULL -#define V_TCB_T_RTT_TS_RECENT_AGE(x) ((x) << S_TCB_T_RTT_TS_RECENT_AGE) - -#define W_TCB_T_RTSEQ_RECENT 9 -#define S_TCB_T_RTSEQ_RECENT 16 -#define M_TCB_T_RTSEQ_RECENT 0xffffffffULL -#define V_TCB_T_RTSEQ_RECENT(x) ((x) << S_TCB_T_RTSEQ_RECENT) - -#define W_TCB_T_SRTT 10 -#define S_TCB_T_SRTT 16 -#define M_TCB_T_SRTT 0xffffULL -#define V_TCB_T_SRTT(x) ((x) << S_TCB_T_SRTT) - -#define W_TCB_T_RTTVAR 11 -#define S_TCB_T_RTTVAR 0 -#define M_TCB_T_RTTVAR 0xffffULL -#define V_TCB_T_RTTVAR(x) ((x) << S_TCB_T_RTTVAR) - -#define W_TCB_TS_LAST_ACK_SENT_RAW 11 -#define S_TCB_TS_LAST_ACK_SENT_RAW 16 -#define M_TCB_TS_LAST_ACK_SENT_RAW 0x7ffffffULL -#define V_TCB_TS_LAST_ACK_SENT_RAW(x) ((x) << S_TCB_TS_LAST_ACK_SENT_RAW) - -#define W_TCB_DIP 12 -#define S_TCB_DIP 11 -#define M_TCB_DIP 0xffffffffULL -#define V_TCB_DIP(x) ((x) << S_TCB_DIP) - -#define W_TCB_SIP 13 -#define S_TCB_SIP 11 -#define M_TCB_SIP 0xffffffffULL -#define V_TCB_SIP(x) ((x) << S_TCB_SIP) - -#define W_TCB_DP 14 -#define S_TCB_DP 11 -#define M_TCB_DP 0xffffULL -#define V_TCB_DP(x) ((x) << S_TCB_DP) - -#define W_TCB_SP 14 -#define S_TCB_SP 27 -#define M_TCB_SP 0xffffULL -#define V_TCB_SP(x) ((x) << S_TCB_SP) - -#define W_TCB_TIMESTAMP 15 -#define S_TCB_TIMESTAMP 11 -#define M_TCB_TIMESTAMP 0xffffffffULL -#define V_TCB_TIMESTAMP(x) ((x) << S_TCB_TIMESTAMP) - -#define W_TCB_TIMESTAMP_OFFSET 16 -#define S_TCB_TIMESTAMP_OFFSET 11 -#define M_TCB_TIMESTAMP_OFFSET 0xfULL -#define V_TCB_TIMESTAMP_OFFSET(x) ((x) << S_TCB_TIMESTAMP_OFFSET) - -#define W_TCB_TX_MAX 16 -#define S_TCB_TX_MAX 15 -#define M_TCB_TX_MAX 0xffffffffULL -#define V_TCB_TX_MAX(x) ((x) << S_TCB_TX_MAX) - -#define W_TCB_TX_HDR_PTR_RAW 17 -#define S_TCB_TX_HDR_PTR_RAW 15 -#define M_TCB_TX_HDR_PTR_RAW 0x1ffffULL -#define V_TCB_TX_HDR_PTR_RAW(x) ((x) << S_TCB_TX_HDR_PTR_RAW) - -#define W_TCB_TX_LAST_PTR_RAW 18 -#define S_TCB_TX_LAST_PTR_RAW 0 -#define M_TCB_TX_LAST_PTR_RAW 0x1ffffULL -#define V_TCB_TX_LAST_PTR_RAW(x) ((x) << S_TCB_TX_LAST_PTR_RAW) - -#define W_TCB_TX_COMPACT 18 -#define S_TCB_TX_COMPACT 17 -#define M_TCB_TX_COMPACT 0x1ULL -#define V_TCB_TX_COMPACT(x) ((x) << S_TCB_TX_COMPACT) - -#define W_TCB_RX_COMPACT 18 -#define S_TCB_RX_COMPACT 18 -#define M_TCB_RX_COMPACT 0x1ULL -#define V_TCB_RX_COMPACT(x) ((x) << S_TCB_RX_COMPACT) - -#define W_TCB_RCV_WND 18 -#define S_TCB_RCV_WND 19 -#define M_TCB_RCV_WND 0x7ffffffULL -#define V_TCB_RCV_WND(x) ((x) << S_TCB_RCV_WND) - -#define W_TCB_RX_HDR_OFFSET 19 -#define S_TCB_RX_HDR_OFFSET 14 -#define M_TCB_RX_HDR_OFFSET 0x7ffffffULL -#define V_TCB_RX_HDR_OFFSET(x) ((x) << S_TCB_RX_HDR_OFFSET) - -#define W_TCB_RX_FRAG0_START_IDX_RAW 20 -#define S_TCB_RX_FRAG0_START_IDX_RAW 9 -#define M_TCB_RX_FRAG0_START_IDX_RAW 0x7ffffffULL -#define V_TCB_RX_FRAG0_START_IDX_RAW(x) ((x) << S_TCB_RX_FRAG0_START_IDX_RAW) - -#define W_TCB_RX_FRAG1_START_IDX_OFFSET 21 -#define S_TCB_RX_FRAG1_START_IDX_OFFSET 4 -#define M_TCB_RX_FRAG1_START_IDX_OFFSET 0x7ffffffULL -#define V_TCB_RX_FRAG1_START_IDX_OFFSET(x) ((x) << S_TCB_RX_FRAG1_START_IDX_OFFSET) - -#define W_TCB_RX_FRAG0_LEN 21 -#define S_TCB_RX_FRAG0_LEN 31 -#define M_TCB_RX_FRAG0_LEN 0x7ffffffULL -#define V_TCB_RX_FRAG0_LEN(x) ((x) << S_TCB_RX_FRAG0_LEN) - -#define W_TCB_RX_FRAG1_LEN 22 -#define S_TCB_RX_FRAG1_LEN 26 -#define M_TCB_RX_FRAG1_LEN 0x7ffffffULL -#define V_TCB_RX_FRAG1_LEN(x) ((x) << S_TCB_RX_FRAG1_LEN) - -#define W_TCB_NEWRENO_RECOVER 23 -#define S_TCB_NEWRENO_RECOVER 21 -#define M_TCB_NEWRENO_RECOVER 0x7ffffffULL -#define V_TCB_NEWRENO_RECOVER(x) ((x) << S_TCB_NEWRENO_RECOVER) - -#define W_TCB_PDU_HAVE_LEN 24 -#define S_TCB_PDU_HAVE_LEN 16 -#define M_TCB_PDU_HAVE_LEN 0x1ULL -#define V_TCB_PDU_HAVE_LEN(x) ((x) << S_TCB_PDU_HAVE_LEN) - -#define W_TCB_PDU_LEN 24 -#define S_TCB_PDU_LEN 17 -#define M_TCB_PDU_LEN 0xffffULL -#define V_TCB_PDU_LEN(x) ((x) << S_TCB_PDU_LEN) - -#define W_TCB_RX_QUIESCE 25 -#define S_TCB_RX_QUIESCE 1 -#define M_TCB_RX_QUIESCE 0x1ULL -#define V_TCB_RX_QUIESCE(x) ((x) << S_TCB_RX_QUIESCE) - -#define W_TCB_RX_PTR_RAW 25 -#define S_TCB_RX_PTR_RAW 2 -#define M_TCB_RX_PTR_RAW 0x1ffffULL -#define V_TCB_RX_PTR_RAW(x) ((x) << S_TCB_RX_PTR_RAW) - -#define W_TCB_CPU_NO 25 -#define S_TCB_CPU_NO 19 -#define M_TCB_CPU_NO 0x7fULL -#define V_TCB_CPU_NO(x) ((x) << S_TCB_CPU_NO) - -#define W_TCB_ULP_TYPE 25 -#define S_TCB_ULP_TYPE 26 -#define M_TCB_ULP_TYPE 0xfULL -#define V_TCB_ULP_TYPE(x) ((x) << S_TCB_ULP_TYPE) - -#define W_TCB_RX_FRAG1_PTR_RAW 25 -#define S_TCB_RX_FRAG1_PTR_RAW 30 -#define M_TCB_RX_FRAG1_PTR_RAW 0x1ffffULL -#define V_TCB_RX_FRAG1_PTR_RAW(x) ((x) << S_TCB_RX_FRAG1_PTR_RAW) - -#define W_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 26 -#define S_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 15 -#define M_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 0x7ffffffULL -#define V_TCB_RX_FRAG2_START_IDX_OFFSET_RAW(x) ((x) << S_TCB_RX_FRAG2_START_IDX_OFFSET_RAW) - -#define W_TCB_RX_FRAG2_PTR_RAW 27 -#define S_TCB_RX_FRAG2_PTR_RAW 10 -#define M_TCB_RX_FRAG2_PTR_RAW 0x1ffffULL -#define V_TCB_RX_FRAG2_PTR_RAW(x) ((x) << S_TCB_RX_FRAG2_PTR_RAW) - -#define W_TCB_RX_FRAG2_LEN_RAW 27 -#define S_TCB_RX_FRAG2_LEN_RAW 27 -#define M_TCB_RX_FRAG2_LEN_RAW 0x7ffffffULL -#define V_TCB_RX_FRAG2_LEN_RAW(x) ((x) << S_TCB_RX_FRAG2_LEN_RAW) - -#define W_TCB_RX_FRAG3_PTR_RAW 28 -#define S_TCB_RX_FRAG3_PTR_RAW 22 -#define M_TCB_RX_FRAG3_PTR_RAW 0x1ffffULL -#define V_TCB_RX_FRAG3_PTR_RAW(x) ((x) << S_TCB_RX_FRAG3_PTR_RAW) - -#define W_TCB_RX_FRAG3_LEN_RAW 29 -#define S_TCB_RX_FRAG3_LEN_RAW 7 -#define M_TCB_RX_FRAG3_LEN_RAW 0x7ffffffULL -#define V_TCB_RX_FRAG3_LEN_RAW(x) ((x) << S_TCB_RX_FRAG3_LEN_RAW) - -#define W_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 30 -#define S_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 2 -#define M_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 0x7ffffffULL -#define V_TCB_RX_FRAG3_START_IDX_OFFSET_RAW(x) ((x) << S_TCB_RX_FRAG3_START_IDX_OFFSET_RAW) - -#define W_TCB_PDU_HDR_LEN 30 -#define S_TCB_PDU_HDR_LEN 29 -#define M_TCB_PDU_HDR_LEN 0xffULL -#define V_TCB_PDU_HDR_LEN(x) ((x) << S_TCB_PDU_HDR_LEN) - -#define W_TCB_SLUSH1 31 -#define S_TCB_SLUSH1 5 -#define M_TCB_SLUSH1 0x7ffffULL -#define V_TCB_SLUSH1(x) ((x) << S_TCB_SLUSH1) - -#define W_TCB_ULP_RAW 31 -#define S_TCB_ULP_RAW 24 -#define M_TCB_ULP_RAW 0xffULL -#define V_TCB_ULP_RAW(x) ((x) << S_TCB_ULP_RAW) - -#define W_TCB_DDP_RDMAP_VERSION 25 -#define S_TCB_DDP_RDMAP_VERSION 30 -#define M_TCB_DDP_RDMAP_VERSION 0x1ULL -#define V_TCB_DDP_RDMAP_VERSION(x) ((x) << S_TCB_DDP_RDMAP_VERSION) - -#define W_TCB_MARKER_ENABLE_RX 25 -#define S_TCB_MARKER_ENABLE_RX 31 -#define M_TCB_MARKER_ENABLE_RX 0x1ULL -#define V_TCB_MARKER_ENABLE_RX(x) ((x) << S_TCB_MARKER_ENABLE_RX) - -#define W_TCB_MARKER_ENABLE_TX 26 -#define S_TCB_MARKER_ENABLE_TX 0 -#define M_TCB_MARKER_ENABLE_TX 0x1ULL -#define V_TCB_MARKER_ENABLE_TX(x) ((x) << S_TCB_MARKER_ENABLE_TX) - -#define W_TCB_CRC_ENABLE 26 -#define S_TCB_CRC_ENABLE 1 -#define M_TCB_CRC_ENABLE 0x1ULL -#define V_TCB_CRC_ENABLE(x) ((x) << S_TCB_CRC_ENABLE) - -#define W_TCB_IRS_ULP 26 -#define S_TCB_IRS_ULP 2 -#define M_TCB_IRS_ULP 0x1ffULL -#define V_TCB_IRS_ULP(x) ((x) << S_TCB_IRS_ULP) - -#define W_TCB_ISS_ULP 26 -#define S_TCB_ISS_ULP 11 -#define M_TCB_ISS_ULP 0x1ffULL -#define V_TCB_ISS_ULP(x) ((x) << S_TCB_ISS_ULP) - -#define W_TCB_TX_PDU_LEN 26 -#define S_TCB_TX_PDU_LEN 20 -#define M_TCB_TX_PDU_LEN 0x3fffULL -#define V_TCB_TX_PDU_LEN(x) ((x) << S_TCB_TX_PDU_LEN) - -#define W_TCB_TX_PDU_OUT 27 -#define S_TCB_TX_PDU_OUT 2 -#define M_TCB_TX_PDU_OUT 0x1ULL -#define V_TCB_TX_PDU_OUT(x) ((x) << S_TCB_TX_PDU_OUT) - -#define W_TCB_CQ_IDX_SQ 27 -#define S_TCB_CQ_IDX_SQ 3 -#define M_TCB_CQ_IDX_SQ 0xffffULL -#define V_TCB_CQ_IDX_SQ(x) ((x) << S_TCB_CQ_IDX_SQ) - -#define W_TCB_CQ_IDX_RQ 27 -#define S_TCB_CQ_IDX_RQ 19 -#define M_TCB_CQ_IDX_RQ 0xffffULL -#define V_TCB_CQ_IDX_RQ(x) ((x) << S_TCB_CQ_IDX_RQ) - -#define W_TCB_QP_ID 28 -#define S_TCB_QP_ID 3 -#define M_TCB_QP_ID 0xffffULL -#define V_TCB_QP_ID(x) ((x) << S_TCB_QP_ID) - -#define W_TCB_PD_ID 28 -#define S_TCB_PD_ID 19 -#define M_TCB_PD_ID 0xffffULL -#define V_TCB_PD_ID(x) ((x) << S_TCB_PD_ID) - -#define W_TCB_STAG 29 -#define S_TCB_STAG 3 -#define M_TCB_STAG 0xffffffffULL -#define V_TCB_STAG(x) ((x) << S_TCB_STAG) - -#define W_TCB_RQ_START 30 -#define S_TCB_RQ_START 3 -#define M_TCB_RQ_START 0x3ffffffULL -#define V_TCB_RQ_START(x) ((x) << S_TCB_RQ_START) - -#define W_TCB_RQ_MSN 30 -#define S_TCB_RQ_MSN 29 -#define M_TCB_RQ_MSN 0x3ffULL -#define V_TCB_RQ_MSN(x) ((x) << S_TCB_RQ_MSN) - -#define W_TCB_RQ_MAX_OFFSET 31 -#define S_TCB_RQ_MAX_OFFSET 7 -#define M_TCB_RQ_MAX_OFFSET 0xfULL -#define V_TCB_RQ_MAX_OFFSET(x) ((x) << S_TCB_RQ_MAX_OFFSET) - -#define W_TCB_RQ_WRITE_PTR 31 -#define S_TCB_RQ_WRITE_PTR 11 -#define M_TCB_RQ_WRITE_PTR 0x3ffULL -#define V_TCB_RQ_WRITE_PTR(x) ((x) << S_TCB_RQ_WRITE_PTR) - -#define W_TCB_INB_WRITE_PERM 31 -#define S_TCB_INB_WRITE_PERM 21 -#define M_TCB_INB_WRITE_PERM 0x1ULL -#define V_TCB_INB_WRITE_PERM(x) ((x) << S_TCB_INB_WRITE_PERM) - -#define W_TCB_INB_READ_PERM 31 -#define S_TCB_INB_READ_PERM 22 -#define M_TCB_INB_READ_PERM 0x1ULL -#define V_TCB_INB_READ_PERM(x) ((x) << S_TCB_INB_READ_PERM) - -#define W_TCB_ORD_L_BIT_VLD 31 -#define S_TCB_ORD_L_BIT_VLD 23 -#define M_TCB_ORD_L_BIT_VLD 0x1ULL -#define V_TCB_ORD_L_BIT_VLD(x) ((x) << S_TCB_ORD_L_BIT_VLD) - -#define W_TCB_RDMAP_OPCODE 31 -#define S_TCB_RDMAP_OPCODE 24 -#define M_TCB_RDMAP_OPCODE 0xfULL -#define V_TCB_RDMAP_OPCODE(x) ((x) << S_TCB_RDMAP_OPCODE) - -#define W_TCB_TX_FLUSH 31 -#define S_TCB_TX_FLUSH 28 -#define M_TCB_TX_FLUSH 0x1ULL -#define V_TCB_TX_FLUSH(x) ((x) << S_TCB_TX_FLUSH) - -#define W_TCB_TX_OOS_RXMT 31 -#define S_TCB_TX_OOS_RXMT 29 -#define M_TCB_TX_OOS_RXMT 0x1ULL -#define V_TCB_TX_OOS_RXMT(x) ((x) << S_TCB_TX_OOS_RXMT) - -#define W_TCB_TX_OOS_TXMT 31 -#define S_TCB_TX_OOS_TXMT 30 -#define M_TCB_TX_OOS_TXMT 0x1ULL -#define V_TCB_TX_OOS_TXMT(x) ((x) << S_TCB_TX_OOS_TXMT) - -#define W_TCB_SLUSH_AUX2 31 -#define S_TCB_SLUSH_AUX2 31 -#define M_TCB_SLUSH_AUX2 0x1ULL -#define V_TCB_SLUSH_AUX2(x) ((x) << S_TCB_SLUSH_AUX2) - -#define W_TCB_RX_FRAG1_PTR_RAW2 25 -#define S_TCB_RX_FRAG1_PTR_RAW2 30 -#define M_TCB_RX_FRAG1_PTR_RAW2 0x1ffffULL -#define V_TCB_RX_FRAG1_PTR_RAW2(x) ((x) << S_TCB_RX_FRAG1_PTR_RAW2) - -#define W_TCB_RX_DDP_FLAGS 26 -#define S_TCB_RX_DDP_FLAGS 15 -#define M_TCB_RX_DDP_FLAGS 0x3ffULL -#define V_TCB_RX_DDP_FLAGS(x) ((x) << S_TCB_RX_DDP_FLAGS) - -#define W_TCB_SLUSH_AUX3 26 -#define S_TCB_SLUSH_AUX3 31 -#define M_TCB_SLUSH_AUX3 0x1ffULL -#define V_TCB_SLUSH_AUX3(x) ((x) << S_TCB_SLUSH_AUX3) - -#define W_TCB_RX_DDP_BUF0_OFFSET 27 -#define S_TCB_RX_DDP_BUF0_OFFSET 8 -#define M_TCB_RX_DDP_BUF0_OFFSET 0x3fffffULL -#define V_TCB_RX_DDP_BUF0_OFFSET(x) ((x) << S_TCB_RX_DDP_BUF0_OFFSET) - -#define W_TCB_RX_DDP_BUF0_LEN 27 -#define S_TCB_RX_DDP_BUF0_LEN 30 -#define M_TCB_RX_DDP_BUF0_LEN 0x3fffffULL -#define V_TCB_RX_DDP_BUF0_LEN(x) ((x) << S_TCB_RX_DDP_BUF0_LEN) - -#define W_TCB_RX_DDP_BUF1_OFFSET 28 -#define S_TCB_RX_DDP_BUF1_OFFSET 20 -#define M_TCB_RX_DDP_BUF1_OFFSET 0x3fffffULL -#define V_TCB_RX_DDP_BUF1_OFFSET(x) ((x) << S_TCB_RX_DDP_BUF1_OFFSET) - -#define W_TCB_RX_DDP_BUF1_LEN 29 -#define S_TCB_RX_DDP_BUF1_LEN 10 -#define M_TCB_RX_DDP_BUF1_LEN 0x3fffffULL -#define V_TCB_RX_DDP_BUF1_LEN(x) ((x) << S_TCB_RX_DDP_BUF1_LEN) - -#define W_TCB_RX_DDP_BUF0_TAG 30 -#define S_TCB_RX_DDP_BUF0_TAG 0 -#define M_TCB_RX_DDP_BUF0_TAG 0xffffffffULL -#define V_TCB_RX_DDP_BUF0_TAG(x) ((x) << S_TCB_RX_DDP_BUF0_TAG) - -#define W_TCB_RX_DDP_BUF1_TAG 31 -#define S_TCB_RX_DDP_BUF1_TAG 0 -#define M_TCB_RX_DDP_BUF1_TAG 0xffffffffULL -#define V_TCB_RX_DDP_BUF1_TAG(x) ((x) << S_TCB_RX_DDP_BUF1_TAG) - -#define S_TF_DACK 10 -#define V_TF_DACK(x) ((x) << S_TF_DACK) - -#define S_TF_NAGLE 11 -#define V_TF_NAGLE(x) ((x) << S_TF_NAGLE) - -#define S_TF_RECV_SCALE 12 -#define V_TF_RECV_SCALE(x) ((x) << S_TF_RECV_SCALE) - -#define S_TF_RECV_TSTMP 13 -#define V_TF_RECV_TSTMP(x) ((x) << S_TF_RECV_TSTMP) - -#define S_TF_RECV_SACK 14 -#define V_TF_RECV_SACK(x) ((x) << S_TF_RECV_SACK) - -#define S_TF_TURBO 15 -#define V_TF_TURBO(x) ((x) << S_TF_TURBO) - -#define S_TF_KEEPALIVE 16 -#define V_TF_KEEPALIVE(x) ((x) << S_TF_KEEPALIVE) - -#define S_TF_TCAM_BYPASS 17 -#define V_TF_TCAM_BYPASS(x) ((x) << S_TF_TCAM_BYPASS) - -#define S_TF_CORE_FIN 18 -#define V_TF_CORE_FIN(x) ((x) << S_TF_CORE_FIN) - -#define S_TF_CORE_MORE 19 -#define V_TF_CORE_MORE(x) ((x) << S_TF_CORE_MORE) - -#define S_TF_MIGRATING 20 -#define V_TF_MIGRATING(x) ((x) << S_TF_MIGRATING) - -#define S_TF_ACTIVE_OPEN 21 -#define V_TF_ACTIVE_OPEN(x) ((x) << S_TF_ACTIVE_OPEN) - -#define S_TF_ASK_MODE 22 -#define V_TF_ASK_MODE(x) ((x) << S_TF_ASK_MODE) - -#define S_TF_NON_OFFLOAD 23 -#define V_TF_NON_OFFLOAD(x) ((x) << S_TF_NON_OFFLOAD) - -#define S_TF_MOD_SCHD 24 -#define V_TF_MOD_SCHD(x) ((x) << S_TF_MOD_SCHD) - -#define S_TF_MOD_SCHD_REASON0 25 -#define V_TF_MOD_SCHD_REASON0(x) ((x) << S_TF_MOD_SCHD_REASON0) - -#define S_TF_MOD_SCHD_REASON1 26 -#define V_TF_MOD_SCHD_REASON1(x) ((x) << S_TF_MOD_SCHD_REASON1) - -#define S_TF_MOD_SCHD_RX 27 -#define V_TF_MOD_SCHD_RX(x) ((x) << S_TF_MOD_SCHD_RX) - -#define S_TF_CORE_PUSH 28 -#define V_TF_CORE_PUSH(x) ((x) << S_TF_CORE_PUSH) - -#define S_TF_RCV_COALESCE_ENABLE 29 -#define V_TF_RCV_COALESCE_ENABLE(x) ((x) << S_TF_RCV_COALESCE_ENABLE) - -#define S_TF_RCV_COALESCE_PUSH 30 -#define V_TF_RCV_COALESCE_PUSH(x) ((x) << S_TF_RCV_COALESCE_PUSH) - -#define S_TF_RCV_COALESCE_LAST_PSH 31 -#define V_TF_RCV_COALESCE_LAST_PSH(x) ((x) << S_TF_RCV_COALESCE_LAST_PSH) - -#define S_TF_RCV_COALESCE_HEARTBEAT 32 -#define V_TF_RCV_COALESCE_HEARTBEAT(x) ((x) << S_TF_RCV_COALESCE_HEARTBEAT) - -#define S_TF_HALF_CLOSE 33 -#define V_TF_HALF_CLOSE(x) ((x) << S_TF_HALF_CLOSE) - -#define S_TF_DACK_MSS 34 -#define V_TF_DACK_MSS(x) ((x) << S_TF_DACK_MSS) - -#define S_TF_CCTRL_SEL0 35 -#define V_TF_CCTRL_SEL0(x) ((x) << S_TF_CCTRL_SEL0) - -#define S_TF_CCTRL_SEL1 36 -#define V_TF_CCTRL_SEL1(x) ((x) << S_TF_CCTRL_SEL1) - -#define S_TF_TCP_NEWRENO_FAST_RECOVERY 37 -#define V_TF_TCP_NEWRENO_FAST_RECOVERY(x) ((x) << S_TF_TCP_NEWRENO_FAST_RECOVERY) - -#define S_TF_TX_PACE_AUTO 38 -#define V_TF_TX_PACE_AUTO(x) ((x) << S_TF_TX_PACE_AUTO) - -#define S_TF_PEER_FIN_HELD 39 -#define V_TF_PEER_FIN_HELD(x) ((x) << S_TF_PEER_FIN_HELD) - -#define S_TF_CORE_URG 40 -#define V_TF_CORE_URG(x) ((x) << S_TF_CORE_URG) - -#define S_TF_RDMA_ERROR 41 -#define V_TF_RDMA_ERROR(x) ((x) << S_TF_RDMA_ERROR) - -#define S_TF_SSWS_DISABLED 42 -#define V_TF_SSWS_DISABLED(x) ((x) << S_TF_SSWS_DISABLED) - -#define S_TF_DUPACK_COUNT_ODD 43 -#define V_TF_DUPACK_COUNT_ODD(x) ((x) << S_TF_DUPACK_COUNT_ODD) - -#define S_TF_TX_CHANNEL 44 -#define V_TF_TX_CHANNEL(x) ((x) << S_TF_TX_CHANNEL) - -#define S_TF_RX_CHANNEL 45 -#define V_TF_RX_CHANNEL(x) ((x) << S_TF_RX_CHANNEL) - -#define S_TF_TX_PACE_FIXED 46 -#define V_TF_TX_PACE_FIXED(x) ((x) << S_TF_TX_PACE_FIXED) - -#define S_TF_RDMA_FLM_ERROR 47 -#define V_TF_RDMA_FLM_ERROR(x) ((x) << S_TF_RDMA_FLM_ERROR) - -#define S_TF_RX_FLOW_CONTROL_DISABLE 48 -#define V_TF_RX_FLOW_CONTROL_DISABLE(x) ((x) << S_TF_RX_FLOW_CONTROL_DISABLE) - -#endif /* _TCB_DEFS_H */ -- cgit From 4b2a67362e7814641730fa4c561a9ef609fc7a03 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 2 Oct 2019 15:25:14 +0300 Subject: RDMA/mlx5: Group boolean parameters to take less space Clean the code to store all boolean parameters inside one variable. Link: https://lore.kernel.org/r/20191002122517.17721-2-leon@kernel.org Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 2ceaef3ea3fb..bf30d53d94dc 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -958,7 +958,10 @@ struct mlx5_ib_dev { /* serialize update of capability mask */ struct mutex cap_mask_mutex; - bool ib_active; + u8 ib_active:1; + u8 fill_delay:1; + u8 is_rep:1; + u8 lag_active:1; struct umr_common umrc; /* sync used page count stats */ @@ -967,7 +970,6 @@ struct mlx5_ib_dev { struct timer_list delay_timer; /* Prevents soft lock on massive reg MRs */ struct mutex slow_path_mutex; - int fill_delay; struct ib_odp_caps odp_caps; u64 odp_max_size; struct mlx5_ib_pf_eq odp_pf_eq; @@ -988,8 +990,6 @@ struct mlx5_ib_dev { struct mlx5_sq_bfreg fp_bfreg; struct mlx5_ib_delay_drop delay_drop; const struct mlx5_ib_profile *profile; - bool is_rep; - int lag_active; struct mlx5_ib_lb_state lb; u8 umr_fence; -- cgit From 2d67c0798821c453d2f165e986567de50f33cbed Mon Sep 17 00:00:00 2001 From: Erez Alfasi Date: Wed, 2 Oct 2019 15:25:15 +0300 Subject: IB/mlx5: Remove unnecessary return statement There is no reason to call return at the end of function which returns void. Remove this unnecessary statement. Link: https://lore.kernel.org/r/20191002122517.17721-3-leon@kernel.org Signed-off-by: Erez Alfasi Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/odp.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 2e9b43061797..95cf0249b015 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -359,8 +359,6 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) && !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled)) caps->general_caps |= IB_ODP_SUPPORT_IMPLICIT; - - return; } static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev, -- cgit From 6f26b2ac699cc53f7da9598be6151818461af2a1 Mon Sep 17 00:00:00 2001 From: Erez Alfasi Date: Wed, 2 Oct 2019 15:25:16 +0300 Subject: IB/mlx5: Remove unnecessary else statement 'else' is not generally useful after a break or return. Remove this unnecessary statement. Link: https://lore.kernel.org/r/20191002122517.17721-4-leon@kernel.org Signed-off-by: Erez Alfasi Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 831539419c30..b95c2b05f682 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -844,8 +844,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, resp_len = sizeof(resp.comp_mask) + sizeof(resp.response_length); if (uhw->outlen && uhw->outlen < resp_len) return -EINVAL; - else - resp.response_length = resp_len; + + resp.response_length = resp_len; if (uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen)) return -EINVAL; -- cgit From 39c48c514601d76f8750d1739928c9577b1785d9 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Thu, 3 Oct 2019 01:48:35 -0400 Subject: RDMA/bnxt_re: Enable SRIOV VF support on Broadcom's 57500 adapter series Broadcom's 575xx adapter series has support for SRIOV VFs. Making changes to enable SRIOV VF support. There are two major area where changes are done: - Added new DB location for control-path and data-path DB ring - New devices do not need to issue the sriov-config slow-path command thus, skipping to call that firmware command. For now enabling support for 64 RoCE VFs. Link: https://lore.kernel.org/r/1570081715-14301-1-git-send-email-devesh.sharma@broadcom.com Signed-off-by: Devesh Sharma Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/bnxt_re.h | 1 + drivers/infiniband/hw/bnxt_re/main.c | 133 +++++++++++++++++------------ drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 5 +- 3 files changed, 82 insertions(+), 57 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index e55a1666c0cd..725b2350e349 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -108,6 +108,7 @@ struct bnxt_re_sqp_entries { #define BNXT_RE_MAX_MSIX 9 #define BNXT_RE_AEQ_IDX 0 #define BNXT_RE_NQ_IDX 1 +#define BNXT_RE_GEN_P5_MAX_VF 64 struct bnxt_re_dev { struct ib_device ibdev; diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 7b914bdd7c13..d6785b8339d2 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -119,61 +119,76 @@ static void bnxt_re_get_sriov_func_type(struct bnxt_re_dev *rdev) * reserved for the function. The driver may choose to allocate fewer * resources than the firmware maximum. */ -static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev) +static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev) { - u32 vf_qps = 0, vf_srqs = 0, vf_cqs = 0, vf_mrws = 0, vf_gids = 0; - u32 i; - u32 vf_pct; - u32 num_vfs; - struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; + struct bnxt_qplib_dev_attr *attr; + struct bnxt_qplib_ctx *ctx; + int i; - rdev->qplib_ctx.qpc_count = min_t(u32, BNXT_RE_MAX_QPC_COUNT, - dev_attr->max_qp); + attr = &rdev->dev_attr; + ctx = &rdev->qplib_ctx; - rdev->qplib_ctx.mrw_count = BNXT_RE_MAX_MRW_COUNT_256K; + ctx->qpc_count = min_t(u32, BNXT_RE_MAX_QPC_COUNT, + attr->max_qp); + ctx->mrw_count = BNXT_RE_MAX_MRW_COUNT_256K; /* Use max_mr from fw since max_mrw does not get set */ - rdev->qplib_ctx.mrw_count = min_t(u32, rdev->qplib_ctx.mrw_count, - dev_attr->max_mr); - rdev->qplib_ctx.srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT, - dev_attr->max_srq); - rdev->qplib_ctx.cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT, - dev_attr->max_cq); - - for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) - rdev->qplib_ctx.tqm_count[i] = - rdev->dev_attr.tqm_alloc_reqs[i]; - - if (rdev->num_vfs) { - /* - * Reserve a set of resources for the PF. Divide the remaining - * resources among the VFs - */ - vf_pct = 100 - BNXT_RE_PCT_RSVD_FOR_PF; - num_vfs = 100 * rdev->num_vfs; - vf_qps = (rdev->qplib_ctx.qpc_count * vf_pct) / num_vfs; - vf_srqs = (rdev->qplib_ctx.srqc_count * vf_pct) / num_vfs; - vf_cqs = (rdev->qplib_ctx.cq_count * vf_pct) / num_vfs; - /* - * The driver allows many more MRs than other resources. If the - * firmware does also, then reserve a fixed amount for the PF - * and divide the rest among VFs. VFs may use many MRs for NFS - * mounts, ISER, NVME applications, etc. If the firmware - * severely restricts the number of MRs, then let PF have - * half and divide the rest among VFs, as for the other - * resource types. - */ - if (rdev->qplib_ctx.mrw_count < BNXT_RE_MAX_MRW_COUNT_64K) - vf_mrws = rdev->qplib_ctx.mrw_count * vf_pct / num_vfs; - else - vf_mrws = (rdev->qplib_ctx.mrw_count - - BNXT_RE_RESVD_MR_FOR_PF) / rdev->num_vfs; - vf_gids = BNXT_RE_MAX_GID_PER_VF; + ctx->mrw_count = min_t(u32, ctx->mrw_count, attr->max_mr); + ctx->srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT, + attr->max_srq); + ctx->cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT, attr->max_cq); + if (!bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx)) + for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) + rdev->qplib_ctx.tqm_count[i] = + rdev->dev_attr.tqm_alloc_reqs[i]; +} + +static void bnxt_re_limit_vf_res(struct bnxt_qplib_ctx *qplib_ctx, u32 num_vf) +{ + struct bnxt_qplib_vf_res *vf_res; + u32 mrws = 0; + u32 vf_pct; + u32 nvfs; + + vf_res = &qplib_ctx->vf_res; + /* + * Reserve a set of resources for the PF. Divide the remaining + * resources among the VFs + */ + vf_pct = 100 - BNXT_RE_PCT_RSVD_FOR_PF; + nvfs = num_vf; + num_vf = 100 * num_vf; + vf_res->max_qp_per_vf = (qplib_ctx->qpc_count * vf_pct) / num_vf; + vf_res->max_srq_per_vf = (qplib_ctx->srqc_count * vf_pct) / num_vf; + vf_res->max_cq_per_vf = (qplib_ctx->cq_count * vf_pct) / num_vf; + /* + * The driver allows many more MRs than other resources. If the + * firmware does also, then reserve a fixed amount for the PF and + * divide the rest among VFs. VFs may use many MRs for NFS + * mounts, ISER, NVME applications, etc. If the firmware severely + * restricts the number of MRs, then let PF have half and divide + * the rest among VFs, as for the other resource types. + */ + if (qplib_ctx->mrw_count < BNXT_RE_MAX_MRW_COUNT_64K) { + mrws = qplib_ctx->mrw_count * vf_pct; + nvfs = num_vf; + } else { + mrws = qplib_ctx->mrw_count - BNXT_RE_RESVD_MR_FOR_PF; } - rdev->qplib_ctx.vf_res.max_mrw_per_vf = vf_mrws; - rdev->qplib_ctx.vf_res.max_gid_per_vf = vf_gids; - rdev->qplib_ctx.vf_res.max_qp_per_vf = vf_qps; - rdev->qplib_ctx.vf_res.max_srq_per_vf = vf_srqs; - rdev->qplib_ctx.vf_res.max_cq_per_vf = vf_cqs; + vf_res->max_mrw_per_vf = (mrws / nvfs); + vf_res->max_gid_per_vf = BNXT_RE_MAX_GID_PER_VF; +} + +static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev) +{ + u32 num_vfs; + + memset(&rdev->qplib_ctx.vf_res, 0, sizeof(struct bnxt_qplib_vf_res)); + bnxt_re_limit_pf_res(rdev); + + num_vfs = bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx) ? + BNXT_RE_GEN_P5_MAX_VF : rdev->num_vfs; + if (num_vfs) + bnxt_re_limit_vf_res(&rdev->qplib_ctx, num_vfs); } /* for handling bnxt_en callbacks later */ @@ -193,9 +208,11 @@ static void bnxt_re_sriov_config(void *p, int num_vfs) return; rdev->num_vfs = num_vfs; - bnxt_re_set_resource_limits(rdev); - bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw, - &rdev->qplib_ctx); + if (!bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx)) { + bnxt_re_set_resource_limits(rdev); + bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw, + &rdev->qplib_ctx); + } } static void bnxt_re_shutdown(void *p) @@ -894,10 +911,14 @@ static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq, return 0; } +#define BNXT_RE_GEN_P5_PF_NQ_DB 0x10000 +#define BNXT_RE_GEN_P5_VF_NQ_DB 0x4000 static u32 bnxt_re_get_nqdb_offset(struct bnxt_re_dev *rdev, u16 indx) { return bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx) ? - 0x10000 : rdev->msix_entries[indx].db_offset; + (rdev->is_virtfn ? BNXT_RE_GEN_P5_VF_NQ_DB : + BNXT_RE_GEN_P5_PF_NQ_DB) : + rdev->msix_entries[indx].db_offset; } static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev) @@ -1407,8 +1428,8 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) rdev->is_virtfn); if (rc) goto disable_rcfw; - if (!rdev->is_virtfn) - bnxt_re_set_resource_limits(rdev); + + bnxt_re_set_resource_limits(rdev); rc = bnxt_qplib_alloc_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx, 0, bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx)); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 60c8f76aab33..5cdfa84faf85 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -494,8 +494,10 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw, * shall setup this area for VF. Skipping the * HW programming */ - if (is_virtfn || bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx)) + if (is_virtfn) goto skip_ctx_setup; + if (bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx)) + goto config_vf_res; level = ctx->qpc_tbl.level; req.qpc_pg_size_qpc_lvl = (level << CMDQ_INITIALIZE_FW_QPC_LVL_SFT) | @@ -540,6 +542,7 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw, req.number_of_srq = cpu_to_le32(ctx->srqc_tbl.max_elements); req.number_of_cq = cpu_to_le32(ctx->cq_tbl.max_elements); +config_vf_res: req.max_qp_per_vf = cpu_to_le32(ctx->vf_res.max_qp_per_vf); req.max_mrw_per_vf = cpu_to_le32(ctx->vf_res.max_mrw_per_vf); req.max_srq_per_vf = cpu_to_le32(ctx->vf_res.max_srq_per_vf); -- cgit From 03232cc43cff18ae44a30ceb455a036586ee8244 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 6 Oct 2019 18:54:43 +0300 Subject: IB/mlx5: Introduce and use mkey context setting helper routine Introduce and use set_mkc_access_pd_addr_fields() which sets mkey context's access rights, PD, address fields. Thereby avoid the code duplication. Link: https://lore.kernel.org/r/20191006155443.31068-1-leon@kernel.org Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 1eff031ef048..2b57f0e085d6 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -705,6 +705,20 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) return 0; } +static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, + struct ib_pd *pd) +{ + MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); + MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); + MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); + MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); + MLX5_SET(mkc, mkc, lr, 1); + + MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET64(mkc, mkc, start_addr, start_addr); +} + struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) { struct mlx5_ib_dev *dev = to_mdev(pd->device); @@ -728,16 +742,8 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); - MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); - MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); - MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); - MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); - MLX5_SET(mkc, mkc, lr, 1); - MLX5_SET(mkc, mkc, length64, 1); - MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); - MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET64(mkc, mkc, start_addr, 0); + set_mkc_access_pd_addr_fields(mkc, acc, 0, pd); err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); if (err) @@ -1195,16 +1201,8 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3); MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7); - MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); - MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); - MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); - MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); - MLX5_SET(mkc, mkc, lr, 1); - MLX5_SET64(mkc, mkc, len, length); - MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); - MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET64(mkc, mkc, start_addr, start_addr); + set_mkc_access_pd_addr_fields(mkc, acc, start_addr, pd); err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); if (err) -- cgit From 9f7d7064009c37cb26eee4a83302cf077fe180d6 Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Wed, 4 Sep 2019 11:14:41 +0800 Subject: RDMA/hns: remove a redundant le16_to_cpu Type of ah->av.vlan is u16, there will be a problem using le16_to_cpu on it. Fixes: 82e620d9c3a0 ("RDMA/hns: Modify the data structure of hns_roce_av") Signed-off-by: Weihang Li Link: https://lore.kernel.org/r/1567566885-23088-2-git-send-email-liweihang@hisilicon.com Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 7a89d669f8bf..c5ab8ca4d32e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -389,7 +389,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M, V2_UD_SEND_WQE_BYTE_36_VLAN_S, - le16_to_cpu(ah->av.vlan)); + ah->av.vlan); roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, -- cgit From e8a07de57ea4ca7c2d604871c52826e66899fc70 Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Wed, 4 Sep 2019 11:14:42 +0800 Subject: RDMA/hns: Fix wrong parameters when initial mtt of srq->idx_que The parameters npages used to initial mtt of srq->idx_que shouldn't be same with srq's. And page_shift should be calculated from idx_buf_pg_sz. This patch fixes above issues and use field named npage and page_shift in hns_roce_buf instead of two temporary variables to let us use them anywhere. Fixes: 18df508c7970 ("RDMA/hns: Remove if-else judgment statements for creating srq") Signed-off-by: Weihang Li Link: https://lore.kernel.org/r/1567566885-23088-3-git-send-email-liweihang@hisilicon.com Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_srq.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 9591457eb768..d96041d806f6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -180,8 +180,7 @@ static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata, { struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device); struct hns_roce_ib_create_srq ucmd; - u32 page_shift; - u32 npages; + struct hns_roce_buf *buf; int ret; if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) @@ -191,11 +190,13 @@ static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata, if (IS_ERR(srq->umem)) return PTR_ERR(srq->umem); - npages = (ib_umem_page_count(srq->umem) + - (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) / - (1 << hr_dev->caps.srqwqe_buf_pg_sz); - page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; - ret = hns_roce_mtt_init(hr_dev, npages, page_shift, &srq->mtt); + buf = &srq->buf; + buf->npages = (ib_umem_page_count(srq->umem) + + (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) / + (1 << hr_dev->caps.srqwqe_buf_pg_sz); + buf->page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; + ret = hns_roce_mtt_init(hr_dev, buf->npages, buf->page_shift, + &srq->mtt); if (ret) goto err_user_buf; @@ -212,9 +213,12 @@ static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata, goto err_user_srq_mtt; } - ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(srq->idx_que.umem), - PAGE_SHIFT, &srq->idx_que.mtt); - + buf = &srq->idx_que.idx_buf; + buf->npages = DIV_ROUND_UP(ib_umem_page_count(srq->idx_que.umem), + 1 << hr_dev->caps.idx_buf_pg_sz); + buf->page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz; + ret = hns_roce_mtt_init(hr_dev, buf->npages, buf->page_shift, + &srq->idx_que.mtt); if (ret) { dev_err(hr_dev->dev, "hns_roce_mtt_init error for idx que\n"); goto err_user_idx_mtt; -- cgit From 32883228b980ed0a3ea1d13230f90622f0047908 Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Wed, 4 Sep 2019 11:14:43 +0800 Subject: RDMA/hns: Modify variable/field name from vlan to vlan_id The name of vlan and vlan_tag is not clear enough, it's actually means vlan id. Signed-off-by: Weihang Li Link: https://lore.kernel.org/r/1567566885-23088-4-git-send-email-liweihang@hisilicon.com Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_ah.c | 14 +++++++------- drivers/infiniband/hw/hns/hns_roce_device.h | 2 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 10 +++++----- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 90e08c0c332d..8a522e14ef62 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -46,32 +46,32 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, const struct ib_gid_attr *gid_attr; struct device *dev = hr_dev->dev; struct hns_roce_ah *ah = to_hr_ah(ibah); - u16 vlan_tag = 0xffff; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); + u16 vlan_id = 0xffff; bool vlan_en = false; int ret; gid_attr = ah_attr->grh.sgid_attr; - ret = rdma_read_gid_l2_fields(gid_attr, &vlan_tag, NULL); + ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL); if (ret) return ret; /* Get mac address */ memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); - if (vlan_tag < VLAN_CFI_MASK) { + if (vlan_id < VLAN_N_VID) { vlan_en = true; - vlan_tag |= (rdma_ah_get_sl(ah_attr) & + vlan_id |= (rdma_ah_get_sl(ah_attr) & HNS_ROCE_VLAN_SL_BIT_MASK) << HNS_ROCE_VLAN_SL_SHIFT; } ah->av.port = rdma_ah_get_port_num(ah_attr); ah->av.gid_index = grh->sgid_index; - ah->av.vlan = vlan_tag; + ah->av.vlan_id = vlan_id; ah->av.vlan_en = vlan_en; - dev_dbg(dev, "gid_index = 0x%x,vlan = 0x%x\n", ah->av.gid_index, - ah->av.vlan); + dev_dbg(dev, "gid_index = 0x%x,vlan_id = 0x%x\n", ah->av.gid_index, + ah->av.vlan_id); if (rdma_ah_get_static_rate(ah_attr)) ah->av.stat_rate = IB_RATE_10_GBPS; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 96d1302abde1..c810898e0676 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -582,7 +582,7 @@ struct hns_roce_av { u8 tclass; u8 dgid[HNS_ROCE_GID_SIZE]; u8 mac[ETH_ALEN]; - u16 vlan; + u16 vlan_id; bool vlan_en; }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index c5ab8ca4d32e..f7c83564b431 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -389,7 +389,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M, V2_UD_SEND_WQE_BYTE_36_VLAN_S, - ah->av.vlan); + ah->av.vlan_id); roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, @@ -4061,8 +4061,8 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); const struct ib_gid_attr *gid_attr = NULL; int is_roce_protocol; + u16 vlan_id = 0xffff; bool is_udp = false; - u16 vlan = 0xffff; u8 ib_port; u8 hr_port; int ret; @@ -4074,7 +4074,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, if (is_roce_protocol) { gid_attr = attr->ah_attr.grh.sgid_attr; - ret = rdma_read_gid_l2_fields(gid_attr, &vlan, NULL); + ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL); if (ret) return ret; @@ -4083,7 +4083,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, IB_GID_TYPE_ROCE_UDP_ENCAP); } - if (vlan < VLAN_CFI_MASK) { + if (vlan_id < VLAN_N_VID) { roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1); roce_set_bit(qpc_mask->byte_76_srqn_op_en, @@ -4095,7 +4095,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, } roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M, - V2_QPC_BYTE_24_VLAN_ID_S, vlan); + V2_QPC_BYTE_24_VLAN_ID_S, vlan_id); roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M, V2_QPC_BYTE_24_VLAN_ID_S, 0); -- cgit From cfd82da4e741c16d71a12123bf0cb585af2b8796 Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Wed, 4 Sep 2019 11:14:44 +0800 Subject: RDMA/hns: Modify return value of restrack functions The restrack function return EINVAL instead of EMSGSIZE when the driver operation fails. Fixes: 4b42d05d0b2c ("RDMA/hns: Remove unnecessary kzalloc") Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Link: https://lore.kernel.org/r/1567566885-23088-5-git-send-email-liweihang@hisilicon.com Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_restrack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c index 0a31d0a3d657..a0d608ec81c1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_restrack.c +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -95,7 +95,7 @@ static int hns_roce_fill_res_cq_entry(struct sk_buff *msg, ret = hr_dev->dfx->query_cqc_info(hr_dev, hr_cq->cqn, (int *)context); if (ret) - goto err; + return -EINVAL; table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); if (!table_attr) -- cgit From 3dcad1f8421f5684c64ef3079c32dfb884a3b910 Mon Sep 17 00:00:00 2001 From: Yixing Liu Date: Wed, 4 Sep 2019 11:14:45 +0800 Subject: RDMA/hns: Fix a spelling mistake in a macro HNS_ROCE_ALOGN_UP should be HNS_ROCE_ALIGN_UP, this patch fix it. Signed-off-by: Yixing Liu Signed-off-by: Weihang Li Link: https://lore.kernel.org/r/1567566885-23088-6-git-send-email-liweihang@hisilicon.com Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_device.h | 2 +- drivers/infiniband/hw/hns/hns_roce_qp.c | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index c810898e0676..cbd75e466417 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -45,7 +45,7 @@ #define HNS_ROCE_MAX_MSG_LEN 0x80000000 -#define HNS_ROCE_ALOGN_UP(a, b) ((((a) + (b) - 1) / (b)) * (b)) +#define HNS_ROCE_ALIGN_UP(a, b) ((((a) + (b) - 1) / (b)) * (b)) #define HNS_ROCE_IB_MIN_SQ_STRIDE 6 diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index bd78ff90d998..bec48f2593f4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -391,37 +391,37 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev, /* Get buf size, SQ and RQ are aligned to page_szie */ if (hr_dev->caps.max_sq_sg <= 2) { - hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << + hr_qp->buff_size = HNS_ROCE_ALIGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), PAGE_SIZE) + - HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << + HNS_ROCE_ALIGN_UP((hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), PAGE_SIZE); hr_qp->sq.offset = 0; - hr_qp->rq.offset = HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << + hr_qp->rq.offset = HNS_ROCE_ALIGN_UP((hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), PAGE_SIZE); } else { page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); hr_qp->sge.sge_cnt = ex_sge_num ? max(page_size / (1 << hr_qp->sge.sge_shift), ex_sge_num) : 0; - hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << + hr_qp->buff_size = HNS_ROCE_ALIGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), page_size) + - HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt << + HNS_ROCE_ALIGN_UP((hr_qp->sge.sge_cnt << hr_qp->sge.sge_shift), page_size) + - HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << + HNS_ROCE_ALIGN_UP((hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), page_size); hr_qp->sq.offset = 0; if (ex_sge_num) { - hr_qp->sge.offset = HNS_ROCE_ALOGN_UP( + hr_qp->sge.offset = HNS_ROCE_ALIGN_UP( (hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), page_size); hr_qp->rq.offset = hr_qp->sge.offset + - HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt << + HNS_ROCE_ALIGN_UP((hr_qp->sge.sge_cnt << hr_qp->sge.sge_shift), page_size); } else { - hr_qp->rq.offset = HNS_ROCE_ALOGN_UP( + hr_qp->rq.offset = HNS_ROCE_ALIGN_UP( (hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), page_size); @@ -591,19 +591,19 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, /* Get buf size, SQ and RQ are aligned to PAGE_SIZE */ page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); hr_qp->sq.offset = 0; - size = HNS_ROCE_ALOGN_UP(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift, + size = HNS_ROCE_ALIGN_UP(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift, page_size); if (hr_dev->caps.max_sq_sg > 2 && hr_qp->sge.sge_cnt) { hr_qp->sge.sge_cnt = max(page_size/(1 << hr_qp->sge.sge_shift), (u32)hr_qp->sge.sge_cnt); hr_qp->sge.offset = size; - size += HNS_ROCE_ALOGN_UP(hr_qp->sge.sge_cnt << + size += HNS_ROCE_ALIGN_UP(hr_qp->sge.sge_cnt << hr_qp->sge.sge_shift, page_size); } hr_qp->rq.offset = size; - size += HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), + size += HNS_ROCE_ALIGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), page_size); hr_qp->buff_size = size; -- cgit From d302c6e3a6895608a5856bc708c47bda1770b24d Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Wed, 9 Oct 2019 09:21:50 +0800 Subject: RDMA/hns: Release qp resources when failed to destroy qp Even if no response from hardware, we should make sure that qp related resources are released to avoid memory leaks. Fixes: 926a01dc000d ("RDMA/hns: Add QP operations support for hip08 SoC") Signed-off-by: Yangyang Li Signed-off-by: Weihang Li Link: https://lore.kernel.org/r/1570584110-3659-1-git-send-email-liweihang@hisilicon.com Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index f7c83564b431..14e24b4ef6ae 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4650,16 +4650,14 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, { struct hns_roce_cq *send_cq, *recv_cq; struct ib_device *ibdev = &hr_dev->ib_dev; - int ret; + int ret = 0; if (hr_qp->ibqp.qp_type == IB_QPT_RC && hr_qp->state != IB_QPS_RESET) { /* Modify qp to reset before destroying qp */ ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, IB_QPS_RESET); - if (ret) { + if (ret) ibdev_err(ibdev, "modify QP to Reset failed.\n"); - return ret; - } } send_cq = to_hr_cq(hr_qp->ibqp.send_cq); @@ -4715,7 +4713,7 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, kfree(hr_qp->rq_inl_buf.wqe_list); } - return 0; + return ret; } static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) @@ -4725,11 +4723,9 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) int ret; ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata); - if (ret) { + if (ret) ibdev_err(&hr_dev->ib_dev, "Destroy qp 0x%06lx failed(%d)\n", hr_qp->qpn, ret); - return ret; - } if (hr_qp->ibqp.qp_type == IB_QPT_GSI) kfree(hr_to_hr_sqp(hr_qp)); -- cgit From 366090564b812453f531051129b82b63dad8f38b Mon Sep 17 00:00:00 2001 From: Yamin Friedman Date: Mon, 7 Oct 2019 16:59:33 +0300 Subject: RDMA/mlx5: Add capability for max sge to get optimized performance Allows the IB device to provide a value of maximum scatter gather entries per RDMA READ. In certain cases it may be preferable for a device to perform UMR memory registration rather than have many scatter entries in a single RDMA READ. This provides a significant performance increase in devices capable of using different memory registration schemes based on the number of scatter gather entries. This general capability allows each device vendor to fine tune when it is better to use memory registration. Link: https://lore.kernel.org/r/20191007135933.12483-4-leon@kernel.org Signed-off-by: Yamin Friedman Reviewed-by: Or Gerlitz Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 831539419c30..bb3a4e82ee5a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1011,6 +1011,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size); props->max_pi_fast_reg_page_list_len = props->max_fast_reg_page_list_len / 2; + props->max_sgl_rd = + MLX5_CAP_GEN(mdev, max_sgl_for_optimized_performance); get_atomic_caps_qp(dev, props); props->masked_atomic_cap = IB_ATOMIC_NONE; props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg); -- cgit From a3de94e3d61ec6e6c57ee066ec4d28ebc260dafa Mon Sep 17 00:00:00 2001 From: Erez Alfasi Date: Wed, 16 Oct 2019 09:23:05 +0300 Subject: IB/mlx5: Introduce ODP diagnostic counters Introduce ODP diagnostic counters and count the following per MR within IB/mlx5 driver: 1) Page faults: Total number of faulted pages. 2) Page invalidations: Total number of pages invalidated by the OS during all invalidation events. The translations can be no longer valid due to either non-present pages or mapping changes. Link: https://lore.kernel.org/r/20191016062308.11886-2-leon@kernel.org Signed-off-by: Erez Alfasi Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 4 ++++ drivers/infiniband/hw/mlx5/odp.c | 15 +++++++++++++++ 2 files changed, 19 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index bf30d53d94dc..5aae05ebf64b 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -585,6 +585,9 @@ struct mlx5_ib_dm { IB_ACCESS_REMOTE_READ |\ IB_ZERO_BASED) +#define mlx5_update_odp_stats(mr, counter_name, value) \ + atomic64_add(value, &((mr)->odp_stats.counter_name)) + struct mlx5_ib_mr { struct ib_mr ibmr; void *descs; @@ -622,6 +625,7 @@ struct mlx5_ib_mr { wait_queue_head_t q_leaf_free; struct mlx5_async_work cb_work; atomic_t num_pending_prefetch; + struct ib_odp_counters odp_stats; }; static inline bool is_odp_mr(struct mlx5_ib_mr *mr) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 95cf0249b015..3601c6ad96f9 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -224,6 +224,7 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT / sizeof(struct mlx5_mtt)) - 1; u64 idx = 0, blk_start_idx = 0; + u64 invalidations = 0; int in_block = 0; u64 addr; @@ -261,6 +262,9 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, blk_start_idx = idx; in_block = 1; } + + /* Count page invalidations */ + invalidations += idx - blk_start_idx + 1; } else { u64 umr_offset = idx & umr_block_mask; @@ -279,6 +283,9 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, MLX5_IB_UPD_XLT_ZAP | MLX5_IB_UPD_XLT_ATOMIC); mutex_unlock(&umem_odp->umem_mutex); + + mlx5_update_odp_stats(mr, invalidations, invalidations); + /* * We are now sure that the device will not access the * memory. We can safely unmap it, and mark it as dirty if @@ -287,6 +294,7 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, ib_umem_odp_unmap_dma_pages(umem_odp, start, end); + if (unlikely(!umem_odp->npages && mr->parent && !umem_odp->dying)) { WRITE_ONCE(umem_odp->dying, 1); @@ -801,6 +809,13 @@ next_mr: if (ret < 0) goto srcu_unlock; + /* + * When prefetching a page, page fault is generated + * in order to bring the page to the main memory. + * In the current flow, page faults are being counted. + */ + mlx5_update_odp_stats(mr, faults, ret); + npages += ret; ret = 0; break; -- cgit From e1b95ae0b0ea4987afca73d1dc71dfc0b8ad4e49 Mon Sep 17 00:00:00 2001 From: Erez Alfasi Date: Wed, 16 Oct 2019 09:23:07 +0300 Subject: RDMA/mlx5: Return ODP type per MR Provide an ODP explicit/implicit type as part of 'rdma -dd resource show mr' dump. For example: $ rdma -dd resource show mr dev mlx5_0 mrn 1 rkey 0xa99a lkey 0xa99a mrlen 50000000 pdn 9 pid 7372 comm ibv_rc_pingpong drv_odp explicit For non-ODP MRs, we won't print "drv_odp ..." at all. Link: https://lore.kernel.org/r/20191016062308.11886-4-leon@kernel.org Signed-off-by: Erez Alfasi Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/Makefile | 2 +- drivers/infiniband/hw/mlx5/main.c | 1 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 +++ drivers/infiniband/hw/mlx5/odp.c | 2 ++ drivers/infiniband/hw/mlx5/restrack.c | 48 +++++++++++++++++++++++++++++++++++ 5 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 drivers/infiniband/hw/mlx5/restrack.c (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index 9924be8384d8..d0a043ccbe58 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq_cmd.o \ srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o \ - cong.o + cong.o restrack.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 1106a2238b14..4a731cafbf7d 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6271,6 +6271,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .disassociate_ucontext = mlx5_ib_disassociate_ucontext, .drain_rq = mlx5_ib_drain_rq, .drain_sq = mlx5_ib_drain_sq, + .fill_res_entry = mlx5_ib_fill_res_entry, .get_dev_fw_str = get_dev_fw_str, .get_dma_mr = mlx5_ib_get_dma_mr, .get_link_layer = mlx5_ib_port_link_layer, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 5aae05ebf64b..a0ca1ef16e4e 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -626,6 +626,7 @@ struct mlx5_ib_mr { struct mlx5_async_work cb_work; atomic_t num_pending_prefetch; struct ib_odp_counters odp_stats; + bool is_odp_implicit; }; static inline bool is_odp_mr(struct mlx5_ib_mr *mr) @@ -1339,6 +1340,8 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev, u8 *native_port_num); void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev, u8 port_num); +int mlx5_ib_fill_res_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res); #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 3601c6ad96f9..2ab6e44aeaae 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -543,6 +543,8 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, atomic_set(&imr->num_leaf_free, 0); atomic_set(&imr->num_pending_prefetch, 0); + imr->is_odp_implicit = true; + return imr; } diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c new file mode 100644 index 000000000000..065049f52b83 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/restrack.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. + */ + +#include +#include +#include +#include "mlx5_ib.h" + +static int fill_res_mr_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + struct ib_mr *ibmr = container_of(res, struct ib_mr, res); + struct mlx5_ib_mr *mr = to_mmr(ibmr); + struct nlattr *table_attr; + + if (!(mr->access_flags & IB_ACCESS_ON_DEMAND)) + return 0; + + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); + if (!table_attr) + goto err; + + if (mr->is_odp_implicit) { + if (rdma_nl_put_driver_string(msg, "odp", "implicit")) + goto err; + } else { + if (rdma_nl_put_driver_string(msg, "odp", "explicit")) + goto err; + } + + nla_nest_end(msg, table_attr); + return 0; + +err: + nla_nest_cancel(msg, table_attr); + return -EMSGSIZE; +} + +int mlx5_ib_fill_res_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + if (res->type == RDMA_RESTRACK_MR) + return fill_res_mr_entry(msg, res); + + return 0; +} -- cgit From 4061ff7aa379fa770a82da0ed7ec4f9163034518 Mon Sep 17 00:00:00 2001 From: Erez Alfasi Date: Wed, 16 Oct 2019 09:23:08 +0300 Subject: RDMA/nldev: Provide MR statistics Add RDMA nldev netlink interface for dumping MR statistics information. Output example: $ ./ibv_rc_pingpong -o -P -s 500000000 local address: LID 0x0001, QPN 0x00008a, PSN 0xf81096, GID :: $ rdma stat show mr dev mlx5_0 mrn 2 page_faults 122071 page_invalidations 0 Link: https://lore.kernel.org/r/20191016062308.11886-5-leon@kernel.org Signed-off-by: Erez Alfasi Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 2 ++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 ++ drivers/infiniband/hw/mlx5/restrack.c | 42 +++++++++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 4a731cafbf7d..39d54e285ae9 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -67,6 +67,7 @@ #include #include #include +#include #define UVERBS_MODULE_NAME mlx5_ib #include @@ -6272,6 +6273,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .drain_rq = mlx5_ib_drain_rq, .drain_sq = mlx5_ib_drain_sq, .fill_res_entry = mlx5_ib_fill_res_entry, + .fill_stat_entry = mlx5_ib_fill_stat_entry, .get_dev_fw_str = get_dev_fw_str, .get_dma_mr = mlx5_ib_get_dma_mr, .get_link_layer = mlx5_ib_port_link_layer, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index a0ca1ef16e4e..e9bdb48cf1d3 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1342,6 +1342,8 @@ void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev, u8 port_num); int mlx5_ib_fill_res_entry(struct sk_buff *msg, struct rdma_restrack_entry *res); +int mlx5_ib_fill_stat_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res); #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user); diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c index 065049f52b83..8f6c04f12531 100644 --- a/drivers/infiniband/hw/mlx5/restrack.c +++ b/drivers/infiniband/hw/mlx5/restrack.c @@ -8,6 +8,39 @@ #include #include "mlx5_ib.h" +static int fill_stat_mr_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + struct ib_mr *ibmr = container_of(res, struct ib_mr, res); + struct mlx5_ib_mr *mr = to_mmr(ibmr); + struct nlattr *table_attr; + + if (!(mr->access_flags & IB_ACCESS_ON_DEMAND)) + return 0; + + table_attr = nla_nest_start(msg, + RDMA_NLDEV_ATTR_STAT_HWCOUNTERS); + + if (!table_attr) + goto err; + + if (rdma_nl_stat_hwcounter_entry(msg, "page_faults", + atomic64_read(&mr->odp_stats.faults))) + goto err_table; + if (rdma_nl_stat_hwcounter_entry( + msg, "page_invalidations", + atomic64_read(&mr->odp_stats.invalidations))) + goto err_table; + + nla_nest_end(msg, table_attr); + return 0; + +err_table: + nla_nest_cancel(msg, table_attr); +err: + return -EMSGSIZE; +} + static int fill_res_mr_entry(struct sk_buff *msg, struct rdma_restrack_entry *res) { @@ -46,3 +79,12 @@ int mlx5_ib_fill_res_entry(struct sk_buff *msg, return 0; } + +int mlx5_ib_fill_stat_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + if (res->type == RDMA_RESTRACK_MR) + return fill_stat_mr_entry(msg, res); + + return 0; +} -- cgit From 68abaa765e410dc1583de1fa285ec7b0c58c6252 Mon Sep 17 00:00:00 2001 From: Ran Rozenstein Date: Sun, 20 Oct 2019 09:44:54 +0300 Subject: IB/mlx5: Remove dead code mlx5_ib_dc_atomic_is_supported function is not used anywhere. Remove the dead code. Fixes: a60109dc9a95 ("IB/mlx5: Add support for extended atomic operations") Link: https://lore.kernel.org/r/20191020064454.8551-1-leon@kernel.org Signed-off-by: Ran Rozenstein Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 15 --------------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 - 2 files changed, 16 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 39d54e285ae9..20fabb368f61 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -694,21 +694,6 @@ static void get_atomic_caps_qp(struct mlx5_ib_dev *dev, get_atomic_caps(dev, atomic_size_qp, props); } -static void get_atomic_caps_dc(struct mlx5_ib_dev *dev, - struct ib_device_attr *props) -{ - u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc); - - get_atomic_caps(dev, atomic_size_qp, props); -} - -bool mlx5_ib_dc_atomic_is_supported(struct mlx5_ib_dev *dev) -{ - struct ib_device_attr props = {}; - - get_atomic_caps_dc(dev, &props); - return (props.atomic_cap == IB_ATOMIC_HCA) ? true : false; -} static int mlx5_query_system_image_guid(struct ib_device *ibdev, __be64 *sys_image_guid) { diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index e9bdb48cf1d3..64f56926bf6b 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1241,7 +1241,6 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, struct ib_rwq_ind_table_init_attr *init_attr, struct ib_udata *udata); int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); -bool mlx5_ib_dc_atomic_is_supported(struct mlx5_ib_dev *dev); struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_dm_alloc_attr *attr, -- cgit From 3f89b01f4bbab2dcd1a6e7e31e64faacb448e3be Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Sun, 20 Oct 2019 09:43:59 +0300 Subject: IB/mlx5: Align usage of QP1 create flags with rest of mlx5 defines There is little value in keeping separate function for one flag, provide it directly like any other mlx5 define. Link: https://lore.kernel.org/r/20191020064400.8344-2-leon@kernel.org Signed-off-by: Michael Guralnik Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/gsi.c | 2 +- drivers/infiniband/hw/mlx5/mlx5_ib.h | 7 +------ drivers/infiniband/hw/mlx5/qp.c | 8 ++++---- 3 files changed, 6 insertions(+), 11 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index 4950df3f71b6..ac4d8d1b9a07 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -263,7 +263,7 @@ static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi) }, .sq_sig_type = gsi->sq_sig_type, .qp_type = IB_QPT_UD, - .create_flags = mlx5_ib_create_qp_sqpn_qp1(), + .create_flags = MLX5_IB_QP_CREATE_SQPN_QP1, }; return ib_create_qp(pd, &init_attr); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 64f56926bf6b..d53739e43a93 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -247,12 +247,7 @@ struct mlx5_ib_flow_db { * These flags are intended for internal use by the mlx5_ib driver, and they * rely on the range reserved for that use in the ib_qp_create_flags enum. */ - -/* Create a UD QP whose source QP number is 1 */ -static inline enum ib_qp_create_flags mlx5_ib_create_qp_sqpn_qp1(void) -{ - return IB_QP_CREATE_RESERVED_START; -} +#define MLX5_IB_QP_CREATE_SQPN_QP1 IB_QP_CREATE_RESERVED_START struct wr_list { u16 opcode; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 8937d72ddcf6..bb3f432e2fb6 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1041,7 +1041,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK | IB_QP_CREATE_IPOIB_UD_LSO | IB_QP_CREATE_NETIF_QP | - mlx5_ib_create_qp_sqpn_qp1())) + MLX5_IB_QP_CREATE_SQPN_QP1)) return -EINVAL; if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) @@ -1104,7 +1104,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, MLX5_SET(qpc, qpc, fre, 1); MLX5_SET(qpc, qpc, rlky, 1); - if (init_attr->create_flags & mlx5_ib_create_qp_sqpn_qp1()) { + if (init_attr->create_flags & MLX5_IB_QP_CREATE_SQPN_QP1) { MLX5_SET(qpc, qpc, deth_sqpn, 1); qp->flags |= MLX5_IB_QP_SQPN_QP1; } @@ -2140,7 +2140,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, return -EINVAL; } if (init_attr->create_flags & - mlx5_ib_create_qp_sqpn_qp1()) { + MLX5_IB_QP_CREATE_SQPN_QP1) { mlx5_ib_dbg(dev, "user-space is not allowed to create UD QPs spoofing as QP1\n"); return -EINVAL; } @@ -5823,7 +5823,7 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, if (qp->flags & MLX5_IB_QP_MANAGED_RECV) qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV; if (qp->flags & MLX5_IB_QP_SQPN_QP1) - qp_init_attr->create_flags |= mlx5_ib_create_qp_sqpn_qp1(); + qp_init_attr->create_flags |= MLX5_IB_QP_CREATE_SQPN_QP1; qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; -- cgit From 515f60004ed985d2b2f03659365752e0b6142986 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sat, 8 Jun 2019 12:25:14 +0300 Subject: RDMA/hns: Prevent undefined behavior in hns_roce_set_user_sq_size() The "ucmd->log_sq_bb_count" variable is a user controlled variable in the 0-255 range. If we shift more than then number of bits in an int then it's undefined behavior (it shift wraps), and potentially the int could become negative. Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver") Link: https://lore.kernel.org/r/20190608092514.GC28890@mwanda Reported-by: Dan Carpenter Signed-off-by: Jason Gunthorpe Reviewed-by: Dan Carpenter --- drivers/infiniband/hw/hns/hns_roce_qp.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index bec48f2593f4..6aa27d6ea3a6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -332,9 +332,8 @@ static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev, u8 max_sq_stride = ilog2(roundup_sq_stride); /* Sanity check SQ size before proceeding */ - if ((u32)(1 << ucmd->log_sq_bb_count) > hr_dev->caps.max_wqes || - ucmd->log_sq_stride > max_sq_stride || - ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) { + if (ucmd->log_sq_stride > max_sq_stride || + ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) { ibdev_err(&hr_dev->ib_dev, "check SQ size error!\n"); return -EINVAL; } @@ -358,13 +357,16 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev, u32 max_cnt; int ret; + if (check_shl_overflow(1, ucmd->log_sq_bb_count, &hr_qp->sq.wqe_cnt) || + hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) + return -EINVAL; + ret = check_sq_size_with_integrity(hr_dev, cap, ucmd); if (ret) { ibdev_err(&hr_dev->ib_dev, "Sanity check sq size failed\n"); return ret; } - hr_qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count; hr_qp->sq.wqe_shift = ucmd->log_sq_stride; max_cnt = max(1U, cap->max_send_sge); -- cgit From f9e66db143162a72bb41369fadddcd65fef8664f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 23 Oct 2019 08:42:39 +0300 Subject: RDMA/hns: Delete BITS_PER_BYTE redefinition HNS redefined available in bits.h define and didn't use it, we can safely delete it. Link: https://lore.kernel.org/r/20191023054239.31648-1-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index cbd75e466417..940761310430 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -53,8 +53,6 @@ #define BA_BYTE_LEN 8 -#define BITS_PER_BYTE 8 - /* Hardware specification only for v1 engine */ #define HNS_ROCE_MIN_CQE_NUM 0x40 #define HNS_ROCE_MIN_WQE_NUM 0x20 -- cgit From 5c7e76fb7cb5071be800c938ebf2c475e140d3f0 Mon Sep 17 00:00:00 2001 From: Lijun Ou Date: Thu, 24 Oct 2019 17:21:56 +0800 Subject: RDMA/hns: Fix to support 64K page for srq SRQ's page size configuration of BA and buffer should depend on current PAGE_SHIFT, or it can't work in scenario of 64K page. Fixes: c7bcb13442e1 ("RDMA/hns: Add SRQ support for hip08 kernel mode") Link: https://lore.kernel.org/r/1571908917-16220-2-git-send-email-liweihang@hisilicon.com Signed-off-by: Lijun Ou Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 14e24b4ef6ae..7218f6d4101a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -6088,11 +6088,11 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, roce_set_field(srq_context->byte_44_idxbufpgsz_addr, SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M, SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S, - hr_dev->caps.idx_ba_pg_sz); + hr_dev->caps.idx_ba_pg_sz + PG_SHIFT_OFFSET); roce_set_field(srq_context->byte_44_idxbufpgsz_addr, SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M, SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S, - hr_dev->caps.idx_buf_pg_sz); + hr_dev->caps.idx_buf_pg_sz + PG_SHIFT_OFFSET); srq_context->idx_nxt_blk_addr = cpu_to_le32(mtts_idx[1] >> PAGE_ADDR_SHIFT); -- cgit From 887803db866a7a4e1817a3cb8a3eee4e9879fed2 Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Thu, 24 Oct 2019 17:21:57 +0800 Subject: RDMA/hns: Bugfix for qpc/cqc timer configuration qpc/cqc timer entry size needs one page, but currently they are fixedly configured to 4096, which is not appropriate in 64K page scenarios. So they should be modified to PAGE_SIZE. Fixes: 0e40dc2f70cd ("RDMA/hns: Add timer allocation support for hip08") Link: https://lore.kernel.org/r/1571908917-16220-3-git-send-email-liweihang@hisilicon.com Signed-off-by: Yangyang Li Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 43219d2f7de0..76a14db7028d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -87,8 +87,8 @@ #define HNS_ROCE_V2_MTT_ENTRY_SZ 64 #define HNS_ROCE_V2_CQE_ENTRY_SIZE 32 #define HNS_ROCE_V2_SCCC_ENTRY_SZ 32 -#define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ 4096 -#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ 4096 +#define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ PAGE_SIZE +#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE #define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000 #define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2 #define HNS_ROCE_INVALID_LKEY 0x100 -- cgit From 994195e1537074f56df216a9309f6e366cb35b67 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 24 Oct 2019 14:10:34 +0100 Subject: RDMA/hns: Fix memory leak on 'context' on error return path Currently, the error return path when the call to function dev->dfx->query_cqc_info fails will leak object 'context'. Fix this by making the error return path via 'err' return return codes rather than -EMSGSIZE, set ret appropriately for all error return paths and for the memory leak now return via 'err' rather than just returning without freeing context. Link: https://lore.kernel.org/r/20191024131034.19989-1-colin.king@canonical.com Addresses-Coverity: ("Resource leak") Fixes: e1c9a0dc2939 ("RDMA/hns: Dump detailed driver-specific CQ") Signed-off-by: Colin Ian King Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_restrack.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c index a0d608ec81c1..06871731ac43 100644 --- a/drivers/infiniband/hw/hns/hns_roce_restrack.c +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -95,14 +95,18 @@ static int hns_roce_fill_res_cq_entry(struct sk_buff *msg, ret = hr_dev->dfx->query_cqc_info(hr_dev, hr_cq->cqn, (int *)context); if (ret) - return -EINVAL; + goto err; table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); - if (!table_attr) + if (!table_attr) { + ret = -EMSGSIZE; goto err; + } - if (hns_roce_fill_cq(msg, context)) + if (hns_roce_fill_cq(msg, context)) { + ret = -EMSGSIZE; goto err_cancel_table; + } nla_nest_end(msg, table_attr); kfree(context); @@ -113,7 +117,7 @@ err_cancel_table: nla_nest_cancel(msg, table_attr); err: kfree(context); - return -EMSGSIZE; + return ret; } int hns_roce_fill_res_entry(struct sk_buff *msg, -- cgit From 73ab512f720298aabe23b34110e3f6a8545b0ba5 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Sun, 27 Oct 2019 22:04:48 +0200 Subject: RDMA/qedr: Fix srqs xarray initialization There was a missing initialization for the srqs xarray. SRQs xarray can also be called from irq context when searching for an element and uses the xa_XXX_irq apis, therefore should be initialized with IRQ flags. Fixes: 9fd15987ed27 ("qedr: Convert srqidr to XArray") Link: https://lore.kernel.org/r/20191027200451.28187-2-michal.kalderon@marvell.com Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 5136b835e1ba..aa0bda428690 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -357,6 +357,7 @@ static int qedr_alloc_resources(struct qedr_dev *dev) return -ENOMEM; spin_lock_init(&dev->sgid_lock); + xa_init_flags(&dev->srqs, XA_FLAGS_LOCK_IRQ); if (IS_IWARP(dev)) { xa_init_flags(&dev->qps, XA_FLAGS_LOCK_IRQ); -- cgit From 5fdff18b4dc64e2d1e912ad2b90495cd487f791b Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Sun, 27 Oct 2019 22:04:49 +0200 Subject: RDMA/qedr: Fix qpids xarray api used The qpids xarray isn't accessed from irq context and therefore there is no need to use the xa_XXX_irq version of the apis. Remove the _irq. Fixes: b6014f9e5f39 ("qedr: Convert qpidr to XArray") Link: https://lore.kernel.org/r/20191027200451.28187-3-michal.kalderon@marvell.com Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/main.c | 2 +- drivers/infiniband/hw/qedr/qedr_iw_cm.c | 2 +- drivers/infiniband/hw/qedr/verbs.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index aa0bda428690..1ff5407270d2 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -360,7 +360,7 @@ static int qedr_alloc_resources(struct qedr_dev *dev) xa_init_flags(&dev->srqs, XA_FLAGS_LOCK_IRQ); if (IS_IWARP(dev)) { - xa_init_flags(&dev->qps, XA_FLAGS_LOCK_IRQ); + xa_init(&dev->qps); dev->iwarp_wq = create_singlethread_workqueue("qedr_iwarpq"); } diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index 22881d4442b9..7fea74739c1f 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -739,7 +739,7 @@ void qedr_iw_qp_rem_ref(struct ib_qp *ibqp) struct qedr_qp *qp = get_qedr_qp(ibqp); if (atomic_dec_and_test(&qp->refcnt)) { - xa_erase_irq(&qp->dev->qps, qp->qp_id); + xa_erase(&qp->dev->qps, qp->qp_id); kfree(qp); } } diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 6f3ce86019b7..84b666c67cff 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1918,7 +1918,7 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, qp->ibqp.qp_num = qp->qp_id; if (rdma_protocol_iwarp(&dev->ibdev, 1)) { - rc = xa_insert_irq(&dev->qps, qp->qp_id, qp, GFP_KERNEL); + rc = xa_insert(&dev->qps, qp->qp_id, qp, GFP_KERNEL); if (rc) goto err; } @@ -2492,7 +2492,7 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) if (atomic_dec_and_test(&qp->refcnt) && rdma_protocol_iwarp(&dev->ibdev, 1)) { - xa_erase_irq(&dev->qps, qp->qp_id); + xa_erase(&dev->qps, qp->qp_id); kfree(qp); } return 0; -- cgit From 82af6d19d8d9227c22a53ff00b40fb2a4f9fce69 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Sun, 27 Oct 2019 22:04:50 +0200 Subject: RDMA/qedr: Fix synchronization methods and memory leaks in qedr Re-design of the iWARP CM related objects reference counting and synchronization methods, to ensure operations are synchronized correctly and that memory allocated for "ep" is properly released. Also makes sure QP memory is not released before ep is finished accessing it. Where as the QP object is created/destroyed by external operations, the ep is created/destroyed by internal operations and represents the tcp connection associated with the QP. QP destruction flow: - needs to wait for ep establishment to complete (either successfully or with error) - needs to wait for ep disconnect to be fully posted to avoid a race condition of disconnect being called after reset. - both the operations above don't always happen, so we use atomic flags to indicate whether the qp destruction flow needs to wait for these completions or not, if the destroy is called before these operations began, the flows will check the flags and not execute them ( connect / disconnect). We use completion structure for waiting for the completions mentioned above. The QP refcnt was modified to kref object. The EP has a kref added to it to handle additional worker thread accessing it. Memory Leaks - https://www.spinics.net/lists/linux-rdma/msg83762.html Concurrency not managed correctly - https://www.spinics.net/lists/linux-rdma/msg67949.html Fixes: de0089e692a9 ("RDMA/qedr: Add iWARP connection management qp related callbacks") Link: https://lore.kernel.org/r/20191027200451.28187-4-michal.kalderon@marvell.com Reported-by: Chuck Lever Reported-by: Jason Gunthorpe Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/qedr.h | 23 ++++- drivers/infiniband/hw/qedr/qedr_iw_cm.c | 148 +++++++++++++++++++++----------- drivers/infiniband/hw/qedr/verbs.c | 62 ++++++++----- 3 files changed, 158 insertions(+), 75 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 0cfd849b13d6..8e927f6c1520 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -40,6 +40,7 @@ #include #include #include +#include #include "qedr_hsi_rdma.h" #define QEDR_NODE_DESC "QLogic 579xx RoCE HCA" @@ -377,10 +378,20 @@ enum qedr_qp_err_bitmap { QEDR_QP_ERR_RQ_PBL_FULL = 32, }; +enum qedr_qp_create_type { + QEDR_QP_CREATE_NONE, + QEDR_QP_CREATE_USER, + QEDR_QP_CREATE_KERNEL, +}; + +enum qedr_iwarp_cm_flags { + QEDR_IWARP_CM_WAIT_FOR_CONNECT = BIT(0), + QEDR_IWARP_CM_WAIT_FOR_DISCONNECT = BIT(1), +}; + struct qedr_qp { struct ib_qp ibqp; /* must be first */ struct qedr_dev *dev; - struct qedr_iw_ep *ep; struct qedr_qp_hwq_info sq; struct qedr_qp_hwq_info rq; @@ -395,6 +406,7 @@ struct qedr_qp { u32 id; struct qedr_pd *pd; enum ib_qp_type qp_type; + enum qedr_qp_create_type create_type; struct qed_rdma_qp *qed_qp; u32 qp_id; u16 icid; @@ -437,8 +449,11 @@ struct qedr_qp { /* Relevant to qps created from user space only (applications) */ struct qedr_userq usq; struct qedr_userq urq; - atomic_t refcnt; - bool destroyed; + + /* synchronization objects used with iwarp ep */ + struct kref refcnt; + struct completion iwarp_cm_comp; + unsigned long iwarp_cm_flags; /* enum iwarp_cm_flags */ }; struct qedr_ah { @@ -531,7 +546,7 @@ struct qedr_iw_ep { struct iw_cm_id *cm_id; struct qedr_qp *qp; void *qed_context; - u8 during_connect; + struct kref refcnt; }; static inline diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index 7fea74739c1f..5e9732990be5 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -79,6 +79,27 @@ qedr_fill_sockaddr6(const struct qed_iwarp_cm_info *cm_info, } } +static void qedr_iw_free_qp(struct kref *ref) +{ + struct qedr_qp *qp = container_of(ref, struct qedr_qp, refcnt); + + kfree(qp); +} + +static void +qedr_iw_free_ep(struct kref *ref) +{ + struct qedr_iw_ep *ep = container_of(ref, struct qedr_iw_ep, refcnt); + + if (ep->qp) + kref_put(&ep->qp->refcnt, qedr_iw_free_qp); + + if (ep->cm_id) + ep->cm_id->rem_ref(ep->cm_id); + + kfree(ep); +} + static void qedr_iw_mpa_request(void *context, struct qed_iwarp_cm_event_params *params) { @@ -93,6 +114,7 @@ qedr_iw_mpa_request(void *context, struct qed_iwarp_cm_event_params *params) ep->dev = dev; ep->qed_context = params->ep_context; + kref_init(&ep->refcnt); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_CONNECT_REQUEST; @@ -141,12 +163,10 @@ qedr_iw_close_event(void *context, struct qed_iwarp_cm_event_params *params) { struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; - if (ep->cm_id) { + if (ep->cm_id) qedr_iw_issue_event(context, params, IW_CM_EVENT_CLOSE); - ep->cm_id->rem_ref(ep->cm_id); - ep->cm_id = NULL; - } + kref_put(&ep->refcnt, qedr_iw_free_ep); } static void @@ -186,11 +206,13 @@ static void qedr_iw_disconnect_worker(struct work_struct *work) struct qedr_qp *qp = ep->qp; struct iw_cm_event event; - if (qp->destroyed) { - kfree(dwork); - qedr_iw_qp_rem_ref(&qp->ibqp); - return; - } + /* The qp won't be released until we release the ep. + * the ep's refcnt was increased before calling this + * function, therefore it is safe to access qp + */ + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT, + &qp->iwarp_cm_flags)) + goto out; memset(&event, 0, sizeof(event)); event.status = dwork->status; @@ -204,7 +226,6 @@ static void qedr_iw_disconnect_worker(struct work_struct *work) else qp_params.new_state = QED_ROCE_QP_STATE_SQD; - kfree(dwork); if (ep->cm_id) ep->cm_id->event_handler(ep->cm_id, &event); @@ -214,7 +235,10 @@ static void qedr_iw_disconnect_worker(struct work_struct *work) dev->ops->rdma_modify_qp(dev->rdma_ctx, qp->qed_qp, &qp_params); - qedr_iw_qp_rem_ref(&qp->ibqp); + complete(&ep->qp->iwarp_cm_comp); +out: + kfree(dwork); + kref_put(&ep->refcnt, qedr_iw_free_ep); } static void @@ -224,13 +248,17 @@ qedr_iw_disconnect_event(void *context, struct qedr_discon_work *work; struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; struct qedr_dev *dev = ep->dev; - struct qedr_qp *qp = ep->qp; work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) return; - qedr_iw_qp_add_ref(&qp->ibqp); + /* We can't get a close event before disconnect, but since + * we're scheduling a work queue we need to make sure close + * won't delete the ep, so we increase the refcnt + */ + kref_get(&ep->refcnt); + work->ep = ep; work->event = params->event; work->status = params->status; @@ -252,16 +280,30 @@ qedr_iw_passive_complete(void *context, if ((params->status == -ECONNREFUSED) && (!ep->qp)) { DP_DEBUG(dev, QEDR_MSG_IWARP, "PASSIVE connection refused releasing ep...\n"); - kfree(ep); + kref_put(&ep->refcnt, qedr_iw_free_ep); return; } + complete(&ep->qp->iwarp_cm_comp); qedr_iw_issue_event(context, params, IW_CM_EVENT_ESTABLISHED); if (params->status < 0) qedr_iw_close_event(context, params); } +static void +qedr_iw_active_complete(void *context, + struct qed_iwarp_cm_event_params *params) +{ + struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; + + complete(&ep->qp->iwarp_cm_comp); + qedr_iw_issue_event(context, params, IW_CM_EVENT_CONNECT_REPLY); + + if (params->status < 0) + kref_put(&ep->refcnt, qedr_iw_free_ep); +} + static int qedr_iw_mpa_reply(void *context, struct qed_iwarp_cm_event_params *params) { @@ -288,27 +330,15 @@ qedr_iw_event_handler(void *context, struct qed_iwarp_cm_event_params *params) qedr_iw_mpa_reply(context, params); break; case QED_IWARP_EVENT_PASSIVE_COMPLETE: - ep->during_connect = 0; qedr_iw_passive_complete(context, params); break; - case QED_IWARP_EVENT_ACTIVE_COMPLETE: - ep->during_connect = 0; - qedr_iw_issue_event(context, - params, - IW_CM_EVENT_CONNECT_REPLY); - if (params->status < 0) { - struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; - - ep->cm_id->rem_ref(ep->cm_id); - ep->cm_id = NULL; - } + qedr_iw_active_complete(context, params); break; case QED_IWARP_EVENT_DISCONNECT: qedr_iw_disconnect_event(context, params); break; case QED_IWARP_EVENT_CLOSE: - ep->during_connect = 0; qedr_iw_close_event(context, params); break; case QED_IWARP_EVENT_RQ_EMPTY: @@ -476,6 +506,19 @@ qedr_addr6_resolve(struct qedr_dev *dev, return rc; } +struct qedr_qp *qedr_iw_load_qp(struct qedr_dev *dev, u32 qpn) +{ + struct qedr_qp *qp; + + xa_lock(&dev->qps); + qp = xa_load(&dev->qps, qpn); + if (qp) + kref_get(&qp->refcnt); + xa_unlock(&dev->qps); + + return qp; +} + int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) { struct qedr_dev *dev = get_qedr_dev(cm_id->device); @@ -491,10 +534,6 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) int rc = 0; int i; - qp = xa_load(&dev->qps, conn_param->qpn); - if (unlikely(!qp)) - return -EINVAL; - laddr = (struct sockaddr_in *)&cm_id->m_local_addr; raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr; @@ -516,8 +555,15 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) return -ENOMEM; ep->dev = dev; + kref_init(&ep->refcnt); + + qp = qedr_iw_load_qp(dev, conn_param->qpn); + if (!qp) { + rc = -EINVAL; + goto err; + } + ep->qp = qp; - qp->ep = ep; cm_id->add_ref(cm_id); ep->cm_id = cm_id; @@ -580,16 +626,20 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) in_params.qp = qp->qed_qp; memcpy(in_params.local_mac_addr, dev->ndev->dev_addr, ETH_ALEN); - ep->during_connect = 1; + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT, + &qp->iwarp_cm_flags)) + goto err; /* QP already being destroyed */ + rc = dev->ops->iwarp_connect(dev->rdma_ctx, &in_params, &out_params); - if (rc) + if (rc) { + complete(&qp->iwarp_cm_comp); goto err; + } return rc; err: - cm_id->rem_ref(cm_id); - kfree(ep); + kref_put(&ep->refcnt, qedr_iw_free_ep); return rc; } @@ -677,18 +727,17 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct qedr_dev *dev = ep->dev; struct qedr_qp *qp; struct qed_iwarp_accept_in params; - int rc; + int rc = 0; DP_DEBUG(dev, QEDR_MSG_IWARP, "Accept on qpid=%d\n", conn_param->qpn); - qp = xa_load(&dev->qps, conn_param->qpn); + qp = qedr_iw_load_qp(dev, conn_param->qpn); if (!qp) { DP_ERR(dev, "Invalid QP number %d\n", conn_param->qpn); return -EINVAL; } ep->qp = qp; - qp->ep = ep; cm_id->add_ref(cm_id); ep->cm_id = cm_id; @@ -700,15 +749,21 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) params.ird = conn_param->ird; params.ord = conn_param->ord; - ep->during_connect = 1; + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT, + &qp->iwarp_cm_flags)) + goto err; /* QP already destroyed */ + rc = dev->ops->iwarp_accept(dev->rdma_ctx, ¶ms); - if (rc) + if (rc) { + complete(&qp->iwarp_cm_comp); goto err; + } return rc; + err: - ep->during_connect = 0; - cm_id->rem_ref(cm_id); + kref_put(&ep->refcnt, qedr_iw_free_ep); + return rc; } @@ -731,17 +786,14 @@ void qedr_iw_qp_add_ref(struct ib_qp *ibqp) { struct qedr_qp *qp = get_qedr_qp(ibqp); - atomic_inc(&qp->refcnt); + kref_get(&qp->refcnt); } void qedr_iw_qp_rem_ref(struct ib_qp *ibqp) { struct qedr_qp *qp = get_qedr_qp(ibqp); - if (atomic_dec_and_test(&qp->refcnt)) { - xa_erase(&qp->dev->qps, qp->qp_id); - kfree(qp); - } + kref_put(&qp->refcnt, qedr_iw_free_qp); } struct ib_qp *qedr_iw_get_qp(struct ib_device *ibdev, int qpn) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 84b666c67cff..a17b388ee3b3 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -51,6 +51,7 @@ #include "verbs.h" #include #include "qedr_roce_cm.h" +#include "qedr_iw_cm.h" #define QEDR_SRQ_WQE_ELEM_SIZE sizeof(union rdma_srq_elm) #define RDMA_MAX_SGE_PER_SRQ (4) @@ -1193,7 +1194,10 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev, struct ib_qp_init_attr *attrs) { spin_lock_init(&qp->q_lock); - atomic_set(&qp->refcnt, 1); + if (rdma_protocol_iwarp(&dev->ibdev, 1)) { + kref_init(&qp->refcnt); + init_completion(&qp->iwarp_cm_comp); + } qp->pd = pd; qp->qp_type = attrs->qp_type; qp->max_inline_data = attrs->cap.max_inline_data; @@ -1592,6 +1596,7 @@ static int qedr_create_user_qp(struct qedr_dev *dev, int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1); int rc = -EINVAL; + qp->create_type = QEDR_QP_CREATE_USER; memset(&ureq, 0, sizeof(ureq)); rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq)); if (rc) { @@ -1805,6 +1810,7 @@ static int qedr_create_kernel_qp(struct qedr_dev *dev, u32 n_sq_entries; memset(&in_params, 0, sizeof(in_params)); + qp->create_type = QEDR_QP_CREATE_KERNEL; /* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in * the ring. The ring should allow at least a single WR, even if the @@ -2437,7 +2443,7 @@ static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp, return rc; } - if (udata) + if (qp->create_type == QEDR_QP_CREATE_USER) qedr_cleanup_user(dev, qp); else qedr_cleanup_kernel(dev, qp); @@ -2467,34 +2473,44 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) qedr_modify_qp(ibqp, &attr, attr_mask, NULL); } } else { - /* Wait for the connect/accept to complete */ - if (qp->ep) { - int wait_count = 1; - - while (qp->ep->during_connect) { - DP_DEBUG(dev, QEDR_MSG_QP, - "Still in during connect/accept\n"); - - msleep(100); - if (wait_count++ > 200) { - DP_NOTICE(dev, - "during connect timeout\n"); - break; - } - } - } + /* If connection establishment started the WAIT_FOR_CONNECT + * bit will be on and we need to Wait for the establishment + * to complete before destroying the qp. + */ + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT, + &qp->iwarp_cm_flags)) + wait_for_completion(&qp->iwarp_cm_comp); + + /* If graceful disconnect started, the WAIT_FOR_DISCONNECT + * bit will be on, and we need to wait for the disconnect to + * complete before continuing. We can use the same completion, + * iwarp_cm_comp, since this is the only place that waits for + * this completion and it is sequential. In addition, + * disconnect can't occur before the connection is fully + * established, therefore if WAIT_FOR_DISCONNECT is on it + * means WAIT_FOR_CONNECT is also on and the completion for + * CONNECT already occurred. + */ + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT, + &qp->iwarp_cm_flags)) + wait_for_completion(&qp->iwarp_cm_comp); } if (qp->qp_type == IB_QPT_GSI) qedr_destroy_gsi_qp(dev); + /* We need to remove the entry from the xarray before we release the + * qp_id to avoid a race of the qp_id being reallocated and failing + * on xa_insert + */ + if (rdma_protocol_iwarp(&dev->ibdev, 1)) + xa_erase(&dev->qps, qp->qp_id); + qedr_free_qp_resources(dev, qp, udata); - if (atomic_dec_and_test(&qp->refcnt) && - rdma_protocol_iwarp(&dev->ibdev, 1)) { - xa_erase(&dev->qps, qp->qp_id); - kfree(qp); - } + if (rdma_protocol_iwarp(&dev->ibdev, 1)) + qedr_iw_qp_rem_ref(&qp->ibqp); + return 0; } -- cgit From 24e412c1e00ebfe73619e6b88cbc26c2c7d41b85 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Sun, 27 Oct 2019 22:04:51 +0200 Subject: RDMA/qedr: Fix memory leak in user qp and mr User QPs pbl's weren't freed properly. MR pbls weren't freed properly. Fixes: e0290cce6ac0 ("qedr: Add support for memory registeration verbs") Link: https://lore.kernel.org/r/20191027200451.28187-5-michal.kalderon@marvell.com Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/verbs.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index a17b388ee3b3..8b4240c1cc76 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1581,6 +1581,14 @@ static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp) ib_umem_release(qp->urq.umem); qp->urq.umem = NULL; + + if (rdma_protocol_roce(&dev->ibdev, 1)) { + qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl); + qedr_free_pbl(dev, &qp->urq.pbl_info, qp->urq.pbl_tbl); + } else { + kfree(qp->usq.pbl_tbl); + kfree(qp->urq.pbl_tbl); + } } static int qedr_create_user_qp(struct qedr_dev *dev, @@ -2689,8 +2697,8 @@ int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); - if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR)) - qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table); + if (mr->type != QEDR_MR_DMA) + free_mr_info(dev, &mr->info); /* it could be user registered memory. */ ib_umem_release(mr->umem); -- cgit From 5212c3fda2225af66a6a83afd9eb0a6f0c80b99c Mon Sep 17 00:00:00 2001 From: Potnuri Bharat Teja Date: Fri, 25 Oct 2019 16:27:02 +0530 Subject: RDMA/iw_cxgb4: Report correct port speed/width Query speed/width from corresponding netdev. Link: https://lore.kernel.org/r/1572001022-4533-1-git-send-email-bharat@chelsio.com Signed-off-by: Potnuri Bharat Teja Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/provider.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index d373ac0fe2cb..ba83d942997c 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -305,7 +305,10 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro static int c4iw_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { + int ret = 0; pr_debug("ibdev %p\n", ibdev); + ret = ib_get_eth_speed(ibdev, port, &props->active_speed, + &props->active_width); props->port_cap_flags = IB_PORT_CM_SUP | @@ -315,11 +318,9 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port, IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; props->gid_tbl_len = 1; props->pkey_tbl_len = 1; - props->active_width = 2; - props->active_speed = IB_SPEED_DDR; props->max_msg_sz = -1; - return 0; + return ret; } static ssize_t hw_rev_show(struct device *dev, -- cgit From a52dc3a100958f4bf5e921067ba626c2caf8e55f Mon Sep 17 00:00:00 2001 From: Bryan Tan Date: Mon, 28 Oct 2019 18:14:52 +0000 Subject: RDMA/vmw_pvrdma: Use resource ids from physical device if available This change allows the RDMA stack to use physical resource numbers if they are passed up from the device. This is accomplished by separating the concept of the QP number from the QP handle. Previously, the two were the same, as the QP number was exposed to the guest and also used to reference a virtual QP in the device backend. With physical resource numbers exposed, the QP number given to the guest is the number assigned from the physical HCA's QP, while the QP handle is still the internal handle used to reference a virtual QP. Regardless of whether the device is exposing physical ids, the driver will still try to pick up the QP handle from the backend if possible. The MR keys exposed to the guest will also be the MR keys created by the physical HCA, instead of virtual MR keys. The distinction between handle and keys is already present for MRs so there is no need to do anything special here. A new version of the create QP response has been added to the device API to pass up the QP number and handle. The driver will also report these to userspace in the udata response if userspace supports it or not create the queuepair if not. I also had to do a refactor of the destroy qp code to reuse it if we fail to copy to userspace. Link: https://lore.kernel.org/r/20191028181444.19448-1-aditr@vmware.com Reviewed-by: Jorgen Hansen Signed-off-by: Adit Ranadive Signed-off-by: Bryan Tan Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h | 15 ++- drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 115 ++++++++++++++++------ 2 files changed, 101 insertions(+), 29 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h index 8f9749d54688..86a6c054ea26 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h @@ -58,7 +58,8 @@ #define PVRDMA_ROCEV1_VERSION 17 #define PVRDMA_ROCEV2_VERSION 18 #define PVRDMA_PPN64_VERSION 19 -#define PVRDMA_VERSION PVRDMA_PPN64_VERSION +#define PVRDMA_QPHANDLE_VERSION 20 +#define PVRDMA_VERSION PVRDMA_QPHANDLE_VERSION #define PVRDMA_BOARD_ID 1 #define PVRDMA_REV_ID 1 @@ -581,6 +582,17 @@ struct pvrdma_cmd_create_qp_resp { u32 max_inline_data; }; +struct pvrdma_cmd_create_qp_resp_v2 { + struct pvrdma_cmd_resp_hdr hdr; + u32 qpn; + u32 qp_handle; + u32 max_send_wr; + u32 max_recv_wr; + u32 max_send_sge; + u32 max_recv_sge; + u32 max_inline_data; +}; + struct pvrdma_cmd_modify_qp { struct pvrdma_cmd_hdr hdr; u32 qp_handle; @@ -663,6 +675,7 @@ union pvrdma_cmd_resp { struct pvrdma_cmd_create_cq_resp create_cq_resp; struct pvrdma_cmd_resize_cq_resp resize_cq_resp; struct pvrdma_cmd_create_qp_resp create_qp_resp; + struct pvrdma_cmd_create_qp_resp_v2 create_qp_resp_v2; struct pvrdma_cmd_query_qp_resp query_qp_resp; struct pvrdma_cmd_destroy_qp_resp destroy_qp_resp; struct pvrdma_cmd_create_srq_resp create_srq_resp; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index bca6a58a442e..22daf2389d95 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -52,6 +52,9 @@ #include "pvrdma.h" +static void __pvrdma_destroy_qp(struct pvrdma_dev *dev, + struct pvrdma_qp *qp); + static inline void get_cqs(struct pvrdma_qp *qp, struct pvrdma_cq **send_cq, struct pvrdma_cq **recv_cq) { @@ -195,7 +198,9 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, union pvrdma_cmd_resp rsp; struct pvrdma_cmd_create_qp *cmd = &req.create_qp; struct pvrdma_cmd_create_qp_resp *resp = &rsp.create_qp_resp; + struct pvrdma_cmd_create_qp_resp_v2 *resp_v2 = &rsp.create_qp_resp_v2; struct pvrdma_create_qp ucmd; + struct pvrdma_create_qp_resp qp_resp = {}; unsigned long flags; int ret; bool is_srq = !!init_attr->srq; @@ -260,6 +265,15 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, goto err_qp; } + /* Userspace supports qpn and qp handles? */ + if (dev->dsr_version >= PVRDMA_QPHANDLE_VERSION && + udata->outlen < sizeof(qp_resp)) { + dev_warn(&dev->pdev->dev, + "create queuepair not supported\n"); + ret = -EOPNOTSUPP; + goto err_qp; + } + if (!is_srq) { /* set qp->sq.wqe_cnt, shift, buf_size.. */ qp->rumem = ib_umem_get(udata, ucmd.rbuf_addr, @@ -379,13 +393,33 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, } /* max_send_wr/_recv_wr/_send_sge/_recv_sge/_inline_data */ - qp->qp_handle = resp->qpn; qp->port = init_attr->port_num; - qp->ibqp.qp_num = resp->qpn; + + if (dev->dsr_version >= PVRDMA_QPHANDLE_VERSION) { + qp->ibqp.qp_num = resp_v2->qpn; + qp->qp_handle = resp_v2->qp_handle; + } else { + qp->ibqp.qp_num = resp->qpn; + qp->qp_handle = resp->qpn; + } + spin_lock_irqsave(&dev->qp_tbl_lock, flags); dev->qp_tbl[qp->qp_handle % dev->dsr->caps.max_qp] = qp; spin_unlock_irqrestore(&dev->qp_tbl_lock, flags); + if (udata) { + qp_resp.qpn = qp->ibqp.qp_num; + qp_resp.qp_handle = qp->qp_handle; + + if (ib_copy_to_udata(udata, &qp_resp, + min(udata->outlen, sizeof(qp_resp)))) { + dev_warn(&dev->pdev->dev, + "failed to copy back udata\n"); + __pvrdma_destroy_qp(dev, qp); + return ERR_PTR(-EINVAL); + } + } + return &qp->ibqp; err_pdir: @@ -400,27 +434,15 @@ err_qp: return ERR_PTR(ret); } -static void pvrdma_free_qp(struct pvrdma_qp *qp) +static void _pvrdma_free_qp(struct pvrdma_qp *qp) { + unsigned long flags; struct pvrdma_dev *dev = to_vdev(qp->ibqp.device); - struct pvrdma_cq *scq; - struct pvrdma_cq *rcq; - unsigned long flags, scq_flags, rcq_flags; - - /* In case cq is polling */ - get_cqs(qp, &scq, &rcq); - pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags); - - _pvrdma_flush_cqe(qp, scq); - if (scq != rcq) - _pvrdma_flush_cqe(qp, rcq); spin_lock_irqsave(&dev->qp_tbl_lock, flags); dev->qp_tbl[qp->qp_handle] = NULL; spin_unlock_irqrestore(&dev->qp_tbl_lock, flags); - pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags); - if (refcount_dec_and_test(&qp->refcnt)) complete(&qp->free); wait_for_completion(&qp->free); @@ -435,34 +457,71 @@ static void pvrdma_free_qp(struct pvrdma_qp *qp) atomic_dec(&dev->num_qps); } -/** - * pvrdma_destroy_qp - destroy a queue pair - * @qp: the queue pair to destroy - * @udata: user data or null for kernel object - * - * @return: 0 on success. - */ -int pvrdma_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) +static void pvrdma_free_qp(struct pvrdma_qp *qp) +{ + struct pvrdma_cq *scq; + struct pvrdma_cq *rcq; + unsigned long scq_flags, rcq_flags; + + /* In case cq is polling */ + get_cqs(qp, &scq, &rcq); + pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags); + + _pvrdma_flush_cqe(qp, scq); + if (scq != rcq) + _pvrdma_flush_cqe(qp, rcq); + + /* + * We're now unlocking the CQs before clearing out the qp handle this + * should still be safe. We have destroyed the backend QP and flushed + * the CQEs so there should be no other completions for this QP. + */ + pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags); + + _pvrdma_free_qp(qp); +} + +static inline void _pvrdma_destroy_qp_work(struct pvrdma_dev *dev, + u32 qp_handle) { - struct pvrdma_qp *vqp = to_vqp(qp); union pvrdma_cmd_req req; struct pvrdma_cmd_destroy_qp *cmd = &req.destroy_qp; int ret; memset(cmd, 0, sizeof(*cmd)); cmd->hdr.cmd = PVRDMA_CMD_DESTROY_QP; - cmd->qp_handle = vqp->qp_handle; + cmd->qp_handle = qp_handle; - ret = pvrdma_cmd_post(to_vdev(qp->device), &req, NULL, 0); + ret = pvrdma_cmd_post(dev, &req, NULL, 0); if (ret < 0) - dev_warn(&to_vdev(qp->device)->pdev->dev, + dev_warn(&dev->pdev->dev, "destroy queuepair failed, error: %d\n", ret); +} +/** + * pvrdma_destroy_qp - destroy a queue pair + * @qp: the queue pair to destroy + * @udata: user data or null for kernel object + * + * @return: always 0. + */ +int pvrdma_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) +{ + struct pvrdma_qp *vqp = to_vqp(qp); + + _pvrdma_destroy_qp_work(to_vdev(qp->device), vqp->qp_handle); pvrdma_free_qp(vqp); return 0; } +static void __pvrdma_destroy_qp(struct pvrdma_dev *dev, + struct pvrdma_qp *qp) +{ + _pvrdma_destroy_qp_work(dev, qp->qp_handle); + _pvrdma_free_qp(qp); +} + /** * pvrdma_modify_qp - modify queue pair attributes * @ibqp: the queue pair -- cgit From fb985e278a30224183fdf3d56e2f69cfdef88d4e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:21 -0300 Subject: RDMA/mlx5: Use SRCU properly in ODP prefetch When working with SRCU protected xarrays the xarray itself should be the SRCU 'update' point. Instead prefetch is using live as the SRCU update point and this prevents switching the locking design to use the xarray instead. To solve this the prefetch must only read from the xarray once, and hold on to the actual MR pointer for the duration of the async operation. Incrementing num_pending_prefetch delays destruction of the MR, so it is suitable. Prefetch calls directly to the pagefault_mr using the MR pointer and only does a single xarray lookup. All the testing if a MR is prefetchable or not is now done only in the prefetch code and removed from the pagefault critical path. Link: https://lore.kernel.org/r/20191009160934.3143-2-jgg@ziepe.ca Reviewed-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/odp.c | 262 ++++++++++++++++++--------------------- 1 file changed, 121 insertions(+), 141 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 3f9478d19376..09dac97e4ca4 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -606,16 +606,13 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free)); } -#define MLX5_PF_FLAGS_PREFETCH BIT(0) #define MLX5_PF_FLAGS_DOWNGRADE BIT(1) -static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, - u64 io_virt, size_t bcnt, u32 *bytes_mapped, - u32 flags) +static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, + u32 *bytes_mapped, u32 flags) { int npages = 0, current_seq, page_shift, ret, np; struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem); bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE; - bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH; u64 access_mask; u64 start_idx, page_mask; struct ib_umem_odp *odp; @@ -639,14 +636,6 @@ next_mr: start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift; access_mask = ODP_READ_ALLOWED_BIT; - if (prefetch && !downgrade && !odp->umem.writable) { - /* prefetch with write-access must - * be supported by the MR - */ - ret = -EINVAL; - goto out; - } - if (odp->umem.writable && !downgrade) access_mask |= ODP_WRITE_ALLOWED_BIT; @@ -681,7 +670,8 @@ next_mr: if (ret < 0) { if (ret != -EAGAIN) - mlx5_ib_err(dev, "Failed to update mkey page tables\n"); + mlx5_ib_err(mr->dev, + "Failed to update mkey page tables\n"); goto out; } @@ -700,8 +690,10 @@ next_mr: io_virt += size; next = odp_next(odp); if (unlikely(!next || ib_umem_start(next) != io_virt)) { - mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n", - io_virt, next); + mlx5_ib_dbg( + mr->dev, + "next implicit leaf removed at 0x%llx. got %p\n", + io_virt, next); return -EAGAIN; } odp = next; @@ -718,7 +710,7 @@ out: if (!wait_for_completion_timeout(&odp->notifier_completion, timeout)) { mlx5_ib_warn( - dev, + mr->dev, "timeout waiting for mmu notifier. seq %d against %d. notifiers_count=%d\n", current_seq, odp->notifiers_seq, odp->notifiers_count); @@ -775,10 +767,9 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, struct ib_pd *pd, u32 key, u64 io_virt, size_t bcnt, u32 *bytes_committed, - u32 *bytes_mapped, u32 flags) + u32 *bytes_mapped) { int npages = 0, srcu_key, ret, i, outlen, cur_outlen = 0, depth = 0; - bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH; struct pf_frame *head = NULL, *frame; struct mlx5_core_mkey *mmkey; struct mlx5_ib_mr *mr; @@ -800,12 +791,6 @@ next_mr: goto srcu_unlock; } - if (prefetch && mmkey->type != MLX5_MKEY_MR) { - mlx5_ib_dbg(dev, "prefetch is allowed only for MR\n"); - ret = -EINVAL; - goto srcu_unlock; - } - switch (mmkey->type) { case MLX5_MKEY_MR: mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); @@ -815,17 +800,6 @@ next_mr: goto srcu_unlock; } - if (prefetch) { - if (!is_odp_mr(mr) || - mr->ibmr.pd != pd) { - mlx5_ib_dbg(dev, "Invalid prefetch request: %s\n", - is_odp_mr(mr) ? "MR is not ODP" : - "PD is not of the MR"); - ret = -EINVAL; - goto srcu_unlock; - } - } - if (!is_odp_mr(mr)) { mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n", key); @@ -835,7 +809,7 @@ next_mr: goto srcu_unlock; } - ret = pagefault_mr(dev, mr, io_virt, bcnt, bytes_mapped, flags); + ret = pagefault_mr(mr, io_virt, bcnt, bytes_mapped, 0); if (ret < 0) goto srcu_unlock; @@ -1009,7 +983,7 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev, ret = pagefault_single_data_segment(dev, NULL, key, io_virt, bcnt, &pfault->bytes_committed, - bytes_mapped, 0); + bytes_mapped); if (ret < 0) break; npages += ret; @@ -1292,8 +1266,7 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev, } ret = pagefault_single_data_segment(dev, NULL, rkey, address, length, - &pfault->bytes_committed, NULL, - 0); + &pfault->bytes_committed, NULL); if (ret == -EAGAIN) { /* We're racing with an invalidation, don't prefetch */ prefetch_activated = 0; @@ -1320,8 +1293,7 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev, ret = pagefault_single_data_segment(dev, NULL, rkey, address, prefetch_len, - &bytes_committed, NULL, - 0); + &bytes_committed, NULL); if (ret < 0 && ret != -EAGAIN) { mlx5_ib_dbg(dev, "Prefetch failed. ret: %d, QP 0x%x, address: 0x%.16llx, length = 0x%.16x\n", ret, pfault->token, address, prefetch_len); @@ -1624,114 +1596,138 @@ int mlx5_ib_odp_init(void) struct prefetch_mr_work { struct work_struct work; - struct ib_pd *pd; u32 pf_flags; u32 num_sge; - struct ib_sge sg_list[0]; + struct { + u64 io_virt; + struct mlx5_ib_mr *mr; + size_t length; + } frags[]; }; -static void num_pending_prefetch_dec(struct mlx5_ib_dev *dev, - struct ib_sge *sg_list, u32 num_sge, - u32 from) +static void destroy_prefetch_work(struct prefetch_mr_work *work) { u32 i; - int srcu_key; - - srcu_key = srcu_read_lock(&dev->mr_srcu); - for (i = from; i < num_sge; ++i) { - struct mlx5_core_mkey *mmkey; - struct mlx5_ib_mr *mr; - - mmkey = xa_load(&dev->mdev->priv.mkey_table, - mlx5_base_mkey(sg_list[i].lkey)); - mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - atomic_dec(&mr->num_pending_prefetch); - } - - srcu_read_unlock(&dev->mr_srcu, srcu_key); + for (i = 0; i < work->num_sge; ++i) + atomic_dec(&work->frags[i].mr->num_pending_prefetch); + kvfree(work); } -static bool num_pending_prefetch_inc(struct ib_pd *pd, - struct ib_sge *sg_list, u32 num_sge) +static struct mlx5_ib_mr * +get_prefetchable_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, + u32 lkey) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - bool ret = true; - u32 i; + struct mlx5_core_mkey *mmkey; + struct ib_umem_odp *odp; + struct mlx5_ib_mr *mr; - for (i = 0; i < num_sge; ++i) { - struct mlx5_core_mkey *mmkey; - struct mlx5_ib_mr *mr; + lockdep_assert_held(&dev->mr_srcu); - mmkey = xa_load(&dev->mdev->priv.mkey_table, - mlx5_base_mkey(sg_list[i].lkey)); - if (!mmkey || mmkey->key != sg_list[i].lkey) { - ret = false; - break; - } + mmkey = xa_load(&dev->mdev->priv.mkey_table, mlx5_base_mkey(lkey)); + if (!mmkey || mmkey->key != lkey || mmkey->type != MLX5_MKEY_MR) + return NULL; - if (mmkey->type != MLX5_MKEY_MR) { - ret = false; - break; - } + mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); + if (!smp_load_acquire(&mr->live)) + return NULL; - if (!smp_load_acquire(&mr->live)) { - ret = false; - break; - } + if (mr->ibmr.pd != pd || !is_odp_mr(mr)) + return NULL; - if (mr->ibmr.pd != pd) { - ret = false; - break; - } + /* + * Implicit child MRs are internal and userspace should not refer to + * them. + */ + if (mr->parent) + return NULL; - atomic_inc(&mr->num_pending_prefetch); - } + odp = to_ib_umem_odp(mr->umem); - if (!ret) - num_pending_prefetch_dec(dev, sg_list, i, 0); + /* prefetch with write-access must be supported by the MR */ + if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE && + !odp->umem.writable) + return NULL; - return ret; + return mr; } -static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, u32 pf_flags, - struct ib_sge *sg_list, u32 num_sge) +static void mlx5_ib_prefetch_mr_work(struct work_struct *w) { + struct prefetch_mr_work *work = + container_of(w, struct prefetch_mr_work, work); + u32 bytes_mapped = 0; u32 i; - int ret = 0; - struct mlx5_ib_dev *dev = to_mdev(pd->device); + + for (i = 0; i < work->num_sge; ++i) + pagefault_mr(work->frags[i].mr, work->frags[i].io_virt, + work->frags[i].length, &bytes_mapped, + work->pf_flags); + + destroy_prefetch_work(work); +} + +static bool init_prefetch_work(struct ib_pd *pd, + enum ib_uverbs_advise_mr_advice advice, + u32 pf_flags, struct prefetch_mr_work *work, + struct ib_sge *sg_list, u32 num_sge) +{ + u32 i; + + INIT_WORK(&work->work, mlx5_ib_prefetch_mr_work); + work->pf_flags = pf_flags; for (i = 0; i < num_sge; ++i) { - struct ib_sge *sg = &sg_list[i]; - int bytes_committed = 0; + work->frags[i].io_virt = sg_list[i].addr; + work->frags[i].length = sg_list[i].length; + work->frags[i].mr = + get_prefetchable_mr(pd, advice, sg_list[i].lkey); + if (!work->frags[i].mr) { + work->num_sge = i - 1; + if (i) + destroy_prefetch_work(work); + return false; + } - ret = pagefault_single_data_segment(dev, pd, sg->lkey, sg->addr, - sg->length, - &bytes_committed, NULL, - pf_flags); - if (ret < 0) - break; + /* Keep the MR pointer will valid outside the SRCU */ + atomic_inc(&work->frags[i].mr->num_pending_prefetch); } - - return ret < 0 ? ret : 0; + work->num_sge = num_sge; + return true; } -static void mlx5_ib_prefetch_mr_work(struct work_struct *work) +static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, + enum ib_uverbs_advise_mr_advice advice, + u32 pf_flags, struct ib_sge *sg_list, + u32 num_sge) { - struct prefetch_mr_work *w = - container_of(work, struct prefetch_mr_work, work); + struct mlx5_ib_dev *dev = to_mdev(pd->device); + u32 bytes_mapped = 0; + int srcu_key; + int ret = 0; + u32 i; - if (ib_device_try_get(w->pd->device)) { - mlx5_ib_prefetch_sg_list(w->pd, w->pf_flags, w->sg_list, - w->num_sge); - ib_device_put(w->pd->device); + srcu_key = srcu_read_lock(&dev->mr_srcu); + for (i = 0; i < num_sge; ++i) { + struct mlx5_ib_mr *mr; + + mr = get_prefetchable_mr(pd, advice, sg_list[i].lkey); + if (!mr) { + ret = -ENOENT; + goto out; + } + ret = pagefault_mr(mr, sg_list[i].addr, sg_list[i].length, + &bytes_mapped, pf_flags); + if (ret < 0) + goto out; } + ret = 0; - num_pending_prefetch_dec(to_mdev(w->pd->device), w->sg_list, - w->num_sge, 0); - kvfree(w); +out: + srcu_read_unlock(&dev->mr_srcu, srcu_key); + return ret; } int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, @@ -1739,43 +1735,27 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, u32 flags, struct ib_sge *sg_list, u32 num_sge) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - u32 pf_flags = MLX5_PF_FLAGS_PREFETCH; + u32 pf_flags = 0; struct prefetch_mr_work *work; - bool valid_req; int srcu_key; if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH) pf_flags |= MLX5_PF_FLAGS_DOWNGRADE; if (flags & IB_UVERBS_ADVISE_MR_FLAG_FLUSH) - return mlx5_ib_prefetch_sg_list(pd, pf_flags, sg_list, + return mlx5_ib_prefetch_sg_list(pd, advice, pf_flags, sg_list, num_sge); - work = kvzalloc(struct_size(work, sg_list, num_sge), GFP_KERNEL); + work = kvzalloc(struct_size(work, frags, num_sge), GFP_KERNEL); if (!work) return -ENOMEM; - memcpy(work->sg_list, sg_list, num_sge * sizeof(struct ib_sge)); - - /* It is guaranteed that the pd when work is executed is the pd when - * work was queued since pd can't be destroyed while it holds MRs and - * destroying a MR leads to flushing the workquque - */ - work->pd = pd; - work->pf_flags = pf_flags; - work->num_sge = num_sge; - - INIT_WORK(&work->work, mlx5_ib_prefetch_mr_work); - srcu_key = srcu_read_lock(&dev->mr_srcu); - - valid_req = num_pending_prefetch_inc(pd, sg_list, num_sge); - if (valid_req) - queue_work(system_unbound_wq, &work->work); - else - kvfree(work); - + if (!init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge)) { + srcu_read_unlock(&dev->mr_srcu, srcu_key); + return -EINVAL; + } + queue_work(system_unbound_wq, &work->work); srcu_read_unlock(&dev->mr_srcu, srcu_key); - - return valid_req ? 0 : -EINVAL; + return 0; } -- cgit From 50211ec9443ff2e16db43f691dfcc0ef435cf45d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:22 -0300 Subject: RDMA/mlx5: Split sig_err MR data into its own xarray The locking model for signature is completely different than ODP, do not share the same xarray that relies on SRCU locking to support ODP. Simply store the active mlx5_core_sig_ctx's in an xarray when signature MRs are created and rely on trivial xarray locking to serialize everything. The overhead of storing only a handful of SIG related MRs is going to be much less than an xarray full of every mkey. Link: https://lore.kernel.org/r/20191009160934.3143-3-jgg@ziepe.ca Reviewed-by: Artemy Kovalyov Reviewed-by: Max Gurtovoy Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/cq.c | 33 ++++++++++++++++----------------- drivers/infiniband/hw/mlx5/main.c | 2 ++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 ++ drivers/infiniband/hw/mlx5/mr.c | 8 ++++++++ 4 files changed, 28 insertions(+), 17 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 45f48cde6b9d..cc938d27f913 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -423,9 +423,6 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq, struct mlx5_cqe64 *cqe64; struct mlx5_core_qp *mqp; struct mlx5_ib_wq *wq; - struct mlx5_sig_err_cqe *sig_err_cqe; - struct mlx5_core_mkey *mmkey; - struct mlx5_ib_mr *mr; uint8_t opcode; uint32_t qpn; u16 wqe_ctr; @@ -519,27 +516,29 @@ repoll: } } break; - case MLX5_CQE_SIG_ERR: - sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64; + case MLX5_CQE_SIG_ERR: { + struct mlx5_sig_err_cqe *sig_err_cqe = + (struct mlx5_sig_err_cqe *)cqe64; + struct mlx5_core_sig_ctx *sig; - xa_lock(&dev->mdev->priv.mkey_table); - mmkey = xa_load(&dev->mdev->priv.mkey_table, + xa_lock(&dev->sig_mrs); + sig = xa_load(&dev->sig_mrs, mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); - mr = to_mibmr(mmkey); - get_sig_err_item(sig_err_cqe, &mr->sig->err_item); - mr->sig->sig_err_exists = true; - mr->sig->sigerr_count++; + get_sig_err_item(sig_err_cqe, &sig->err_item); + sig->sig_err_exists = true; + sig->sigerr_count++; mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n", - cq->mcq.cqn, mr->sig->err_item.key, - mr->sig->err_item.err_type, - mr->sig->err_item.sig_err_offset, - mr->sig->err_item.expected, - mr->sig->err_item.actual); + cq->mcq.cqn, sig->err_item.key, + sig->err_item.err_type, + sig->err_item.sig_err_offset, + sig->err_item.expected, + sig->err_item.actual); - xa_unlock(&dev->mdev->priv.mkey_table); + xa_unlock(&dev->sig_mrs); goto repoll; } + } return 0; } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 831539419c30..b7eea724beaa 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6150,6 +6150,7 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) cleanup_srcu_struct(&dev->mr_srcu); } + WARN_ON(!xa_empty(&dev->sig_mrs)); WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES)); } @@ -6201,6 +6202,7 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) mutex_init(&dev->cap_mask_mutex); INIT_LIST_HEAD(&dev->qp_list); spin_lock_init(&dev->reset_flow_resource_lock); + xa_init(&dev->sig_mrs); spin_lock_init(&dev->dm.lock); dev->dm.dev = mdev; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 1a98ee2e01c4..f4ce58354544 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -999,6 +999,8 @@ struct mlx5_ib_dev { struct mlx5_srq_table srq_table; struct mlx5_async_ctx async_ctx; struct mlx5_devx_event_table devx_event_table; + + struct xarray sig_mrs; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 630599311586..fd24640606c1 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1560,6 +1560,7 @@ static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) mr->sig->psv_wire.psv_idx)) mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", mr->sig->psv_wire.psv_idx); + xa_erase(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key)); kfree(mr->sig); mr->sig = NULL; } @@ -1797,8 +1798,15 @@ static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, if (err) goto err_free_mtt_mr; + err = xa_err(xa_store(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key), + mr->sig, GFP_KERNEL)); + if (err) + goto err_free_descs; return 0; +err_free_descs: + destroy_mkey(dev, mr); + mlx5_free_priv_descs(mr); err_free_mtt_mr: dereg_mr(to_mdev(mr->mtt_mr->ibmr.device), mr->mtt_mr); mr->mtt_mr = NULL; -- cgit From 806b101b2bfa800a9c779336b750bee39c7fb3b4 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:23 -0300 Subject: RDMA/mlx5: Use a dedicated mkey xarray for ODP There is a per device xarray storing mkeys that is used to store every mkey in the system. However, this xarray is now only read by ODP for certain ODP designated MRs (ODP, implicit ODP, MW, DEVX_INDIRECT). Create an xarray only for use by ODP, that only contains ODP related MKeys. This xarray is protected by SRCU and all erases are protected by a synchronize. This improves performance: - All MRs in the odp_mkeys xarray are ODP MRs, so some tests for is_odp() can be deleted. The xarray will also consume fewer nodes. - normal MR's are never mixed with ODP MRs in a SRCU data structure so performance sucking synchronize_srcu() on every MR destruction is not needed. - No smp_load_acquire(live) and xa_load() double barrier on read Due to the SRCU locking scheme care must be taken with the placement of the xa_store(). Once it completes the MR is immediately visible to other threads and only through a xa_erase() & synchronize_srcu() cycle could it be destroyed. Link: https://lore.kernel.org/r/20191009160934.3143-4-jgg@ziepe.ca Reviewed-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 8 ++-- drivers/infiniband/hw/mlx5/main.c | 17 ++++---- drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 ++- drivers/infiniband/hw/mlx5/mr.c | 43 ++++++++++--------- drivers/infiniband/hw/mlx5/odp.c | 83 +++++++++++++++++++----------------- 5 files changed, 83 insertions(+), 73 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index d609f4659afb..6b1fca91d7d3 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1265,8 +1265,8 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj, mkey->pd = MLX5_GET(mkc, mkc, pd); devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size); - return xa_err(xa_store(&dev->mdev->priv.mkey_table, - mlx5_base_mkey(mkey->key), mkey, GFP_KERNEL)); + return xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mkey->key), mkey, + GFP_KERNEL)); } static int devx_handle_mkey_create(struct mlx5_ib_dev *dev, @@ -1345,9 +1345,9 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, * the mmkey, we must wait for that to stop before freeing the * mkey, as another allocation could get the same mkey #. */ - xa_erase(&obj->ib_dev->mdev->priv.mkey_table, + xa_erase(&obj->ib_dev->odp_mkeys, mlx5_base_mkey(obj->devx_mr.mmkey.key)); - synchronize_srcu(&dev->mr_srcu); + synchronize_srcu(&dev->odp_srcu); } if (obj->flags & DEVX_OBJ_FLAGS_DCT) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index b7eea724beaa..4692c37b057c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6145,10 +6145,10 @@ static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device, static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_cleanup_multiport_master(dev); - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - srcu_barrier(&dev->mr_srcu); - cleanup_srcu_struct(&dev->mr_srcu); - } + WARN_ON(!xa_empty(&dev->odp_mkeys)); + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + srcu_barrier(&dev->odp_srcu); + cleanup_srcu_struct(&dev->odp_srcu); WARN_ON(!xa_empty(&dev->sig_mrs)); WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES)); @@ -6202,16 +6202,15 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) mutex_init(&dev->cap_mask_mutex); INIT_LIST_HEAD(&dev->qp_list); spin_lock_init(&dev->reset_flow_resource_lock); + xa_init(&dev->odp_mkeys); xa_init(&dev->sig_mrs); spin_lock_init(&dev->dm.lock); dev->dm.dev = mdev; - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - err = init_srcu_struct(&dev->mr_srcu); - if (err) - goto err_mp; - } + err = init_srcu_struct(&dev->odp_srcu); + if (err) + goto err_mp; return 0; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index f4ce58354544..2cf91db6a36f 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -606,7 +606,6 @@ struct mlx5_ib_mr { struct mlx5_ib_dev *dev; u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; struct mlx5_core_sig_ctx *sig; - unsigned int live; void *descs_alloc; int access_flags; /* Needed for rereg MR */ @@ -975,7 +974,9 @@ struct mlx5_ib_dev { * Sleepable RCU that prevents destruction of MRs while they are still * being used by a page fault handler. */ - struct srcu_struct mr_srcu; + struct srcu_struct odp_srcu; + struct xarray odp_mkeys; + u32 null_mkey; struct mlx5_ib_flow_db *flow_db; /* protect resources needed as part of reset flow */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index fd24640606c1..60b12dc53004 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -59,13 +59,9 @@ static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev) static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { - int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); + WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key))); - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) - /* Wait until all page fault handlers using the mr complete. */ - synchronize_srcu(&dev->mr_srcu); - - return err; + return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); } static int order2idx(struct mlx5_ib_dev *dev, int order) @@ -218,9 +214,6 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); } - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) - synchronize_srcu(&dev->mr_srcu); - list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { list_del(&mr->list); kfree(mr); @@ -555,10 +548,6 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); } -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - synchronize_srcu(&dev->mr_srcu); -#endif - list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { list_del(&mr->list); kfree(mr); @@ -1338,9 +1327,14 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (is_odp_mr(mr)) { to_ib_umem_odp(mr->umem)->private = mr; atomic_set(&mr->num_pending_prefetch, 0); + err = xa_err(xa_store(&dev->odp_mkeys, + mlx5_base_mkey(mr->mmkey.key), &mr->mmkey, + GFP_KERNEL)); + if (err) { + dereg_mr(dev, mr); + return ERR_PTR(err); + } } - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) - smp_store_release(&mr->live, 1); return &mr->ibmr; error: @@ -1582,10 +1576,10 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) /* Prevent new page faults and * prefetch requests from succeeding */ - WRITE_ONCE(mr->live, 0); + xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); /* Wait for all running page-fault handlers to finish. */ - synchronize_srcu(&dev->mr_srcu); + synchronize_srcu(&dev->odp_srcu); /* dequeue pending prefetch requests for the mr */ if (atomic_read(&mr->num_pending_prefetch)) @@ -1959,9 +1953,19 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, } } + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { + err = xa_err(xa_store(&dev->odp_mkeys, + mlx5_base_mkey(mw->mmkey.key), &mw->mmkey, + GFP_KERNEL)); + if (err) + goto free_mkey; + } + kfree(in); return &mw->ibmw; +free_mkey: + mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey); free: kfree(mw); kfree(in); @@ -1975,13 +1979,12 @@ int mlx5_ib_dealloc_mw(struct ib_mw *mw) int err; if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - xa_erase(&dev->mdev->priv.mkey_table, - mlx5_base_mkey(mmw->mmkey.key)); + xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key)); /* * pagefault_single_data_segment() may be accessing mmw under * SRCU if the user bound an ODP MR to this MW. */ - synchronize_srcu(&dev->mr_srcu); + synchronize_srcu(&dev->odp_srcu); } err = mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 09dac97e4ca4..1f06433bcfc8 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -199,7 +199,7 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, * locking around the children list. */ lockdep_assert_held(&to_ib_umem_odp(mr->umem)->umem_mutex); - lockdep_assert_held(&mr->dev->mr_srcu); + lockdep_assert_held(&mr->dev->odp_srcu); odp = odp_lookup(offset * MLX5_IMR_MTT_SIZE, nentries * MLX5_IMR_MTT_SIZE, mr); @@ -229,16 +229,16 @@ static void mr_leaf_free_action(struct work_struct *work) int srcu_key; mr->parent = NULL; - synchronize_srcu(&mr->dev->mr_srcu); + synchronize_srcu(&mr->dev->odp_srcu); - if (smp_load_acquire(&imr->live)) { - srcu_key = srcu_read_lock(&mr->dev->mr_srcu); + if (xa_load(&mr->dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key))) { + srcu_key = srcu_read_lock(&mr->dev->odp_srcu); mutex_lock(&odp_imr->umem_mutex); mlx5_ib_update_xlt(imr, idx, 1, 0, MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); mutex_unlock(&odp_imr->umem_mutex); - srcu_read_unlock(&mr->dev->mr_srcu, srcu_key); + srcu_read_unlock(&mr->dev->odp_srcu, srcu_key); } ib_umem_odp_release(odp); mlx5_mr_cache_free(mr->dev, mr); @@ -318,7 +318,7 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, if (unlikely(!umem_odp->npages && mr->parent && !umem_odp->dying)) { - WRITE_ONCE(mr->live, 0); + xa_erase(&mr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); umem_odp->dying = 1; atomic_inc(&mr->parent->num_leaf_free); schedule_work(&umem_odp->work); @@ -430,6 +430,11 @@ static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd, if (IS_ERR(mr)) return mr; + err = xa_reserve(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), + GFP_KERNEL); + if (err) + goto out_mr; + mr->ibmr.pd = pd; mr->dev = dev; @@ -455,7 +460,7 @@ static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd, } if (err) - goto fail; + goto out_release; mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; @@ -465,7 +470,9 @@ static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd, return mr; -fail: +out_release: + xa_release(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); +out_mr: mlx5_ib_err(dev, "Failed to register MKEY %d\n", err); mlx5_mr_cache_free(dev, mr); @@ -513,7 +520,8 @@ next_mr: mtt->parent = mr; INIT_WORK(&odp->work, mr_leaf_free_action); - smp_store_release(&mtt->live, 1); + xa_store(&dev->odp_mkeys, mlx5_base_mkey(mtt->mmkey.key), + &mtt->mmkey, GFP_ATOMIC); if (!nentries) start_idx = addr >> MLX5_IMR_MTT_SHIFT; @@ -567,7 +575,8 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, init_waitqueue_head(&imr->q_leaf_free); atomic_set(&imr->num_leaf_free, 0); atomic_set(&imr->num_pending_prefetch, 0); - smp_store_release(&imr->live, 1); + xa_store(&imr->dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key), + &imr->mmkey, GFP_ATOMIC); return imr; } @@ -778,13 +787,28 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, size_t offset; int ndescs; - srcu_key = srcu_read_lock(&dev->mr_srcu); + srcu_key = srcu_read_lock(&dev->odp_srcu); io_virt += *bytes_committed; bcnt -= *bytes_committed; next_mr: - mmkey = xa_load(&dev->mdev->priv.mkey_table, mlx5_base_mkey(key)); + mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(key)); + if (!mmkey) { + mlx5_ib_dbg( + dev, + "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n", + key); + if (bytes_mapped) + *bytes_mapped += bcnt; + /* + * The user could specify a SGL with multiple lkeys and only + * some of them are ODP. Treat the non-ODP ones as fully + * faulted. + */ + ret = 0; + goto srcu_unlock; + } if (!mkey_is_eq(mmkey, key)) { mlx5_ib_dbg(dev, "failed to find mkey %x\n", key); ret = -EFAULT; @@ -794,20 +818,6 @@ next_mr: switch (mmkey->type) { case MLX5_MKEY_MR: mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - if (!smp_load_acquire(&mr->live) || !mr->ibmr.pd) { - mlx5_ib_dbg(dev, "got dead MR\n"); - ret = -EFAULT; - goto srcu_unlock; - } - - if (!is_odp_mr(mr)) { - mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n", - key); - if (bytes_mapped) - *bytes_mapped += bcnt; - ret = 0; - goto srcu_unlock; - } ret = pagefault_mr(mr, io_virt, bcnt, bytes_mapped, 0); if (ret < 0) @@ -902,7 +912,7 @@ srcu_unlock: } kfree(out); - srcu_read_unlock(&dev->mr_srcu, srcu_key); + srcu_read_unlock(&dev->odp_srcu, srcu_key); *bytes_committed = 0; return ret ? ret : npages; } @@ -1623,18 +1633,15 @@ get_prefetchable_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, struct ib_umem_odp *odp; struct mlx5_ib_mr *mr; - lockdep_assert_held(&dev->mr_srcu); + lockdep_assert_held(&dev->odp_srcu); - mmkey = xa_load(&dev->mdev->priv.mkey_table, mlx5_base_mkey(lkey)); + mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(lkey)); if (!mmkey || mmkey->key != lkey || mmkey->type != MLX5_MKEY_MR) return NULL; mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - if (!smp_load_acquire(&mr->live)) - return NULL; - - if (mr->ibmr.pd != pd || !is_odp_mr(mr)) + if (mr->ibmr.pd != pd) return NULL; /* @@ -1709,7 +1716,7 @@ static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, int ret = 0; u32 i; - srcu_key = srcu_read_lock(&dev->mr_srcu); + srcu_key = srcu_read_lock(&dev->odp_srcu); for (i = 0; i < num_sge; ++i) { struct mlx5_ib_mr *mr; @@ -1726,7 +1733,7 @@ static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, ret = 0; out: - srcu_read_unlock(&dev->mr_srcu, srcu_key); + srcu_read_unlock(&dev->odp_srcu, srcu_key); return ret; } @@ -1750,12 +1757,12 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, if (!work) return -ENOMEM; - srcu_key = srcu_read_lock(&dev->mr_srcu); + srcu_key = srcu_read_lock(&dev->odp_srcu); if (!init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge)) { - srcu_read_unlock(&dev->mr_srcu, srcu_key); + srcu_read_unlock(&dev->odp_srcu, srcu_key); return -EINVAL; } queue_work(system_unbound_wq, &work->work); - srcu_read_unlock(&dev->mr_srcu, srcu_key); + srcu_read_unlock(&dev->odp_srcu, srcu_key); return 0; } -- cgit From 74bddb3682f60df16ba24be335c94de348ba1b07 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:24 -0300 Subject: RDMA/mlx5: Delete struct mlx5_priv->mkey_table No users are left, delete it. Link: https://lore.kernel.org/r/20191009160934.3143-5-jgg@ziepe.ca Reviewed-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 60b12dc53004..fd94838a8845 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -90,8 +90,6 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context) struct mlx5_cache_ent *ent = &cache->ent[c]; u8 key; unsigned long flags; - struct xarray *mkeys = &dev->mdev->priv.mkey_table; - int err; spin_lock_irqsave(&ent->lock, flags); ent->pending--; @@ -118,13 +116,6 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context) ent->size++; spin_unlock_irqrestore(&ent->lock, flags); - xa_lock_irqsave(mkeys, flags); - err = xa_err(__xa_store(mkeys, mlx5_base_mkey(mr->mmkey.key), - &mr->mmkey, GFP_ATOMIC)); - xa_unlock_irqrestore(mkeys, flags); - if (err) - pr_err("Error inserting to mkey tree. 0x%x\n", -err); - if (!completion_done(&ent->compl)) complete(&ent->compl); } -- cgit From 3d5f3c54e7bc82a279c80c18087462c0ce00ba44 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:25 -0300 Subject: RDMA/mlx5: Rework implicit_mr_get_data This function is intended to loop across each MTT chunk in the implicit parent that intersects the range [io_virt, io_virt+bnct). But it is has a confusing construction, so: - Consistently use imr and odp_imr to refer to the implicit parent to avoid confusion with the normal mr and odp of the child - Directly compute the inclusive start/end indexes by shifting. This is clearer to understand the intent and avoids any errors from unaligned values of addr - Iterate directly over the range of MTT indexes, do not make a loop out of goto - Follow 'success oriented flow', with goto error unwind - Directly calculate the range of idx's that need update_xlt - Ensure that any leaf MR added to the interval tree always results in an update to the XLT Link: https://lore.kernel.org/r/20191009160934.3143-6-jgg@ziepe.ca Reviewed-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/odp.c | 123 ++++++++++++++++++++++----------------- 1 file changed, 69 insertions(+), 54 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 1f06433bcfc8..78e31dc694d9 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -479,78 +479,93 @@ out_mr: return ERR_PTR(err); } -static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *mr, - u64 io_virt, size_t bcnt) +static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, + unsigned long idx) { - struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.pd->device); - struct ib_umem_odp *odp, *result = NULL; - struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem); - u64 addr = io_virt & MLX5_IMR_MTT_MASK; - int nentries = 0, start_idx = 0, ret; + struct ib_umem_odp *odp; struct mlx5_ib_mr *mtt; - mutex_lock(&odp_mr->umem_mutex); - odp = odp_lookup(addr, 1, mr); + odp = ib_umem_odp_alloc_child(to_ib_umem_odp(imr->umem), + idx * MLX5_IMR_MTT_SIZE, + MLX5_IMR_MTT_SIZE); + if (IS_ERR(odp)) + return ERR_CAST(odp); - mlx5_ib_dbg(dev, "io_virt:%llx bcnt:%zx addr:%llx odp:%p\n", - io_virt, bcnt, addr, odp); + mtt = implicit_mr_alloc(imr->ibmr.pd, odp, 0, imr->access_flags); + if (IS_ERR(mtt)) { + ib_umem_odp_release(odp); + return mtt; + } -next_mr: - if (likely(odp)) { - if (nentries) - nentries++; - } else { - odp = ib_umem_odp_alloc_child(odp_mr, addr, MLX5_IMR_MTT_SIZE); - if (IS_ERR(odp)) { - mutex_unlock(&odp_mr->umem_mutex); - return ERR_CAST(odp); - } + odp->private = mtt; + mtt->umem = &odp->umem; + mtt->mmkey.iova = idx * MLX5_IMR_MTT_SIZE; + mtt->parent = imr; + INIT_WORK(&odp->work, mr_leaf_free_action); - mtt = implicit_mr_alloc(mr->ibmr.pd, odp, 0, - mr->access_flags); - if (IS_ERR(mtt)) { - mutex_unlock(&odp_mr->umem_mutex); - ib_umem_odp_release(odp); - return ERR_CAST(mtt); - } + xa_store(&mtt->dev->odp_mkeys, mlx5_base_mkey(mtt->mmkey.key), + &mtt->mmkey, GFP_ATOMIC); + return mtt; +} - odp->private = mtt; - mtt->umem = &odp->umem; - mtt->mmkey.iova = addr; - mtt->parent = mr; - INIT_WORK(&odp->work, mr_leaf_free_action); +static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *imr, + u64 io_virt, size_t bcnt) +{ + struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); + unsigned long end_idx = (io_virt + bcnt - 1) >> MLX5_IMR_MTT_SHIFT; + unsigned long idx = io_virt >> MLX5_IMR_MTT_SHIFT; + unsigned long inv_start_idx = end_idx + 1; + unsigned long inv_len = 0; + struct ib_umem_odp *result = NULL; + struct ib_umem_odp *odp; + int ret; - xa_store(&dev->odp_mkeys, mlx5_base_mkey(mtt->mmkey.key), - &mtt->mmkey, GFP_ATOMIC); + mutex_lock(&odp_imr->umem_mutex); + odp = odp_lookup(idx * MLX5_IMR_MTT_SIZE, 1, imr); + for (idx = idx; idx <= end_idx; idx++) { + if (unlikely(!odp)) { + struct mlx5_ib_mr *mtt; - if (!nentries) - start_idx = addr >> MLX5_IMR_MTT_SHIFT; - nentries++; - } + mtt = implicit_get_child_mr(imr, idx); + if (IS_ERR(mtt)) { + result = ERR_CAST(mtt); + goto out; + } + odp = to_ib_umem_odp(mtt->umem); + inv_start_idx = min(inv_start_idx, idx); + inv_len = idx - inv_start_idx + 1; + } - /* Return first odp if region not covered by single one */ - if (likely(!result)) - result = odp; + /* Return first odp if region not covered by single one */ + if (likely(!result)) + result = odp; - addr += MLX5_IMR_MTT_SIZE; - if (unlikely(addr < io_virt + bcnt)) { odp = odp_next(odp); - if (odp && ib_umem_start(odp) != addr) + if (odp && ib_umem_start(odp) != idx * MLX5_IMR_MTT_SIZE) odp = NULL; - goto next_mr; } - if (unlikely(nentries)) { - ret = mlx5_ib_update_xlt(mr, start_idx, nentries, 0, - MLX5_IB_UPD_XLT_INDIRECT | + /* + * Any time the children in the interval tree are changed we must + * perform an update of the xlt before exiting to ensure the HW and + * the tree remains synchronized. + */ +out: + if (likely(!inv_len)) + goto out_unlock; + + ret = mlx5_ib_update_xlt(imr, inv_start_idx, inv_len, 0, + MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); - if (ret) { - mlx5_ib_err(dev, "Failed to update PAS\n"); - result = ERR_PTR(ret); - } + if (ret) { + mlx5_ib_err(to_mdev(imr->ibmr.pd->device), + "Failed to update PAS\n"); + result = ERR_PTR(ret); + goto out_unlock; } - mutex_unlock(&odp_mr->umem_mutex); +out_unlock: + mutex_unlock(&odp_imr->umem_mutex); return result; } -- cgit From c2edcd69351f681594a30b17b7fbc5259a038fb0 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:26 -0300 Subject: RDMA/mlx5: Lift implicit_mr_alloc() into the two routines that call it This makes the routines easier to understand, particularly with respect the locking requirements of the entire sequence. The implicit_mr_alloc() had a lot of ifs specializing it to each of the callers, and only a very small amount of code was actually shared. Following patches will cause the flow in the two functions to diverge further. Link: https://lore.kernel.org/r/20191009160934.3143-7-jgg@ziepe.ca Reviewed-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/odp.c | 151 +++++++++++++++++++-------------------- 1 file changed, 74 insertions(+), 77 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 78e31dc694d9..dfaa39fc4d3f 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -416,96 +416,66 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev, wq_num, err); } -static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd, - struct ib_umem_odp *umem_odp, - bool ksm, int access_flags) +static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, + unsigned long idx) { - struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct ib_umem_odp *odp; struct mlx5_ib_mr *mr; + struct mlx5_ib_mr *ret; int err; - mr = mlx5_mr_cache_alloc(dev, ksm ? MLX5_IMR_KSM_CACHE_ENTRY : - MLX5_IMR_MTT_CACHE_ENTRY); + odp = ib_umem_odp_alloc_child(to_ib_umem_odp(imr->umem), + idx * MLX5_IMR_MTT_SIZE, + MLX5_IMR_MTT_SIZE); + if (IS_ERR(odp)) + return ERR_CAST(odp); + ret = mr = mlx5_mr_cache_alloc(imr->dev, MLX5_IMR_MTT_CACHE_ENTRY); if (IS_ERR(mr)) - return mr; + goto out_umem; - err = xa_reserve(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), + err = xa_reserve(&imr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), GFP_KERNEL); - if (err) + if (err) { + ret = ERR_PTR(err); goto out_mr; - - mr->ibmr.pd = pd; - - mr->dev = dev; - mr->access_flags = access_flags; - mr->mmkey.iova = 0; - mr->umem = &umem_odp->umem; - - if (ksm) { - err = mlx5_ib_update_xlt(mr, 0, - mlx5_imr_ksm_entries, - MLX5_KSM_PAGE_SHIFT, - MLX5_IB_UPD_XLT_INDIRECT | - MLX5_IB_UPD_XLT_ZAP | - MLX5_IB_UPD_XLT_ENABLE); - - } else { - err = mlx5_ib_update_xlt(mr, 0, - MLX5_IMR_MTT_ENTRIES, - PAGE_SHIFT, - MLX5_IB_UPD_XLT_ZAP | - MLX5_IB_UPD_XLT_ENABLE | - MLX5_IB_UPD_XLT_ATOMIC); } - if (err) - goto out_release; - + mr->ibmr.pd = imr->ibmr.pd; + mr->access_flags = imr->access_flags; + mr->umem = &odp->umem; mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; + mr->mmkey.iova = 0; + mr->parent = imr; + odp->private = mr; + INIT_WORK(&odp->work, mr_leaf_free_action); + + err = mlx5_ib_update_xlt(mr, 0, + MLX5_IMR_MTT_ENTRIES, + PAGE_SHIFT, + MLX5_IB_UPD_XLT_ZAP | + MLX5_IB_UPD_XLT_ENABLE | + MLX5_IB_UPD_XLT_ATOMIC); + if (err) { + ret = ERR_PTR(err); + goto out_release; + } - mlx5_ib_dbg(dev, "key %x dev %p mr %p\n", - mr->mmkey.key, dev->mdev, mr); + mr->mmkey.iova = idx * MLX5_IMR_MTT_SIZE; + xa_store(&imr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), + &mr->mmkey, GFP_ATOMIC); + mlx5_ib_dbg(imr->dev, "key %x mr %p\n", mr->mmkey.key, mr); return mr; out_release: - xa_release(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); + xa_release(&imr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); out_mr: - mlx5_ib_err(dev, "Failed to register MKEY %d\n", err); - mlx5_mr_cache_free(dev, mr); - - return ERR_PTR(err); -} - -static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, - unsigned long idx) -{ - struct ib_umem_odp *odp; - struct mlx5_ib_mr *mtt; - - odp = ib_umem_odp_alloc_child(to_ib_umem_odp(imr->umem), - idx * MLX5_IMR_MTT_SIZE, - MLX5_IMR_MTT_SIZE); - if (IS_ERR(odp)) - return ERR_CAST(odp); - - mtt = implicit_mr_alloc(imr->ibmr.pd, odp, 0, imr->access_flags); - if (IS_ERR(mtt)) { - ib_umem_odp_release(odp); - return mtt; - } - - odp->private = mtt; - mtt->umem = &odp->umem; - mtt->mmkey.iova = idx * MLX5_IMR_MTT_SIZE; - mtt->parent = imr; - INIT_WORK(&odp->work, mr_leaf_free_action); - - xa_store(&mtt->dev->odp_mkeys, mlx5_base_mkey(mtt->mmkey.key), - &mtt->mmkey, GFP_ATOMIC); - return mtt; + mlx5_mr_cache_free(imr->dev, mr); +out_umem: + ib_umem_odp_release(odp); + return ret; } static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *imr, @@ -573,27 +543,54 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, struct ib_udata *udata, int access_flags) { - struct mlx5_ib_mr *imr; + struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device); struct ib_umem_odp *umem_odp; + struct mlx5_ib_mr *imr; + int err; umem_odp = ib_umem_odp_alloc_implicit(udata, access_flags); if (IS_ERR(umem_odp)) return ERR_CAST(umem_odp); - imr = implicit_mr_alloc(&pd->ibpd, umem_odp, 1, access_flags); + imr = mlx5_mr_cache_alloc(dev, MLX5_IMR_KSM_CACHE_ENTRY); if (IS_ERR(imr)) { - ib_umem_odp_release(umem_odp); - return ERR_CAST(imr); + err = PTR_ERR(imr); + goto out_umem; } + imr->ibmr.pd = &pd->ibpd; + imr->access_flags = access_flags; + imr->mmkey.iova = 0; + imr->umem = &umem_odp->umem; + imr->ibmr.lkey = imr->mmkey.key; + imr->ibmr.rkey = imr->mmkey.key; imr->umem = &umem_odp->umem; init_waitqueue_head(&imr->q_leaf_free); atomic_set(&imr->num_leaf_free, 0); atomic_set(&imr->num_pending_prefetch, 0); - xa_store(&imr->dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key), - &imr->mmkey, GFP_ATOMIC); + err = mlx5_ib_update_xlt(imr, 0, + mlx5_imr_ksm_entries, + MLX5_KSM_PAGE_SHIFT, + MLX5_IB_UPD_XLT_INDIRECT | + MLX5_IB_UPD_XLT_ZAP | + MLX5_IB_UPD_XLT_ENABLE); + if (err) + goto out_mr; + + err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key), + &imr->mmkey, GFP_KERNEL)); + if (err) + goto out_mr; + + mlx5_ib_dbg(dev, "key %x mr %p\n", imr->mmkey.key, imr); return imr; +out_mr: + mlx5_ib_err(dev, "Failed to register MKEY %d\n", err); + mlx5_mr_cache_free(dev, imr); +out_umem: + ib_umem_odp_release(umem_odp); + return ERR_PTR(err); } void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) -- cgit From 9162420dde49c9a8f4819f28bf2d5c675fb12552 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:27 -0300 Subject: RDMA/mlx5: Set the HW IOVA of the child MRs to their place in the tree Instead of rewriting all the IOVA's to 0 as things progress down the tree make the IOVA of the children equal to placement in the tree. This makes things easier to understand by keeping mmkey.iova == HW configuration. Link: https://lore.kernel.org/r/20191009160934.3143-8-jgg@ziepe.ca Reviewed-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/odp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index dfaa39fc4d3f..b25cf7836544 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -211,9 +211,11 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, struct mlx5_ib_mr *mtt = odp->private; pklm->key = cpu_to_be32(mtt->ibmr.lkey); + pklm->va = cpu_to_be64(va); odp = odp_next(odp); } else { pklm->key = cpu_to_be32(dev->null_mkey); + pklm->va = 0; } mlx5_ib_dbg(dev, "[%d] va %lx key %x\n", i, va, be32_to_cpu(pklm->key)); @@ -446,7 +448,7 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, mr->umem = &odp->umem; mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; - mr->mmkey.iova = 0; + mr->mmkey.iova = idx * MLX5_IMR_MTT_SIZE; mr->parent = imr; odp->private = mr; INIT_WORK(&odp->work, mr_leaf_free_action); @@ -462,7 +464,6 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, goto out_release; } - mr->mmkey.iova = idx * MLX5_IMR_MTT_SIZE; xa_store(&imr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), &mr->mmkey, GFP_ATOMIC); -- cgit From 54375e7382952daded7002d1618eadaae859cecb Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:28 -0300 Subject: RDMA/mlx5: Split implicit handling from pagefault_mr The single routine has a very confusing scheme to advance to the next child MR when working on an implicit parent. This scheme can only be used when working with an implicit parent and must not be triggered when working on a normal MR. Re-arrange things by directly putting all the single-MR stuff into one function and calling it in a loop for the implicit case. Simplify some of the error handling in the new pagefault_real_mr() to remove unneeded gotos. Link: https://lore.kernel.org/r/20191009160934.3143-9-jgg@ziepe.ca Reviewed-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/odp.c | 125 ++++++++++++++++++++++++--------------- 1 file changed, 76 insertions(+), 49 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index b25cf7836544..8f43af6580ce 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -629,33 +629,18 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) } #define MLX5_PF_FLAGS_DOWNGRADE BIT(1) -static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, - u32 *bytes_mapped, u32 flags) +static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp, + u64 user_va, size_t bcnt, u32 *bytes_mapped, + u32 flags) { - int npages = 0, current_seq, page_shift, ret, np; - struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem); + int current_seq, page_shift, ret, np; bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE; u64 access_mask; u64 start_idx, page_mask; - struct ib_umem_odp *odp; - size_t size; - - if (odp_mr->is_implicit_odp) { - odp = implicit_mr_get_data(mr, io_virt, bcnt); - - if (IS_ERR(odp)) - return PTR_ERR(odp); - mr = odp->private; - } else { - odp = odp_mr; - } - -next_mr: - size = min_t(size_t, bcnt, ib_umem_end(odp) - io_virt); page_shift = odp->page_shift; page_mask = ~(BIT(page_shift) - 1); - start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift; + start_idx = (user_va - (mr->mmkey.iova & page_mask)) >> page_shift; access_mask = ODP_READ_ALLOWED_BIT; if (odp->umem.writable && !downgrade) @@ -668,13 +653,10 @@ next_mr: */ smp_rmb(); - ret = ib_umem_odp_map_dma_pages(odp, io_virt, size, access_mask, - current_seq); - - if (ret < 0) - goto out; - - np = ret; + np = ib_umem_odp_map_dma_pages(odp, user_va, bcnt, access_mask, + current_seq); + if (np < 0) + return np; mutex_lock(&odp->umem_mutex); if (!ib_umem_mmu_notifier_retry(odp, current_seq)) { @@ -699,31 +681,12 @@ next_mr: if (bytes_mapped) { u32 new_mappings = (np << page_shift) - - (io_virt - round_down(io_virt, 1 << page_shift)); - *bytes_mapped += min_t(u32, new_mappings, size); - } - - npages += np << (page_shift - PAGE_SHIFT); - bcnt -= size; + (user_va - round_down(user_va, 1 << page_shift)); - if (unlikely(bcnt)) { - struct ib_umem_odp *next; - - io_virt += size; - next = odp_next(odp); - if (unlikely(!next || ib_umem_start(next) != io_virt)) { - mlx5_ib_dbg( - mr->dev, - "next implicit leaf removed at 0x%llx. got %p\n", - io_virt, next); - return -EAGAIN; - } - odp = next; - mr = odp->private; - goto next_mr; + *bytes_mapped += min_t(u32, new_mappings, bcnt); } - return npages; + return np << (page_shift - PAGE_SHIFT); out: if (ret == -EAGAIN) { @@ -742,6 +705,70 @@ out: return ret; } +/* + * Returns: + * -EFAULT: The io_virt->bcnt is not within the MR, it covers pages that are + * not accessible, or the MR is no longer valid. + * -EAGAIN/-ENOMEM: The operation should be retried + * + * -EINVAL/others: General internal malfunction + * >0: Number of pages mapped + */ +static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, + u32 *bytes_mapped, u32 flags) +{ + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + struct ib_umem_odp *child; + int npages = 0; + + if (!odp->is_implicit_odp) { + if (unlikely(io_virt < ib_umem_start(odp) || + ib_umem_end(odp) - io_virt < bcnt)) + return -EFAULT; + return pagefault_real_mr(mr, odp, io_virt, bcnt, bytes_mapped, + flags); + } + + if (unlikely(io_virt >= mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE || + mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE - io_virt < bcnt)) + return -EFAULT; + + child = implicit_mr_get_data(mr, io_virt, bcnt); + if (IS_ERR(child)) + return PTR_ERR(child); + + /* Fault each child mr that intersects with our interval. */ + while (bcnt) { + u64 end = min_t(u64, io_virt + bcnt, ib_umem_end(child)); + u64 len = end - io_virt; + int ret; + + ret = pagefault_real_mr(child->private, child, io_virt, len, + bytes_mapped, flags); + if (ret < 0) + return ret; + io_virt += len; + bcnt -= len; + npages += ret; + + if (unlikely(bcnt)) { + child = odp_next(child); + /* + * implicit_mr_get_data sets up all the leaves, this + * means they got invalidated before we got to them. + */ + if (!child || ib_umem_start(child) != io_virt) { + mlx5_ib_dbg( + mr->dev, + "next implicit leaf removed at 0x%llx.\n", + io_virt); + return -EAGAIN; + } + } + } + return npages; +} + struct pf_frame { struct pf_frame *next; u32 key; -- cgit From 423f52d65005e8f5067d94bd4f41d8a7d8388135 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:29 -0300 Subject: RDMA/mlx5: Use an xarray for the children of an implicit ODP Currently the child leaves are stored in the shared interval tree and every lookup for a child must be done under the interval tree rwsem. This is further complicated by dropping the rwsem during iteration (ie the odp_lookup(), odp_next() pattern), which requires a very tricky an difficult to understand locking scheme with SRCU. Instead reserve the interval tree for the exclusive use of the mmu notifier related code in umem_odp.c and give each implicit MR a xarray containing all the child MRs. Since the size of each child is 1GB of VA, a 1 level xarray will index 64G of VA, and a 2 level will index 2TB, making xarray a much better data structure choice than an interval tree. The locking properties of xarray will be used in the next patches to rework the implicit ODP locking scheme into something simpler. At this point, the xarray is locked by the implicit MR's umem_mutex, and read can also be locked by the odp_srcu. Link: https://lore.kernel.org/r/20191009160934.3143-10-jgg@ziepe.ca Reviewed-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 +- drivers/infiniband/hw/mlx5/odp.c | 195 +++++++++++------------------------ 2 files changed, 67 insertions(+), 133 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 2cf91db6a36f..88769fcffb5a 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -617,10 +617,13 @@ struct mlx5_ib_mr { u64 data_iova; u64 pi_iova; + /* For ODP and implicit */ atomic_t num_leaf_free; wait_queue_head_t q_leaf_free; - struct mlx5_async_work cb_work; atomic_t num_pending_prefetch; + struct xarray implicit_children; + + struct mlx5_async_work cb_work; }; static inline bool is_odp_mr(struct mlx5_ib_mr *mr) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 8f43af6580ce..6f7eea175c72 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -93,132 +93,54 @@ struct mlx5_pagefault { static u64 mlx5_imr_ksm_entries; -static int check_parent(struct ib_umem_odp *odp, - struct mlx5_ib_mr *parent) +void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, + struct mlx5_ib_mr *imr, int flags) { - struct mlx5_ib_mr *mr = odp->private; - - return mr && mr->parent == parent && !odp->dying; -} - -static struct ib_ucontext_per_mm *mr_to_per_mm(struct mlx5_ib_mr *mr) -{ - if (WARN_ON(!mr || !is_odp_mr(mr))) - return NULL; - - return to_ib_umem_odp(mr->umem)->per_mm; -} - -static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp) -{ - struct mlx5_ib_mr *mr = odp->private, *parent = mr->parent; - struct ib_ucontext_per_mm *per_mm = odp->per_mm; - struct rb_node *rb; - - down_read(&per_mm->umem_rwsem); - while (1) { - rb = rb_next(&odp->interval_tree.rb); - if (!rb) - goto not_found; - odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb); - if (check_parent(odp, parent)) - goto end; - } -not_found: - odp = NULL; -end: - up_read(&per_mm->umem_rwsem); - return odp; -} - -static struct ib_umem_odp *odp_lookup(u64 start, u64 length, - struct mlx5_ib_mr *parent) -{ - struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(parent); - struct ib_umem_odp *odp; - struct rb_node *rb; - - down_read(&per_mm->umem_rwsem); - odp = rbt_ib_umem_lookup(&per_mm->umem_tree, start, length); - if (!odp) - goto end; - - while (1) { - if (check_parent(odp, parent)) - goto end; - rb = rb_next(&odp->interval_tree.rb); - if (!rb) - goto not_found; - odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb); - if (ib_umem_start(odp) > start + length) - goto not_found; - } -not_found: - odp = NULL; -end: - up_read(&per_mm->umem_rwsem); - return odp; -} - -void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, - size_t nentries, struct mlx5_ib_mr *mr, int flags) -{ - struct ib_pd *pd = mr->ibmr.pd; - struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct ib_umem_odp *odp; - unsigned long va; - int i; + struct mlx5_klm *end = pklm + nentries; if (flags & MLX5_IB_UPD_XLT_ZAP) { - for (i = 0; i < nentries; i++, pklm++) { + for (; pklm != end; pklm++, idx++) { pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE); - pklm->key = cpu_to_be32(dev->null_mkey); + pklm->key = cpu_to_be32(imr->dev->null_mkey); pklm->va = 0; } return; } /* - * The locking here is pretty subtle. Ideally the implicit children - * list would be protected by the umem_mutex, however that is not + * The locking here is pretty subtle. Ideally the implicit_children + * xarray would be protected by the umem_mutex, however that is not * possible. Instead this uses a weaker update-then-lock pattern: * * srcu_read_lock() - * + * xa_store() * mutex_lock(umem_mutex) * mlx5_ib_update_xlt() * mutex_unlock(umem_mutex) * destroy lkey * - * ie any change the children list must be followed by the locked - * update_xlt before destroying. + * ie any change the xarray must be followed by the locked update_xlt + * before destroying. * * The umem_mutex provides the acquire/release semantic needed to make - * the children list visible to a racing thread. While SRCU is not + * the xa_store() visible to a racing thread. While SRCU is not * technically required, using it gives consistent use of the SRCU - * locking around the children list. + * locking around the xarray. */ - lockdep_assert_held(&to_ib_umem_odp(mr->umem)->umem_mutex); - lockdep_assert_held(&mr->dev->odp_srcu); + lockdep_assert_held(&to_ib_umem_odp(imr->umem)->umem_mutex); + lockdep_assert_held(&imr->dev->odp_srcu); - odp = odp_lookup(offset * MLX5_IMR_MTT_SIZE, - nentries * MLX5_IMR_MTT_SIZE, mr); + for (; pklm != end; pklm++, idx++) { + struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx); - for (i = 0; i < nentries; i++, pklm++) { pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE); - va = (offset + i) * MLX5_IMR_MTT_SIZE; - if (odp && ib_umem_start(odp) == va) { - struct mlx5_ib_mr *mtt = odp->private; - + if (mtt) { pklm->key = cpu_to_be32(mtt->ibmr.lkey); - pklm->va = cpu_to_be64(va); - odp = odp_next(odp); + pklm->va = cpu_to_be64(idx * MLX5_IMR_MTT_SIZE); } else { - pklm->key = cpu_to_be32(dev->null_mkey); + pklm->key = cpu_to_be32(imr->dev->null_mkey); pklm->va = 0; } - mlx5_ib_dbg(dev, "[%d] va %lx key %x\n", - i, va, be32_to_cpu(pklm->key)); } } @@ -320,6 +242,8 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, if (unlikely(!umem_odp->npages && mr->parent && !umem_odp->dying)) { + xa_erase(&mr->parent->implicit_children, + ib_umem_start(umem_odp) >> MLX5_IMR_MTT_SHIFT); xa_erase(&mr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); umem_odp->dying = 1; atomic_inc(&mr->parent->num_leaf_free); @@ -464,6 +388,16 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, goto out_release; } + /* + * Once the store to either xarray completes any error unwind has to + * use synchronize_srcu(). Avoid this with xa_reserve() + */ + err = xa_err(xa_store(&imr->implicit_children, idx, mr, GFP_KERNEL)); + if (err) { + ret = ERR_PTR(err); + goto out_release; + } + xa_store(&imr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), &mr->mmkey, GFP_ATOMIC); @@ -479,7 +413,7 @@ out_umem: return ret; } -static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *imr, +static struct mlx5_ib_mr *implicit_mr_get_data(struct mlx5_ib_mr *imr, u64 io_virt, size_t bcnt) { struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); @@ -487,39 +421,32 @@ static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *imr, unsigned long idx = io_virt >> MLX5_IMR_MTT_SHIFT; unsigned long inv_start_idx = end_idx + 1; unsigned long inv_len = 0; - struct ib_umem_odp *result = NULL; - struct ib_umem_odp *odp; + struct mlx5_ib_mr *result = NULL; int ret; mutex_lock(&odp_imr->umem_mutex); - odp = odp_lookup(idx * MLX5_IMR_MTT_SIZE, 1, imr); for (idx = idx; idx <= end_idx; idx++) { - if (unlikely(!odp)) { - struct mlx5_ib_mr *mtt; + struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx); + if (unlikely(!mtt)) { mtt = implicit_get_child_mr(imr, idx); if (IS_ERR(mtt)) { - result = ERR_CAST(mtt); + result = mtt; goto out; } - odp = to_ib_umem_odp(mtt->umem); inv_start_idx = min(inv_start_idx, idx); inv_len = idx - inv_start_idx + 1; } /* Return first odp if region not covered by single one */ if (likely(!result)) - result = odp; - - odp = odp_next(odp); - if (odp && ib_umem_start(odp) != idx * MLX5_IMR_MTT_SIZE) - odp = NULL; + result = mtt; } /* - * Any time the children in the interval tree are changed we must - * perform an update of the xlt before exiting to ensure the HW and - * the tree remains synchronized. + * Any time the implicit_children are changed we must perform an + * update of the xlt before exiting to ensure the HW and the + * implicit_children remains synchronized. */ out: if (likely(!inv_len)) @@ -569,6 +496,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, init_waitqueue_head(&imr->q_leaf_free); atomic_set(&imr->num_leaf_free, 0); atomic_set(&imr->num_pending_prefetch, 0); + xa_init(&imr->implicit_children); err = mlx5_ib_update_xlt(imr, 0, mlx5_imr_ksm_entries, @@ -596,18 +524,15 @@ out_umem: void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) { - struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(imr); - struct rb_node *node; + struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); + struct mlx5_ib_mr *mtt; + unsigned long idx; - down_read(&per_mm->umem_rwsem); - for (node = rb_first_cached(&per_mm->umem_tree); node; - node = rb_next(node)) { - struct ib_umem_odp *umem_odp = - rb_entry(node, struct ib_umem_odp, interval_tree.rb); - struct mlx5_ib_mr *mr = umem_odp->private; + mutex_lock(&odp_imr->umem_mutex); + xa_for_each (&imr->implicit_children, idx, mtt) { + struct ib_umem_odp *umem_odp = to_ib_umem_odp(mtt->umem); - if (mr->parent != imr) - continue; + xa_erase(&imr->implicit_children, idx); mutex_lock(&umem_odp->umem_mutex); ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp), @@ -623,9 +548,12 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) schedule_work(&umem_odp->work); mutex_unlock(&umem_odp->umem_mutex); } - up_read(&per_mm->umem_rwsem); + mutex_unlock(&odp_imr->umem_mutex); wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free)); + WARN_ON(!xa_empty(&imr->implicit_children)); + /* Remove any left over reserved elements */ + xa_destroy(&imr->implicit_children); } #define MLX5_PF_FLAGS_DOWNGRADE BIT(1) @@ -718,7 +646,7 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, u32 *bytes_mapped, u32 flags) { struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); - struct ib_umem_odp *child; + struct mlx5_ib_mr *mtt; int npages = 0; if (!odp->is_implicit_odp) { @@ -733,17 +661,18 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE - io_virt < bcnt)) return -EFAULT; - child = implicit_mr_get_data(mr, io_virt, bcnt); - if (IS_ERR(child)) - return PTR_ERR(child); + mtt = implicit_mr_get_data(mr, io_virt, bcnt); + if (IS_ERR(mtt)) + return PTR_ERR(mtt); /* Fault each child mr that intersects with our interval. */ while (bcnt) { - u64 end = min_t(u64, io_virt + bcnt, ib_umem_end(child)); + struct ib_umem_odp *umem_odp = to_ib_umem_odp(mtt->umem); + u64 end = min_t(u64, io_virt + bcnt, ib_umem_end(umem_odp)); u64 len = end - io_virt; int ret; - ret = pagefault_real_mr(child->private, child, io_virt, len, + ret = pagefault_real_mr(mtt, umem_odp, io_virt, len, bytes_mapped, flags); if (ret < 0) return ret; @@ -752,12 +681,14 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, npages += ret; if (unlikely(bcnt)) { - child = odp_next(child); + mtt = xa_load(&mr->implicit_children, + io_virt >> MLX5_IMR_MTT_SHIFT); + /* * implicit_mr_get_data sets up all the leaves, this * means they got invalidated before we got to them. */ - if (!child || ib_umem_start(child) != io_virt) { + if (!mtt) { mlx5_ib_dbg( mr->dev, "next implicit leaf removed at 0x%llx.\n", -- cgit From 3389baa831b6a09e3c96e2a6283a1b952be2f0cd Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:30 -0300 Subject: RDMA/mlx5: Reduce locking in implicit_mr_get_data() Now that the child MRs are stored in an xarray we can rely on the SRCU lock to protect the xa_load and use xa_cmpxchg on the slow allocation path to resolve races with concurrent page fault. This reduces the scope of the critical section of umem_mutex for implicit MRs to only cover mlx5_ib_update_xlt, and avoids taking a lock at all if the child MR is already in the xarray. This makes it consistent with the normal ODP MR critical section for umem_lock, and the locking approach used for destroying an unusued implicit child MR. The MLX5_IB_UPD_XLT_ATOMIC is no longer needed in implicit_get_child_mr() since it is no longer called with any locks. Link: https://lore.kernel.org/r/20191009160934.3143-11-jgg@ziepe.ca Reviewed-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/odp.c | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 6f7eea175c72..00e14b6acd98 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -381,8 +381,7 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, MLX5_IMR_MTT_ENTRIES, PAGE_SHIFT, MLX5_IB_UPD_XLT_ZAP | - MLX5_IB_UPD_XLT_ENABLE | - MLX5_IB_UPD_XLT_ATOMIC); + MLX5_IB_UPD_XLT_ENABLE); if (err) { ret = ERR_PTR(err); goto out_release; @@ -392,9 +391,16 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, * Once the store to either xarray completes any error unwind has to * use synchronize_srcu(). Avoid this with xa_reserve() */ - err = xa_err(xa_store(&imr->implicit_children, idx, mr, GFP_KERNEL)); - if (err) { - ret = ERR_PTR(err); + ret = xa_cmpxchg(&imr->implicit_children, idx, NULL, mr, GFP_KERNEL); + if (unlikely(ret)) { + if (xa_is_err(ret)) { + ret = ERR_PTR(xa_err(ret)); + goto out_release; + } + /* + * Another thread beat us to creating the child mr, use + * theirs. + */ goto out_release; } @@ -424,7 +430,8 @@ static struct mlx5_ib_mr *implicit_mr_get_data(struct mlx5_ib_mr *imr, struct mlx5_ib_mr *result = NULL; int ret; - mutex_lock(&odp_imr->umem_mutex); + lockdep_assert_held(&imr->dev->odp_srcu); + for (idx = idx; idx <= end_idx; idx++) { struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx); @@ -450,20 +457,27 @@ static struct mlx5_ib_mr *implicit_mr_get_data(struct mlx5_ib_mr *imr, */ out: if (likely(!inv_len)) - goto out_unlock; + return result; + /* + * Notice this is not strictly ordered right, the KSM is updated after + * the implicit_leaves is updated, so a parallel page fault could see + * a MR that is not yet visible in the KSM. This is similar to a + * parallel page fault seeing a MR that is being concurrently removed + * from the KSM. Both of these improbable situations are resolved + * safely by resuming the HW and then taking another page fault. The + * next pagefault handler will see the new information. + */ + mutex_lock(&odp_imr->umem_mutex); ret = mlx5_ib_update_xlt(imr, inv_start_idx, inv_len, 0, MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); + mutex_unlock(&odp_imr->umem_mutex); if (ret) { mlx5_ib_err(to_mdev(imr->ibmr.pd->device), "Failed to update PAS\n"); - result = ERR_PTR(ret); - goto out_unlock; + return ERR_PTR(ret); } - -out_unlock: - mutex_unlock(&odp_imr->umem_mutex); return result; } -- cgit From b70d785d237c0d3e4235c511f38f8ce64620f945 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:31 -0300 Subject: RDMA/mlx5: Avoid double lookups on the pagefault path Now that the locking is simplified combine pagefault_implicit_mr() with implicit_mr_get_data() so that we sweep over the idx range only once, and do the single xlt update at the end, after the child umems are setup. This avoids double iteration/xa_loads plus the sketchy failure path if the xa_load() fails. Link: https://lore.kernel.org/r/20191009160934.3143-12-jgg@ziepe.ca Reviewed-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/odp.c | 186 +++++++++++++++++---------------------- 1 file changed, 80 insertions(+), 106 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 00e14b6acd98..8bd30db87c21 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -419,68 +419,6 @@ out_umem: return ret; } -static struct mlx5_ib_mr *implicit_mr_get_data(struct mlx5_ib_mr *imr, - u64 io_virt, size_t bcnt) -{ - struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); - unsigned long end_idx = (io_virt + bcnt - 1) >> MLX5_IMR_MTT_SHIFT; - unsigned long idx = io_virt >> MLX5_IMR_MTT_SHIFT; - unsigned long inv_start_idx = end_idx + 1; - unsigned long inv_len = 0; - struct mlx5_ib_mr *result = NULL; - int ret; - - lockdep_assert_held(&imr->dev->odp_srcu); - - for (idx = idx; idx <= end_idx; idx++) { - struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx); - - if (unlikely(!mtt)) { - mtt = implicit_get_child_mr(imr, idx); - if (IS_ERR(mtt)) { - result = mtt; - goto out; - } - inv_start_idx = min(inv_start_idx, idx); - inv_len = idx - inv_start_idx + 1; - } - - /* Return first odp if region not covered by single one */ - if (likely(!result)) - result = mtt; - } - - /* - * Any time the implicit_children are changed we must perform an - * update of the xlt before exiting to ensure the HW and the - * implicit_children remains synchronized. - */ -out: - if (likely(!inv_len)) - return result; - - /* - * Notice this is not strictly ordered right, the KSM is updated after - * the implicit_leaves is updated, so a parallel page fault could see - * a MR that is not yet visible in the KSM. This is similar to a - * parallel page fault seeing a MR that is being concurrently removed - * from the KSM. Both of these improbable situations are resolved - * safely by resuming the HW and then taking another page fault. The - * next pagefault handler will see the new information. - */ - mutex_lock(&odp_imr->umem_mutex); - ret = mlx5_ib_update_xlt(imr, inv_start_idx, inv_len, 0, - MLX5_IB_UPD_XLT_INDIRECT | - MLX5_IB_UPD_XLT_ATOMIC); - mutex_unlock(&odp_imr->umem_mutex); - if (ret) { - mlx5_ib_err(to_mdev(imr->ibmr.pd->device), - "Failed to update PAS\n"); - return ERR_PTR(ret); - } - return result; -} - struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, struct ib_udata *udata, int access_flags) @@ -647,6 +585,84 @@ out: return ret; } +static int pagefault_implicit_mr(struct mlx5_ib_mr *imr, + struct ib_umem_odp *odp_imr, u64 user_va, + size_t bcnt, u32 *bytes_mapped, u32 flags) +{ + unsigned long end_idx = (user_va + bcnt - 1) >> MLX5_IMR_MTT_SHIFT; + unsigned long upd_start_idx = end_idx + 1; + unsigned long upd_len = 0; + unsigned long npages = 0; + int err; + int ret; + + if (unlikely(user_va >= mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE || + mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE - user_va < bcnt)) + return -EFAULT; + + /* Fault each child mr that intersects with our interval. */ + while (bcnt) { + unsigned long idx = user_va >> MLX5_IMR_MTT_SHIFT; + struct ib_umem_odp *umem_odp; + struct mlx5_ib_mr *mtt; + u64 len; + + mtt = xa_load(&imr->implicit_children, idx); + if (unlikely(!mtt)) { + mtt = implicit_get_child_mr(imr, idx); + if (IS_ERR(mtt)) { + ret = PTR_ERR(mtt); + goto out; + } + upd_start_idx = min(upd_start_idx, idx); + upd_len = idx - upd_start_idx + 1; + } + + umem_odp = to_ib_umem_odp(mtt->umem); + len = min_t(u64, user_va + bcnt, ib_umem_end(umem_odp)) - + user_va; + + ret = pagefault_real_mr(mtt, umem_odp, user_va, len, + bytes_mapped, flags); + if (ret < 0) + goto out; + user_va += len; + bcnt -= len; + npages += ret; + } + + ret = npages; + + /* + * Any time the implicit_children are changed we must perform an + * update of the xlt before exiting to ensure the HW and the + * implicit_children remains synchronized. + */ +out: + if (likely(!upd_len)) + return ret; + + /* + * Notice this is not strictly ordered right, the KSM is updated after + * the implicit_children is updated, so a parallel page fault could + * see a MR that is not yet visible in the KSM. This is similar to a + * parallel page fault seeing a MR that is being concurrently removed + * from the KSM. Both of these improbable situations are resolved + * safely by resuming the HW and then taking another page fault. The + * next pagefault handler will see the new information. + */ + mutex_lock(&odp_imr->umem_mutex); + err = mlx5_ib_update_xlt(imr, upd_start_idx, upd_len, 0, + MLX5_IB_UPD_XLT_INDIRECT | + MLX5_IB_UPD_XLT_ATOMIC); + mutex_unlock(&odp_imr->umem_mutex); + if (err) { + mlx5_ib_err(imr->dev, "Failed to update PAS\n"); + return err; + } + return ret; +} + /* * Returns: * -EFAULT: The io_virt->bcnt is not within the MR, it covers pages that are @@ -660,8 +676,6 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, u32 *bytes_mapped, u32 flags) { struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); - struct mlx5_ib_mr *mtt; - int npages = 0; if (!odp->is_implicit_odp) { if (unlikely(io_virt < ib_umem_start(odp) || @@ -670,48 +684,8 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, return pagefault_real_mr(mr, odp, io_virt, bcnt, bytes_mapped, flags); } - - if (unlikely(io_virt >= mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE || - mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE - io_virt < bcnt)) - return -EFAULT; - - mtt = implicit_mr_get_data(mr, io_virt, bcnt); - if (IS_ERR(mtt)) - return PTR_ERR(mtt); - - /* Fault each child mr that intersects with our interval. */ - while (bcnt) { - struct ib_umem_odp *umem_odp = to_ib_umem_odp(mtt->umem); - u64 end = min_t(u64, io_virt + bcnt, ib_umem_end(umem_odp)); - u64 len = end - io_virt; - int ret; - - ret = pagefault_real_mr(mtt, umem_odp, io_virt, len, - bytes_mapped, flags); - if (ret < 0) - return ret; - io_virt += len; - bcnt -= len; - npages += ret; - - if (unlikely(bcnt)) { - mtt = xa_load(&mr->implicit_children, - io_virt >> MLX5_IMR_MTT_SHIFT); - - /* - * implicit_mr_get_data sets up all the leaves, this - * means they got invalidated before we got to them. - */ - if (!mtt) { - mlx5_ib_dbg( - mr->dev, - "next implicit leaf removed at 0x%llx.\n", - io_virt); - return -EAGAIN; - } - } - } - return npages; + return pagefault_implicit_mr(mr, odp, io_virt, bcnt, bytes_mapped, + flags); } struct pf_frame { -- cgit From 5256edcb98a14b11409a2d323f56a70a8b366363 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:32 -0300 Subject: RDMA/mlx5: Rework implicit ODP destroy Use SRCU in a sensible way by removing all MRs in the implicit tree from the two xarrays (the update operation), then a synchronize, followed by a normal single threaded teardown. This is only a little unusual from the normal pattern as there can still be some work pending in the unbound wq that may also require a workqueue flush. This is tracked with a single atomic, consolidating the redundant existing atomics and wait queue. For understand-ability the entire ODP implicit create/destroy flow now largely exists in a single pair of functions within odp.c, with a few support functions for tearing down an unused child. Link: https://lore.kernel.org/r/20191009160934.3143-13-jgg@ziepe.ca Reviewed-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 2 - drivers/infiniband/hw/mlx5/mlx5_ib.h | 9 ++- drivers/infiniband/hw/mlx5/mr.c | 21 ++--- drivers/infiniband/hw/mlx5/odp.c | 152 +++++++++++++++++++++++------------ 4 files changed, 120 insertions(+), 64 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 4692c37b057c..add24b628900 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6146,8 +6146,6 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_cleanup_multiport_master(dev); WARN_ON(!xa_empty(&dev->odp_mkeys)); - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) - srcu_barrier(&dev->odp_srcu); cleanup_srcu_struct(&dev->odp_srcu); WARN_ON(!xa_empty(&dev->sig_mrs)); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 88769fcffb5a..b8c958f62628 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -618,10 +618,13 @@ struct mlx5_ib_mr { u64 pi_iova; /* For ODP and implicit */ - atomic_t num_leaf_free; - wait_queue_head_t q_leaf_free; - atomic_t num_pending_prefetch; + atomic_t num_deferred_work; struct xarray implicit_children; + union { + struct rcu_head rcu; + struct list_head elm; + struct work_struct work; + } odp_destroy; struct mlx5_async_work cb_work; }; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index fd94838a8845..1e91f61efa8a 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1317,7 +1317,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (is_odp_mr(mr)) { to_ib_umem_odp(mr->umem)->private = mr; - atomic_set(&mr->num_pending_prefetch, 0); + atomic_set(&mr->num_deferred_work, 0); err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), &mr->mmkey, GFP_KERNEL)); @@ -1573,17 +1573,15 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) synchronize_srcu(&dev->odp_srcu); /* dequeue pending prefetch requests for the mr */ - if (atomic_read(&mr->num_pending_prefetch)) + if (atomic_read(&mr->num_deferred_work)) { flush_workqueue(system_unbound_wq); - WARN_ON(atomic_read(&mr->num_pending_prefetch)); + WARN_ON(atomic_read(&mr->num_deferred_work)); + } /* Destroy all page mappings */ - if (!umem_odp->is_implicit_odp) - mlx5_ib_invalidate_range(umem_odp, - ib_umem_start(umem_odp), - ib_umem_end(umem_odp)); - else - mlx5_ib_free_implicit_mr(mr); + mlx5_ib_invalidate_range(umem_odp, ib_umem_start(umem_odp), + ib_umem_end(umem_odp)); + /* * We kill the umem before the MR for ODP, * so that there will not be any invalidations in @@ -1620,6 +1618,11 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) dereg_mr(to_mdev(mmr->klm_mr->ibmr.device), mmr->klm_mr); } + if (is_odp_mr(mmr) && to_ib_umem_odp(mmr->umem)->is_implicit_odp) { + mlx5_ib_free_implicit_mr(mmr); + return 0; + } + dereg_mr(to_mdev(ibmr->device), mmr); return 0; diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 8bd30db87c21..5cf93d8129a9 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -144,31 +144,79 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, } } -static void mr_leaf_free_action(struct work_struct *work) +/* + * This must be called after the mr has been removed from implicit_children + * and odp_mkeys and the SRCU synchronized. NOTE: The MR does not necessarily + * have to be empty here, parallel page faults could have raced with the free + * process and added pages to it. + */ +static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt) { - struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work); - int idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; - struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent; + struct mlx5_ib_mr *imr = mr->parent; struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; int srcu_key; - mr->parent = NULL; - synchronize_srcu(&mr->dev->odp_srcu); + /* implicit_child_mr's are not allowed to have deferred work */ + WARN_ON(atomic_read(&mr->num_deferred_work)); - if (xa_load(&mr->dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key))) { + if (need_imr_xlt) { srcu_key = srcu_read_lock(&mr->dev->odp_srcu); mutex_lock(&odp_imr->umem_mutex); - mlx5_ib_update_xlt(imr, idx, 1, 0, + mlx5_ib_update_xlt(mr->parent, idx, 1, 0, MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); mutex_unlock(&odp_imr->umem_mutex); srcu_read_unlock(&mr->dev->odp_srcu, srcu_key); } - ib_umem_odp_release(odp); + + mr->parent = NULL; mlx5_mr_cache_free(mr->dev, mr); + ib_umem_odp_release(odp); + atomic_dec(&imr->num_deferred_work); +} + +static void free_implicit_child_mr_work(struct work_struct *work) +{ + struct mlx5_ib_mr *mr = + container_of(work, struct mlx5_ib_mr, odp_destroy.work); + + free_implicit_child_mr(mr, true); +} + +static void free_implicit_child_mr_rcu(struct rcu_head *head) +{ + struct mlx5_ib_mr *mr = + container_of(head, struct mlx5_ib_mr, odp_destroy.rcu); + + /* Freeing a MR is a sleeping operation, so bounce to a work queue */ + INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work); + queue_work(system_unbound_wq, &mr->odp_destroy.work); +} + +static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) +{ + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; + struct mlx5_ib_mr *imr = mr->parent; - if (atomic_dec_and_test(&imr->num_leaf_free)) - wake_up(&imr->q_leaf_free); + xa_lock(&imr->implicit_children); + /* + * This can race with mlx5_ib_free_implicit_mr(), the first one to + * reach the xa lock wins the race and destroys the MR. + */ + if (__xa_cmpxchg(&imr->implicit_children, idx, mr, NULL, GFP_ATOMIC) != + mr) + goto out_unlock; + + __xa_erase(&mr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); + atomic_inc(&imr->num_deferred_work); + call_srcu(&mr->dev->odp_srcu, &mr->odp_destroy.rcu, + free_implicit_child_mr_rcu); + +out_unlock: + xa_unlock(&imr->implicit_children); } void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, @@ -240,15 +288,8 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, ib_umem_odp_unmap_dma_pages(umem_odp, start, end); - if (unlikely(!umem_odp->npages && mr->parent && - !umem_odp->dying)) { - xa_erase(&mr->parent->implicit_children, - ib_umem_start(umem_odp) >> MLX5_IMR_MTT_SHIFT); - xa_erase(&mr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); - umem_odp->dying = 1; - atomic_inc(&mr->parent->num_leaf_free); - schedule_work(&umem_odp->work); - } + if (unlikely(!umem_odp->npages && mr->parent)) + destroy_unused_implicit_child_mr(mr); mutex_unlock(&umem_odp->umem_mutex); } @@ -375,7 +416,6 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, mr->mmkey.iova = idx * MLX5_IMR_MTT_SIZE; mr->parent = imr; odp->private = mr; - INIT_WORK(&odp->work, mr_leaf_free_action); err = mlx5_ib_update_xlt(mr, 0, MLX5_IMR_MTT_ENTRIES, @@ -391,7 +431,11 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, * Once the store to either xarray completes any error unwind has to * use synchronize_srcu(). Avoid this with xa_reserve() */ - ret = xa_cmpxchg(&imr->implicit_children, idx, NULL, mr, GFP_KERNEL); + ret = xa_cmpxchg(&imr->implicit_children, idx, NULL, mr, + GFP_KERNEL); + if (likely(!ret)) + xa_store(&imr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), + &mr->mmkey, GFP_ATOMIC); if (unlikely(ret)) { if (xa_is_err(ret)) { ret = ERR_PTR(xa_err(ret)); @@ -404,9 +448,6 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, goto out_release; } - xa_store(&imr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), - &mr->mmkey, GFP_ATOMIC); - mlx5_ib_dbg(imr->dev, "key %x mr %p\n", mr->mmkey.key, mr); return mr; @@ -445,9 +486,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, imr->ibmr.lkey = imr->mmkey.key; imr->ibmr.rkey = imr->mmkey.key; imr->umem = &umem_odp->umem; - init_waitqueue_head(&imr->q_leaf_free); - atomic_set(&imr->num_leaf_free, 0); - atomic_set(&imr->num_pending_prefetch, 0); + atomic_set(&imr->num_deferred_work, 0); xa_init(&imr->implicit_children); err = mlx5_ib_update_xlt(imr, 0, @@ -477,35 +516,48 @@ out_umem: void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) { struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); + struct mlx5_ib_dev *dev = imr->dev; + struct list_head destroy_list; struct mlx5_ib_mr *mtt; + struct mlx5_ib_mr *tmp; unsigned long idx; - mutex_lock(&odp_imr->umem_mutex); - xa_for_each (&imr->implicit_children, idx, mtt) { - struct ib_umem_odp *umem_odp = to_ib_umem_odp(mtt->umem); + INIT_LIST_HEAD(&destroy_list); - xa_erase(&imr->implicit_children, idx); + xa_erase(&dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key)); + /* + * This stops the SRCU protected page fault path from touching either + * the imr or any children. The page fault path can only reach the + * children xarray via the imr. + */ + synchronize_srcu(&dev->odp_srcu); - mutex_lock(&umem_odp->umem_mutex); - ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp), - ib_umem_end(umem_odp)); + xa_lock(&imr->implicit_children); + xa_for_each (&imr->implicit_children, idx, mtt) { + __xa_erase(&imr->implicit_children, idx); + __xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mtt->mmkey.key)); + list_add(&mtt->odp_destroy.elm, &destroy_list); + } + xa_unlock(&imr->implicit_children); - if (umem_odp->dying) { - mutex_unlock(&umem_odp->umem_mutex); - continue; - } + /* Fence access to the child pointers via the pagefault thread */ + synchronize_srcu(&dev->odp_srcu); - umem_odp->dying = 1; - atomic_inc(&imr->num_leaf_free); - schedule_work(&umem_odp->work); - mutex_unlock(&umem_odp->umem_mutex); + /* + * num_deferred_work can only be incremented inside the odp_srcu, or + * under xa_lock while the child is in the xarray. Thus at this point + * it is only decreasing, and all work holding it is now on the wq. + */ + if (atomic_read(&imr->num_deferred_work)) { + flush_workqueue(system_unbound_wq); + WARN_ON(atomic_read(&imr->num_deferred_work)); } - mutex_unlock(&odp_imr->umem_mutex); - wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free)); - WARN_ON(!xa_empty(&imr->implicit_children)); - /* Remove any left over reserved elements */ - xa_destroy(&imr->implicit_children); + list_for_each_entry_safe (mtt, tmp, &destroy_list, odp_destroy.elm) + free_implicit_child_mr(mtt, false); + + mlx5_mr_cache_free(dev, imr); + ib_umem_odp_release(odp_imr); } #define MLX5_PF_FLAGS_DOWNGRADE BIT(1) @@ -1579,7 +1631,7 @@ static void destroy_prefetch_work(struct prefetch_mr_work *work) u32 i; for (i = 0; i < work->num_sge; ++i) - atomic_dec(&work->frags[i].mr->num_pending_prefetch); + atomic_dec(&work->frags[i].mr->num_deferred_work); kvfree(work); } @@ -1658,7 +1710,7 @@ static bool init_prefetch_work(struct ib_pd *pd, } /* Keep the MR pointer will valid outside the SRCU */ - atomic_inc(&work->frags[i].mr->num_pending_prefetch); + atomic_inc(&work->frags[i].mr->num_deferred_work); } work->num_sge = num_sge; return true; -- cgit From d561987f34f263dd176ccd8fb782cb153d72f441 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:33 -0300 Subject: RDMA/mlx5: Do not store implicit children in the odp_mkeys xarray These mkeys are entirely internal and are never used by the HW for page fault. They should also never be used by userspace for prefetch. Simplify & optimize things by not including them in the xarray. Since the prefetch path can now never see a child mkey there is no need for the second synchronize_srcu() during imr destroy. Link: https://lore.kernel.org/r/20191009160934.3143-14-jgg@ziepe.ca Reviewed-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/odp.c | 36 ++++++------------------------------ 1 file changed, 6 insertions(+), 30 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 5cf93d8129a9..06e24d5e7609 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -146,9 +146,9 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, /* * This must be called after the mr has been removed from implicit_children - * and odp_mkeys and the SRCU synchronized. NOTE: The MR does not necessarily - * have to be empty here, parallel page faults could have raced with the free - * process and added pages to it. + * and the SRCU synchronized. NOTE: The MR does not necessarily have to be + * empty here, parallel page faults could have raced with the free process and + * added pages to it. */ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt) { @@ -210,7 +210,6 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) mr) goto out_unlock; - __xa_erase(&mr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); atomic_inc(&imr->num_deferred_work); call_srcu(&mr->dev->odp_srcu, &mr->odp_destroy.rcu, free_implicit_child_mr_rcu); @@ -401,13 +400,6 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, if (IS_ERR(mr)) goto out_umem; - err = xa_reserve(&imr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), - GFP_KERNEL); - if (err) { - ret = ERR_PTR(err); - goto out_mr; - } - mr->ibmr.pd = imr->ibmr.pd; mr->access_flags = imr->access_flags; mr->umem = &odp->umem; @@ -424,7 +416,7 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, MLX5_IB_UPD_XLT_ENABLE); if (err) { ret = ERR_PTR(err); - goto out_release; + goto out_mr; } /* @@ -433,26 +425,21 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, */ ret = xa_cmpxchg(&imr->implicit_children, idx, NULL, mr, GFP_KERNEL); - if (likely(!ret)) - xa_store(&imr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), - &mr->mmkey, GFP_ATOMIC); if (unlikely(ret)) { if (xa_is_err(ret)) { ret = ERR_PTR(xa_err(ret)); - goto out_release; + goto out_mr; } /* * Another thread beat us to creating the child mr, use * theirs. */ - goto out_release; + goto out_mr; } mlx5_ib_dbg(imr->dev, "key %x mr %p\n", mr->mmkey.key, mr); return mr; -out_release: - xa_release(&imr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); out_mr: mlx5_mr_cache_free(imr->dev, mr); out_umem: @@ -535,14 +522,10 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) xa_lock(&imr->implicit_children); xa_for_each (&imr->implicit_children, idx, mtt) { __xa_erase(&imr->implicit_children, idx); - __xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mtt->mmkey.key)); list_add(&mtt->odp_destroy.elm, &destroy_list); } xa_unlock(&imr->implicit_children); - /* Fence access to the child pointers via the pagefault thread */ - synchronize_srcu(&dev->odp_srcu); - /* * num_deferred_work can only be incremented inside the odp_srcu, or * under xa_lock while the child is in the xarray. Thus at this point @@ -1655,13 +1638,6 @@ get_prefetchable_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, if (mr->ibmr.pd != pd) return NULL; - /* - * Implicit child MRs are internal and userspace should not refer to - * them. - */ - if (mr->parent) - return NULL; - odp = to_ib_umem_odp(mr->umem); /* prefetch with write-access must be supported by the MR */ -- cgit From 09689703d29a3b75c510c198c3aca85d7d8b50c7 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 9 Oct 2019 13:09:34 -0300 Subject: RDMA/mlx5: Do not race with mlx5_ib_invalidate_range during create and destroy For creation, as soon as the umem_odp is created the notifier can be called, however the underlying MR may not have been setup yet. This would cause problems if mlx5_ib_invalidate_range() runs. There is some confusing/ulocked/racy code that might by trying to solve this, but without locks it isn't going to work right. Instead trivially solve the problem by short-circuiting the invalidation if there are not yet any DMA mapped pages. By definition there is nothing to invalidate in this case. The create code will have the umem fully setup before anything is DMA mapped, and npages is fully locked by the umem_mutex. For destroy, invalidate the entire MR at the HW to stop DMA then DMA unmap the pages before destroying the MR. This drives npages to zero and prevents similar racing with invalidate while the MR is undergoing destruction. Arguably it would be better if the umem was created after the MR and destroyed before, but that would require a big rework of the MR code. Fixes: 6aec21f6a832 ("IB/mlx5: Page faults handling infrastructure") Link: https://lore.kernel.org/r/20191009160934.3143-15-jgg@ziepe.ca Reviewed-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 ++ drivers/infiniband/hw/mlx5/mr.c | 74 ++++++++++++------------------------ drivers/infiniband/hw/mlx5/odp.c | 70 +++++++++++++++++++++++++++++----- 3 files changed, 88 insertions(+), 59 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b8c958f62628..f61d4005c6c3 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1171,6 +1171,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, struct ib_udata *udata, int access_flags); void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr); +void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr); int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_pd *pd, struct ib_udata *udata); @@ -1232,6 +1233,8 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry); void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); +int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr); + int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_mr_status *mr_status); struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 1e91f61efa8a..199f7959aaa5 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -50,7 +50,6 @@ enum { static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static int mr_cache_max_order(struct mlx5_ib_dev *dev); -static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev) { @@ -495,7 +494,7 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) c = order2idx(dev, mr->order); WARN_ON(c < 0 || c >= MAX_MR_CACHE_ENTRIES); - if (unreg_umr(dev, mr)) { + if (mlx5_mr_cache_invalidate(mr)) { mr->allocated_from_cache = false; destroy_mkey(dev, mr); ent = &cache->ent[c]; @@ -1333,22 +1332,29 @@ error: return ERR_PTR(err); } -static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) +/** + * mlx5_mr_cache_invalidate - Fence all DMA on the MR + * @mr: The MR to fence + * + * Upon return the NIC will not be doing any DMA to the pages under the MR, + * and any DMA inprogress will be completed. Failure of this function + * indicates the HW has failed catastrophically. + */ +int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr) { - struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_umr_wr umrwr = {}; - if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + if (mr->dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) return 0; umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR | MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; umrwr.wr.opcode = MLX5_IB_WR_UMR; - umrwr.pd = dev->umrc.pd; + umrwr.pd = mr->dev->umrc.pd; umrwr.mkey = mr->mmkey.key; umrwr.ignore_free_state = 1; - return mlx5_ib_post_send_wait(dev, &umrwr); + return mlx5_ib_post_send_wait(mr->dev, &umrwr); } static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, @@ -1432,7 +1438,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, * UMR can't be used - MKey needs to be replaced. */ if (mr->allocated_from_cache) - err = unreg_umr(dev, mr); + err = mlx5_mr_cache_invalidate(mr); else err = destroy_mkey(dev, mr); if (err) @@ -1561,52 +1567,20 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) int npages = mr->npages; struct ib_umem *umem = mr->umem; - if (is_odp_mr(mr)) { - struct ib_umem_odp *umem_odp = to_ib_umem_odp(umem); - - /* Prevent new page faults and - * prefetch requests from succeeding - */ - xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); - - /* Wait for all running page-fault handlers to finish. */ - synchronize_srcu(&dev->odp_srcu); - - /* dequeue pending prefetch requests for the mr */ - if (atomic_read(&mr->num_deferred_work)) { - flush_workqueue(system_unbound_wq); - WARN_ON(atomic_read(&mr->num_deferred_work)); - } - - /* Destroy all page mappings */ - mlx5_ib_invalidate_range(umem_odp, ib_umem_start(umem_odp), - ib_umem_end(umem_odp)); - - /* - * We kill the umem before the MR for ODP, - * so that there will not be any invalidations in - * flight, looking at the *mr struct. - */ - ib_umem_odp_release(umem_odp); - atomic_sub(npages, &dev->mdev->priv.reg_pages); - - /* Avoid double-freeing the umem. */ - umem = NULL; - } + /* Stop all DMA */ + if (is_odp_mr(mr)) + mlx5_ib_fence_odp_mr(mr); + else + clean_mr(dev, mr); - clean_mr(dev, mr); + if (mr->allocated_from_cache) + mlx5_mr_cache_free(dev, mr); + else + kfree(mr); - /* - * We should unregister the DMA address from the HCA before - * remove the DMA mapping. - */ - mlx5_mr_cache_free(dev, mr); ib_umem_release(umem); - if (umem) - atomic_sub(npages, &dev->mdev->priv.reg_pages); + atomic_sub(npages, &dev->mdev->priv.reg_pages); - if (!mr->allocated_from_cache) - kfree(mr); } int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 06e24d5e7609..bcfc09846697 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -144,6 +144,27 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, } } +static void dma_fence_odp_mr(struct mlx5_ib_mr *mr) +{ + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + + /* Ensure mlx5_ib_invalidate_range() will not touch the MR any more */ + mutex_lock(&odp->umem_mutex); + if (odp->npages) { + mlx5_mr_cache_invalidate(mr); + ib_umem_odp_unmap_dma_pages(odp, ib_umem_start(odp), + ib_umem_end(odp)); + WARN_ON(odp->npages); + } + odp->private = NULL; + mutex_unlock(&odp->umem_mutex); + + if (!mr->allocated_from_cache) { + mlx5_core_destroy_mkey(mr->dev->mdev, &mr->mmkey); + WARN_ON(mr->descs); + } +} + /* * This must be called after the mr has been removed from implicit_children * and the SRCU synchronized. NOTE: The MR does not necessarily have to be @@ -171,6 +192,8 @@ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt) srcu_read_unlock(&mr->dev->odp_srcu, srcu_key); } + dma_fence_odp_mr(mr); + mr->parent = NULL; mlx5_mr_cache_free(mr->dev, mr); ib_umem_odp_release(odp); @@ -228,16 +251,15 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, int in_block = 0; u64 addr; - if (!umem_odp) { - pr_err("invalidation called on NULL umem or non-ODP umem\n"); - return; - } - + mutex_lock(&umem_odp->umem_mutex); + /* + * If npages is zero then umem_odp->private may not be setup yet. This + * does not complete until after the first page is mapped for DMA. + */ + if (!umem_odp->npages) + goto out; mr = umem_odp->private; - if (!mr || !mr->ibmr.pd) - return; - start = max_t(u64, ib_umem_start(umem_odp), start); end = min_t(u64, ib_umem_end(umem_odp), end); @@ -247,7 +269,6 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, * overwrite the same MTTs. Concurent invalidations might race us, * but they will write 0s as well, so no difference in the end result. */ - mutex_lock(&umem_odp->umem_mutex); for (addr = start; addr < end; addr += BIT(umem_odp->page_shift)) { idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift; /* @@ -289,6 +310,7 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, if (unlikely(!umem_odp->npages && mr->parent)) destroy_unused_implicit_child_mr(mr); +out: mutex_unlock(&umem_odp->umem_mutex); } @@ -536,6 +558,13 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) WARN_ON(atomic_read(&imr->num_deferred_work)); } + /* + * Fence the imr before we destroy the children. This allows us to + * skip updating the XLT of the imr during destroy of the child mkey + * the imr points to. + */ + mlx5_mr_cache_invalidate(imr); + list_for_each_entry_safe (mtt, tmp, &destroy_list, odp_destroy.elm) free_implicit_child_mr(mtt, false); @@ -543,6 +572,29 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) ib_umem_odp_release(odp_imr); } +/** + * mlx5_ib_fence_odp_mr - Stop all access to the ODP MR + * @mr: to fence + * + * On return no parallel threads will be touching this MR and no DMA will be + * active. + */ +void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr) +{ + /* Prevent new page faults and prefetch requests from succeeding */ + xa_erase(&mr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); + + /* Wait for all running page-fault handlers to finish. */ + synchronize_srcu(&mr->dev->odp_srcu); + + if (atomic_read(&mr->num_deferred_work)) { + flush_workqueue(system_unbound_wq); + WARN_ON(atomic_read(&mr->num_deferred_work)); + } + + dma_fence_odp_mr(mr); +} + #define MLX5_PF_FLAGS_DOWNGRADE BIT(1) static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp, u64 user_va, size_t bcnt, u32 *bytes_mapped, -- cgit From d5b60e26e86a463ca83bb5ec502dda6ea685159e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 7 Oct 2019 23:18:08 +0200 Subject: RDMA/hns: Fix build error again This is not the first attempt to fix building random configurations, unfortunately the attempt in commit a07fc0bb483e ("RDMA/hns: Fix build error") caused a new problem when CONFIG_INFINIBAND_HNS_HIP06=m and CONFIG_INFINIBAND_HNS_HIP08=y: drivers/infiniband/hw/hns/hns_roce_main.o:(.rodata+0xe60): undefined reference to `__this_module' Revert commits a07fc0bb483e ("RDMA/hns: Fix build error") and a3e2d4c7e766 ("RDMA/hns: remove obsolete Kconfig comment") to get back to the previous state, then fix the issues described there differently, by adding more specific dependencies: INFINIBAND_HNS can now only be built-in if at least one of HNS or HNS3 are built-in, and the individual back-ends are only available if that code is reachable from the main driver. Fixes: a07fc0bb483e ("RDMA/hns: Fix build error") Fixes: a3e2d4c7e766 ("RDMA/hns: remove obsolete Kconfig comment") Fixes: dd74282df573 ("RDMA/hns: Initialize the PCI device for hip08 RoCE") Fixes: 08805fdbeb2d ("RDMA/hns: Split hw v1 driver from hns roce driver") Link: https://lore.kernel.org/r/20191007211826.3361202-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/Kconfig | 17 ++++++++++++++--- drivers/infiniband/hw/hns/Makefile | 8 ++++++-- 2 files changed, 20 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig index d602b698b57e..4921c1e40ccd 100644 --- a/drivers/infiniband/hw/hns/Kconfig +++ b/drivers/infiniband/hw/hns/Kconfig @@ -1,23 +1,34 @@ # SPDX-License-Identifier: GPL-2.0-only config INFINIBAND_HNS - bool "HNS RoCE Driver" + tristate "HNS RoCE Driver" depends on NET_VENDOR_HISILICON depends on ARM64 || (COMPILE_TEST && 64BIT) + depends on (HNS_DSAF && HNS_ENET) || HNS3 ---help--- This is a RoCE/RDMA driver for the Hisilicon RoCE engine. The engine is used in Hisilicon Hip06 and more further ICT SoC based on platform device. + To compile HIP06 or HIP08 driver as module, choose M here. + config INFINIBAND_HNS_HIP06 - tristate "Hisilicon Hip06 Family RoCE support" + bool "Hisilicon Hip06 Family RoCE support" depends on INFINIBAND_HNS && HNS && HNS_DSAF && HNS_ENET + depends on INFINIBAND_HNS=m || (HNS_DSAF=y && HNS_ENET=y) ---help--- RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip06 and Hip07 SoC. These RoCE engines are platform devices. + To compile this driver, choose Y here: if INFINIBAND_HNS is m, this + module will be called hns-roce-hw-v1 + config INFINIBAND_HNS_HIP08 - tristate "Hisilicon Hip08 Family RoCE support" + bool "Hisilicon Hip08 Family RoCE support" depends on INFINIBAND_HNS && PCI && HNS3 + depends on INFINIBAND_HNS=m || HNS3=y ---help--- RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip08 SoC. The RoCE engine is a PCI device. + + To compile this driver, choose Y here: if INFINIBAND_HNS is m, this + module will be called hns-roce-hw-v2. diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile index 449a2d81319d..e105945b94a1 100644 --- a/drivers/infiniband/hw/hns/Makefile +++ b/drivers/infiniband/hw/hns/Makefile @@ -9,8 +9,12 @@ hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \ hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \ hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o +ifdef CONFIG_INFINIBAND_HNS_HIP06 hns-roce-hw-v1-objs := hns_roce_hw_v1.o $(hns-roce-objs) -obj-$(CONFIG_INFINIBAND_HNS_HIP06) += hns-roce-hw-v1.o +obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v1.o +endif +ifdef CONFIG_INFINIBAND_HNS_HIP08 hns-roce-hw-v2-objs := hns_roce_hw_v2.o hns_roce_hw_v2_dfx.o $(hns-roce-objs) -obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o +obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v2.o +endif -- cgit From 546d30099ed204792083f043cd7e016de86016a3 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 29 Oct 2019 07:57:21 +0200 Subject: RDMA/mlx5: Return proper error value Returned value from mlx5_mr_cache_alloc() is checked to be error or real pointer. Return proper error code instead of NULL which is not checked later. Fixes: 81713d3788d2 ("IB/mlx5: Add implicit MR support") Link: https://lore.kernel.org/r/20191029055721.7192-1-leon@kernel.org Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index f583476f5c8b..077ca10d9ed9 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -411,7 +411,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry) if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) { mlx5_ib_err(dev, "cache entry %d is out of range\n", entry); - return NULL; + return ERR_PTR(-EINVAL); } ent = &cache->ent[entry]; -- cgit From 11f552e21755cb6f804572243a1502b6bbd008dd Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Mon, 10 Jun 2019 15:21:24 +0300 Subject: IB/mlx5: Test write combining support Linux can run in all sorts of physical machines and VMs where write combining may or may not be supported. Currently there is no way to reliably tell if the system supports WC, or not. The driver uses WC to optimize posting work to the HCA, and getting this wrong in either direction can cause a significant performance loss. Add a test in mlx5_ib initialization process to test whether write-combining is supported on the machine. The test will run as part of the enable_driver callback to ensure that the test runs after the device is setup and can create and modify the QP needed, but runs before the device is exposed to the users. The test opens UD QP and posts NOP WQEs, the WQE written to the BlueFlame is different from the WQE in memory, requesting CQE only on the BlueFlame WQE. By checking whether we received a completion on one of these WQEs we can know if BlueFlame succeeded and this write-combining must be supported. Change reporting of BlueFlame support to be dependent on write-combining support instead of the FW's guess as to what the machine can do. Link: https://lore.kernel.org/r/20191027062234.10993-1-leon@kernel.org Signed-off-by: Michael Guralnik Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 15 ++- drivers/infiniband/hw/mlx5/mem.c | 199 +++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 6 ++ drivers/infiniband/hw/mlx5/qp.c | 6 +- 4 files changed, 223 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 0a8a1a129f17..22db87278c40 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1796,7 +1796,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, return -EINVAL; resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); - if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf)) + if (dev->wc_support) resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size); resp.cache_line_size = cache_line_size(); resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq); @@ -6256,6 +6256,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .disassociate_ucontext = mlx5_ib_disassociate_ucontext, .drain_rq = mlx5_ib_drain_rq, .drain_sq = mlx5_ib_drain_sq, + .enable_driver = mlx5_ib_enable_driver, .fill_res_entry = mlx5_ib_fill_res_entry, .fill_stat_entry = mlx5_ib_fill_stat_entry, .get_dev_fw_str = get_dev_fw_str, @@ -6699,6 +6700,18 @@ static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev) } } +int mlx5_ib_enable_driver(struct ib_device *dev) +{ + struct mlx5_ib_dev *mdev = to_mdev(dev); + int ret; + + ret = mlx5_ib_test_wc(mdev); + mlx5_ib_dbg(mdev, "Write-Combining %s", + mdev->wc_support ? "supported" : "not supported"); + + return ret; +} + void __mlx5_ib_remove(struct mlx5_ib_dev *dev, const struct mlx5_ib_profile *profile, int stage) diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index b5aece786b36..048f4e974a61 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -34,6 +34,7 @@ #include #include #include "mlx5_ib.h" +#include /* @umem: umem object to scan * @addr: ib virtual address requested by the user @@ -216,3 +217,201 @@ int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset) *offset = buf_off >> ilog2(off_size); return 0; } + +#define WR_ID_BF 0xBF +#define WR_ID_END 0xBAD +#define TEST_WC_NUM_WQES 255 +#define TEST_WC_POLLING_MAX_TIME_JIFFIES msecs_to_jiffies(100) +static int post_send_nop(struct mlx5_ib_dev *dev, struct ib_qp *ibqp, u64 wr_id, + bool signaled) +{ + struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_wqe_ctrl_seg *ctrl; + struct mlx5_bf *bf = &qp->bf; + __be32 mmio_wqe[16] = {}; + unsigned long flags; + unsigned int idx; + int i; + + if (unlikely(dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)) + return -EIO; + + spin_lock_irqsave(&qp->sq.lock, flags); + + idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); + ctrl = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx); + + memset(ctrl, 0, sizeof(struct mlx5_wqe_ctrl_seg)); + ctrl->fm_ce_se = signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0; + ctrl->opmod_idx_opcode = + cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | MLX5_OPCODE_NOP); + ctrl->qpn_ds = cpu_to_be32((sizeof(struct mlx5_wqe_ctrl_seg) / 16) | + (qp->trans_qp.base.mqp.qpn << 8)); + + qp->sq.wrid[idx] = wr_id; + qp->sq.w_list[idx].opcode = MLX5_OPCODE_NOP; + qp->sq.wqe_head[idx] = qp->sq.head + 1; + qp->sq.cur_post += DIV_ROUND_UP(sizeof(struct mlx5_wqe_ctrl_seg), + MLX5_SEND_WQE_BB); + qp->sq.w_list[idx].next = qp->sq.cur_post; + qp->sq.head++; + + memcpy(mmio_wqe, ctrl, sizeof(*ctrl)); + ((struct mlx5_wqe_ctrl_seg *)&mmio_wqe)->fm_ce_se |= + MLX5_WQE_CTRL_CQ_UPDATE; + + /* Make sure that descriptors are written before + * updating doorbell record and ringing the doorbell + */ + wmb(); + + qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post); + + /* Make sure doorbell record is visible to the HCA before + * we hit doorbell + */ + wmb(); + for (i = 0; i < 8; i++) + mlx5_write64(&mmio_wqe[i * 2], + bf->bfreg->map + bf->offset + i * 8); + + bf->offset ^= bf->buf_size; + + spin_unlock_irqrestore(&qp->sq.lock, flags); + + return 0; +} + +static int test_wc_poll_cq_result(struct mlx5_ib_dev *dev, struct ib_cq *cq) +{ + int ret; + struct ib_wc wc = {}; + unsigned long end = jiffies + TEST_WC_POLLING_MAX_TIME_JIFFIES; + + do { + ret = ib_poll_cq(cq, 1, &wc); + if (ret < 0 || wc.status) + return ret < 0 ? ret : -EINVAL; + if (ret) + break; + } while (!time_after(jiffies, end)); + + if (!ret) + return -ETIMEDOUT; + + if (wc.wr_id != WR_ID_BF) + ret = 0; + + return ret; +} + +static int test_wc_do_send(struct mlx5_ib_dev *dev, struct ib_qp *qp) +{ + int err, i; + + for (i = 0; i < TEST_WC_NUM_WQES; i++) { + err = post_send_nop(dev, qp, WR_ID_BF, false); + if (err) + return err; + } + + return post_send_nop(dev, qp, WR_ID_END, true); +} + +int mlx5_ib_test_wc(struct mlx5_ib_dev *dev) +{ + struct ib_cq_init_attr cq_attr = { .cqe = TEST_WC_NUM_WQES + 1 }; + int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type); + struct ib_qp_init_attr qp_init_attr = { + .cap = { .max_send_wr = TEST_WC_NUM_WQES }, + .qp_type = IB_QPT_UD, + .sq_sig_type = IB_SIGNAL_REQ_WR, + .create_flags = MLX5_IB_QP_CREATE_WC_TEST, + }; + struct ib_qp_attr qp_attr = { .port_num = 1 }; + struct ib_device *ibdev = &dev->ib_dev; + struct ib_qp *qp; + struct ib_cq *cq; + struct ib_pd *pd; + int ret; + + if (!MLX5_CAP_GEN(dev->mdev, bf)) + return 0; + + if (!dev->mdev->roce.roce_en && + port_type_cap == MLX5_CAP_PORT_TYPE_ETH) { + if (mlx5_core_is_pf(dev->mdev)) + dev->wc_support = true; + return 0; + } + + ret = mlx5_alloc_bfreg(dev->mdev, &dev->wc_bfreg, true, false); + if (ret) + goto print_err; + + if (!dev->wc_bfreg.wc) + goto out1; + + pd = ib_alloc_pd(ibdev, 0); + if (IS_ERR(pd)) { + ret = PTR_ERR(pd); + goto out1; + } + + cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr); + if (IS_ERR(cq)) { + ret = PTR_ERR(cq); + goto out2; + } + + qp_init_attr.recv_cq = cq; + qp_init_attr.send_cq = cq; + qp = ib_create_qp(pd, &qp_init_attr); + if (IS_ERR(qp)) { + ret = PTR_ERR(qp); + goto out3; + } + + qp_attr.qp_state = IB_QPS_INIT; + ret = ib_modify_qp(qp, &qp_attr, + IB_QP_STATE | IB_QP_PORT | IB_QP_PKEY_INDEX | + IB_QP_QKEY); + if (ret) + goto out4; + + qp_attr.qp_state = IB_QPS_RTR; + ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE); + if (ret) + goto out4; + + qp_attr.qp_state = IB_QPS_RTS; + ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN); + if (ret) + goto out4; + + ret = test_wc_do_send(dev, qp); + if (ret < 0) + goto out4; + + ret = test_wc_poll_cq_result(dev, cq); + if (ret > 0) { + dev->wc_support = true; + ret = 0; + } + +out4: + ib_destroy_qp(qp); +out3: + ib_destroy_cq(cq); +out2: + ib_dealloc_pd(pd); +out1: + mlx5_free_bfreg(dev->mdev, &dev->wc_bfreg); +print_err: + if (ret) + mlx5_ib_err( + dev, + "Error %d while trying to test write-combining support\n", + ret); + return ret; +} diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 5b4c5751a98f..0bdb8b45ea15 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -248,6 +248,7 @@ struct mlx5_ib_flow_db { * rely on the range reserved for that use in the ib_qp_create_flags enum. */ #define MLX5_IB_QP_CREATE_SQPN_QP1 IB_QP_CREATE_RESERVED_START +#define MLX5_IB_QP_CREATE_WC_TEST (IB_QP_CREATE_RESERVED_START << 1) struct wr_list { u16 opcode; @@ -966,6 +967,7 @@ struct mlx5_ib_dev { u8 fill_delay:1; u8 is_rep:1; u8 lag_active:1; + u8 wc_support:1; struct umr_common umrc; /* sync used page count stats */ @@ -993,6 +995,7 @@ struct mlx5_ib_dev { /* Array with num_ports elements */ struct mlx5_ib_port *port; struct mlx5_sq_bfreg bfreg; + struct mlx5_sq_bfreg wc_bfreg; struct mlx5_sq_bfreg fp_bfreg; struct mlx5_ib_delay_drop delay_drop; const struct mlx5_ib_profile *profile; @@ -1506,4 +1509,7 @@ static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev, return true; } + +int mlx5_ib_enable_driver(struct ib_device *dev); +int mlx5_ib_test_wc(struct mlx5_ib_dev *dev); #endif /* MLX5_IB_H */ diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index bb3f432e2fb6..c831b455ffa8 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1041,11 +1041,14 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK | IB_QP_CREATE_IPOIB_UD_LSO | IB_QP_CREATE_NETIF_QP | - MLX5_IB_QP_CREATE_SQPN_QP1)) + MLX5_IB_QP_CREATE_SQPN_QP1 | + MLX5_IB_QP_CREATE_WC_TEST)) return -EINVAL; if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) qp->bf.bfreg = &dev->fp_bfreg; + else if (init_attr->create_flags & MLX5_IB_QP_CREATE_WC_TEST) + qp->bf.bfreg = &dev->wc_bfreg; else qp->bf.bfreg = &dev->bfreg; @@ -5328,7 +5331,6 @@ out: * we hit doorbell */ wmb(); - /* currently we support only regular doorbells */ mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset); /* Make sure doorbells don't leak out of SQ spinlock * and reach the HCA out of order. -- cgit From c043ff2cfb7f6fdd9a1cb1a7ba3800f19b70bf65 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Wed, 30 Oct 2019 11:44:12 +0200 Subject: RDMA: Connect between the mmap entry and the umap_priv structure The rdma_user_mmap_io interface created a common interface for drivers to correctly map hw resources and zap them once the ucontext is destroyed enabling the drivers to safely free the hw resources. However, this meant the drivers need to delay freeing the resource to the ucontext destroy phase to ensure they were no longer mapped. The new mechanism for a common way of handling user/driver address mapping enabled notifying the driver if all umap_priv mappings were removed, and enabled freeing the hw resources when they are done with and not delay it until ucontext destroy. Since not all drivers use the mechanism, NULL can be sent to the rdma_user_mmap_io interface to continue working as before. Drivers that use the mmap_xa interface can pass the entry being mapped to the rdma_user_mmap_io function to be linked together. Link: https://lore.kernel.org/r/20191030094417.16866-4-michal.kalderon@marvell.com Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_verbs.c | 6 ++++-- drivers/infiniband/hw/hns/hns_roce_main.c | 6 ++++-- drivers/infiniband/hw/mlx4/main.c | 9 ++++++--- drivers/infiniband/hw/mlx5/main.c | 8 +++++--- 4 files changed, 19 insertions(+), 10 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 4edae89e8e3c..37e3d62bbb51 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -1612,11 +1612,13 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext, switch (entry->mmap_flag) { case EFA_MMAP_IO_NC: err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length, - pgprot_noncached(vma->vm_page_prot)); + pgprot_noncached(vma->vm_page_prot), + NULL); break; case EFA_MMAP_IO_WC: err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length, - pgprot_writecombine(vma->vm_page_prot)); + pgprot_writecombine(vma->vm_page_prot), + NULL); break; case EFA_MMAP_DMA_PAGE: for (va = vma->vm_start; va < vma->vm_end; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index b5d196c119ee..803dc6f4b496 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -359,7 +359,8 @@ static int hns_roce_mmap(struct ib_ucontext *context, return rdma_user_mmap_io(context, vma, to_hr_ucontext(context)->uar.pfn, PAGE_SIZE, - pgprot_noncached(vma->vm_page_prot)); + pgprot_noncached(vma->vm_page_prot), + NULL); /* vm_pgoff: 1 -- TPTR */ case 1: @@ -372,7 +373,8 @@ static int hns_roce_mmap(struct ib_ucontext *context, return rdma_user_mmap_io(context, vma, hr_dev->tptr_dma_addr >> PAGE_SHIFT, hr_dev->tptr_size, - vma->vm_page_prot); + vma->vm_page_prot, + NULL); default: return -EINVAL; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 8d2f1e38b891..f89b129b7e3a 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1146,7 +1146,8 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) return rdma_user_mmap_io(context, vma, to_mucontext(context)->uar.pfn, PAGE_SIZE, - pgprot_noncached(vma->vm_page_prot)); + pgprot_noncached(vma->vm_page_prot), + NULL); case 1: if (dev->dev->caps.bf_reg_size == 0) @@ -1155,7 +1156,8 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) context, vma, to_mucontext(context)->uar.pfn + dev->dev->caps.num_uars, - PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot)); + PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot), + NULL); case 3: { struct mlx4_clock_params params; @@ -1171,7 +1173,8 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) params.bar) + params.offset) >> PAGE_SHIFT, - PAGE_SIZE, pgprot_noncached(vma->vm_page_prot)); + PAGE_SIZE, pgprot_noncached(vma->vm_page_prot), + NULL); } default: diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 22db87278c40..2a510de4ae0f 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2156,7 +2156,7 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn); err = rdma_user_mmap_io(&context->ibucontext, vma, pfn, PAGE_SIZE, - prot); + prot, NULL); if (err) { mlx5_ib_err(dev, "rdma_user_mmap_io failed with error=%d, mmap_cmd=%s\n", @@ -2198,7 +2198,8 @@ static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) PAGE_SHIFT) + page_idx; return rdma_user_mmap_io(context, vma, pfn, map_size, - pgprot_writecombine(vma->vm_page_prot)); + pgprot_writecombine(vma->vm_page_prot), + NULL); } static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) @@ -2236,7 +2237,8 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm PAGE_SHIFT; return rdma_user_mmap_io(&context->ibucontext, vma, pfn, PAGE_SIZE, - pgprot_noncached(vma->vm_page_prot)); + pgprot_noncached(vma->vm_page_prot), + NULL); case MLX5_IB_MMAP_CLOCK_INFO: return mlx5_ib_mmap_clock_info_page(dev, vma, context); -- cgit From e84d3c184e7967559d511e4569c111a02d64031e Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Wed, 30 Oct 2019 11:44:13 +0200 Subject: RDMA/efa: Use the common mmap_xa helpers Remove the functions related to managing the mmap_xa database. This code was replaced with common code in ib_core. Link: https://lore.kernel.org/r/20191030094417.16866-5-michal.kalderon@marvell.com Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa.h | 11 +- drivers/infiniband/hw/efa/efa_main.c | 1 + drivers/infiniband/hw/efa/efa_verbs.c | 335 +++++++++++++++------------------- 3 files changed, 153 insertions(+), 194 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h index 2283e432693e..2bda07078b97 100644 --- a/drivers/infiniband/hw/efa/efa.h +++ b/drivers/infiniband/hw/efa/efa.h @@ -71,8 +71,6 @@ struct efa_dev { struct efa_ucontext { struct ib_ucontext ibucontext; - struct xarray mmap_xa; - u32 mmap_xa_page; u16 uarn; }; @@ -91,6 +89,7 @@ struct efa_cq { struct efa_ucontext *ucontext; dma_addr_t dma_addr; void *cpu_addr; + struct rdma_user_mmap_entry *mmap_entry; size_t size; u16 cq_idx; }; @@ -101,6 +100,13 @@ struct efa_qp { void *rq_cpu_addr; size_t rq_size; enum ib_qp_state state; + + /* Used for saving mmap_xa entries */ + struct rdma_user_mmap_entry *sq_db_mmap_entry; + struct rdma_user_mmap_entry *llq_desc_mmap_entry; + struct rdma_user_mmap_entry *rq_db_mmap_entry; + struct rdma_user_mmap_entry *rq_mmap_entry; + u32 qp_handle; u32 max_send_wr; u32 max_recv_wr; @@ -147,6 +153,7 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata); void efa_dealloc_ucontext(struct ib_ucontext *ibucontext); int efa_mmap(struct ib_ucontext *ibucontext, struct vm_area_struct *vma); +void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry); int efa_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, u32 flags, diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index 83858f7e83d0..0e3050d01b75 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -217,6 +217,7 @@ static const struct ib_device_ops efa_dev_ops = { .get_link_layer = efa_port_link_layer, .get_port_immutable = efa_get_port_immutable, .mmap = efa_mmap, + .mmap_free = efa_mmap_free, .modify_qp = efa_modify_qp, .query_device = efa_query_device, .query_gid = efa_query_gid, diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 37e3d62bbb51..b242ea7a3bc8 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -13,10 +13,6 @@ #include "efa.h" -#define EFA_MMAP_FLAG_SHIFT 56 -#define EFA_MMAP_PAGE_MASK GENMASK(EFA_MMAP_FLAG_SHIFT - 1, 0) -#define EFA_MMAP_INVALID U64_MAX - enum { EFA_MMAP_DMA_PAGE = 0, EFA_MMAP_IO_WC, @@ -27,20 +23,12 @@ enum { (BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \ BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE)) -struct efa_mmap_entry { - void *obj; +struct efa_user_mmap_entry { + struct rdma_user_mmap_entry rdma_entry; u64 address; - u64 length; - u32 mmap_page; u8 mmap_flag; }; -static inline u64 get_mmap_key(const struct efa_mmap_entry *efa) -{ - return ((u64)efa->mmap_flag << EFA_MMAP_FLAG_SHIFT) | - ((u64)efa->mmap_page << PAGE_SHIFT); -} - #define EFA_DEFINE_STATS(op) \ op(EFA_TX_BYTES, "tx_bytes") \ op(EFA_TX_PKTS, "tx_pkts") \ @@ -147,6 +135,12 @@ static inline struct efa_ah *to_eah(struct ib_ah *ibah) return container_of(ibah, struct efa_ah, ibah); } +static inline struct efa_user_mmap_entry * +to_emmap(struct rdma_user_mmap_entry *rdma_entry) +{ + return container_of(rdma_entry, struct efa_user_mmap_entry, rdma_entry); +} + #define field_avail(x, fld, sz) (offsetof(typeof(x), fld) + \ FIELD_SIZEOF(typeof(x), fld) <= (sz)) @@ -172,106 +166,6 @@ static void *efa_zalloc_mapped(struct efa_dev *dev, dma_addr_t *dma_addr, return addr; } -/* - * This is only called when the ucontext is destroyed and there can be no - * concurrent query via mmap or allocate on the xarray, thus we can be sure no - * other thread is using the entry pointer. We also know that all the BAR - * pages have either been zap'd or munmaped at this point. Normal pages are - * refcounted and will be freed at the proper time. - */ -static void mmap_entries_remove_free(struct efa_dev *dev, - struct efa_ucontext *ucontext) -{ - struct efa_mmap_entry *entry; - unsigned long mmap_page; - - xa_for_each(&ucontext->mmap_xa, mmap_page, entry) { - xa_erase(&ucontext->mmap_xa, mmap_page); - - ibdev_dbg( - &dev->ibdev, - "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n", - entry->obj, get_mmap_key(entry), entry->address, - entry->length); - if (entry->mmap_flag == EFA_MMAP_DMA_PAGE) - /* DMA mapping is already gone, now free the pages */ - free_pages_exact(phys_to_virt(entry->address), - entry->length); - kfree(entry); - } -} - -static struct efa_mmap_entry *mmap_entry_get(struct efa_dev *dev, - struct efa_ucontext *ucontext, - u64 key, u64 len) -{ - struct efa_mmap_entry *entry; - u64 mmap_page; - - mmap_page = (key & EFA_MMAP_PAGE_MASK) >> PAGE_SHIFT; - if (mmap_page > U32_MAX) - return NULL; - - entry = xa_load(&ucontext->mmap_xa, mmap_page); - if (!entry || get_mmap_key(entry) != key || entry->length != len) - return NULL; - - ibdev_dbg(&dev->ibdev, - "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n", - entry->obj, key, entry->address, entry->length); - - return entry; -} - -/* - * Note this locking scheme cannot support removal of entries, except during - * ucontext destruction when the core code guarentees no concurrency. - */ -static u64 mmap_entry_insert(struct efa_dev *dev, struct efa_ucontext *ucontext, - void *obj, u64 address, u64 length, u8 mmap_flag) -{ - struct efa_mmap_entry *entry; - u32 next_mmap_page; - int err; - - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - return EFA_MMAP_INVALID; - - entry->obj = obj; - entry->address = address; - entry->length = length; - entry->mmap_flag = mmap_flag; - - xa_lock(&ucontext->mmap_xa); - if (check_add_overflow(ucontext->mmap_xa_page, - (u32)(length >> PAGE_SHIFT), - &next_mmap_page)) - goto err_unlock; - - entry->mmap_page = ucontext->mmap_xa_page; - ucontext->mmap_xa_page = next_mmap_page; - err = __xa_insert(&ucontext->mmap_xa, entry->mmap_page, entry, - GFP_KERNEL); - if (err) - goto err_unlock; - - xa_unlock(&ucontext->mmap_xa); - - ibdev_dbg( - &dev->ibdev, - "mmap: obj[0x%p] addr[%#llx], len[%#llx], key[%#llx] inserted\n", - entry->obj, entry->address, entry->length, get_mmap_key(entry)); - - return get_mmap_key(entry); - -err_unlock: - xa_unlock(&ucontext->mmap_xa); - kfree(entry); - return EFA_MMAP_INVALID; - -} - int efa_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *udata) @@ -485,8 +379,19 @@ static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle) return efa_com_destroy_qp(&dev->edev, ¶ms); } +static void efa_qp_user_mmap_entries_remove(struct efa_ucontext *uctx, + struct efa_qp *qp) +{ + rdma_user_mmap_entry_remove(qp->rq_mmap_entry); + rdma_user_mmap_entry_remove(qp->rq_db_mmap_entry); + rdma_user_mmap_entry_remove(qp->llq_desc_mmap_entry); + rdma_user_mmap_entry_remove(qp->sq_db_mmap_entry); +} + int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { + struct efa_ucontext *ucontext = rdma_udata_to_drv_context(udata, + struct efa_ucontext, ibucontext); struct efa_dev *dev = to_edev(ibqp->pd->device); struct efa_qp *qp = to_eqp(ibqp); int err; @@ -505,61 +410,101 @@ int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) DMA_TO_DEVICE); } + efa_qp_user_mmap_entries_remove(ucontext, qp); kfree(qp); return 0; } +static struct rdma_user_mmap_entry* +efa_user_mmap_entry_insert(struct ib_ucontext *ucontext, + u64 address, size_t length, + u8 mmap_flag, u64 *offset) +{ + struct efa_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL); + int err; + + if (!entry) + return NULL; + + entry->address = address; + entry->mmap_flag = mmap_flag; + + err = rdma_user_mmap_entry_insert(ucontext, &entry->rdma_entry, + length); + if (err) { + kfree(entry); + return NULL; + } + *offset = rdma_user_mmap_get_offset(&entry->rdma_entry); + + return &entry->rdma_entry; +} + static int qp_mmap_entries_setup(struct efa_qp *qp, struct efa_dev *dev, struct efa_ucontext *ucontext, struct efa_com_create_qp_params *params, struct efa_ibv_create_qp_resp *resp) { - /* - * Once an entry is inserted it might be mmapped, hence cannot be - * cleaned up until dealloc_ucontext. - */ - resp->sq_db_mmap_key = - mmap_entry_insert(dev, ucontext, qp, - dev->db_bar_addr + resp->sq_db_offset, - PAGE_SIZE, EFA_MMAP_IO_NC); - if (resp->sq_db_mmap_key == EFA_MMAP_INVALID) + size_t length; + u64 address; + + address = dev->db_bar_addr + resp->sq_db_offset; + qp->sq_db_mmap_entry = + efa_user_mmap_entry_insert(&ucontext->ibucontext, + address, + PAGE_SIZE, EFA_MMAP_IO_NC, + &resp->sq_db_mmap_key); + if (!qp->sq_db_mmap_entry) return -ENOMEM; resp->sq_db_offset &= ~PAGE_MASK; - resp->llq_desc_mmap_key = - mmap_entry_insert(dev, ucontext, qp, - dev->mem_bar_addr + resp->llq_desc_offset, - PAGE_ALIGN(params->sq_ring_size_in_bytes + - (resp->llq_desc_offset & ~PAGE_MASK)), - EFA_MMAP_IO_WC); - if (resp->llq_desc_mmap_key == EFA_MMAP_INVALID) - return -ENOMEM; + address = dev->mem_bar_addr + resp->llq_desc_offset; + length = PAGE_ALIGN(params->sq_ring_size_in_bytes + + (resp->llq_desc_offset & ~PAGE_MASK)); + + qp->llq_desc_mmap_entry = + efa_user_mmap_entry_insert(&ucontext->ibucontext, + address, length, + EFA_MMAP_IO_WC, + &resp->llq_desc_mmap_key); + if (!qp->llq_desc_mmap_entry) + goto err_remove_mmap; resp->llq_desc_offset &= ~PAGE_MASK; if (qp->rq_size) { - resp->rq_db_mmap_key = - mmap_entry_insert(dev, ucontext, qp, - dev->db_bar_addr + resp->rq_db_offset, - PAGE_SIZE, EFA_MMAP_IO_NC); - if (resp->rq_db_mmap_key == EFA_MMAP_INVALID) - return -ENOMEM; + address = dev->db_bar_addr + resp->rq_db_offset; + + qp->rq_db_mmap_entry = + efa_user_mmap_entry_insert(&ucontext->ibucontext, + address, PAGE_SIZE, + EFA_MMAP_IO_NC, + &resp->rq_db_mmap_key); + if (!qp->rq_db_mmap_entry) + goto err_remove_mmap; resp->rq_db_offset &= ~PAGE_MASK; - resp->rq_mmap_key = - mmap_entry_insert(dev, ucontext, qp, - virt_to_phys(qp->rq_cpu_addr), - qp->rq_size, EFA_MMAP_DMA_PAGE); - if (resp->rq_mmap_key == EFA_MMAP_INVALID) - return -ENOMEM; + address = virt_to_phys(qp->rq_cpu_addr); + qp->rq_mmap_entry = + efa_user_mmap_entry_insert(&ucontext->ibucontext, + address, qp->rq_size, + EFA_MMAP_DMA_PAGE, + &resp->rq_mmap_key); + if (!qp->rq_mmap_entry) + goto err_remove_mmap; resp->rq_mmap_size = qp->rq_size; } return 0; + +err_remove_mmap: + efa_qp_user_mmap_entries_remove(ucontext, qp); + + return -ENOMEM; } static int efa_qp_validate_cap(struct efa_dev *dev, @@ -634,7 +579,6 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd, struct efa_dev *dev = to_edev(ibpd->device); struct efa_ibv_create_qp_resp resp = {}; struct efa_ibv_create_qp cmd = {}; - bool rq_entry_inserted = false; struct efa_ucontext *ucontext; struct efa_qp *qp; int err; @@ -742,7 +686,6 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd, if (err) goto err_destroy_qp; - rq_entry_inserted = true; qp->qp_handle = create_qp_resp.qp_handle; qp->ibqp.qp_num = create_qp_resp.qp_num; qp->ibqp.qp_type = init_attr->qp_type; @@ -759,7 +702,7 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd, ibdev_dbg(&dev->ibdev, "Failed to copy udata for qp[%u]\n", create_qp_resp.qp_num); - goto err_destroy_qp; + goto err_remove_mmap_entries; } } @@ -767,13 +710,16 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd, return &qp->ibqp; +err_remove_mmap_entries: + efa_qp_user_mmap_entries_remove(ucontext, qp); err_destroy_qp: efa_destroy_qp_handle(dev, create_qp_resp.qp_handle); err_free_mapped: if (qp->rq_size) { dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size, DMA_TO_DEVICE); - if (!rq_entry_inserted) + + if (!qp->rq_mmap_entry) free_pages_exact(qp->rq_cpu_addr, qp->rq_size); } err_free_qp: @@ -897,16 +843,18 @@ void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) efa_destroy_cq_idx(dev, cq->cq_idx); dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size, DMA_FROM_DEVICE); + rdma_user_mmap_entry_remove(cq->mmap_entry); } static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq, struct efa_ibv_create_cq_resp *resp) { resp->q_mmap_size = cq->size; - resp->q_mmap_key = mmap_entry_insert(dev, cq->ucontext, cq, - virt_to_phys(cq->cpu_addr), - cq->size, EFA_MMAP_DMA_PAGE); - if (resp->q_mmap_key == EFA_MMAP_INVALID) + cq->mmap_entry = efa_user_mmap_entry_insert(&cq->ucontext->ibucontext, + virt_to_phys(cq->cpu_addr), + cq->size, EFA_MMAP_DMA_PAGE, + &resp->q_mmap_key); + if (!cq->mmap_entry) return -ENOMEM; return 0; @@ -924,7 +872,6 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct efa_dev *dev = to_edev(ibdev); struct efa_ibv_create_cq cmd = {}; struct efa_cq *cq = to_ecq(ibcq); - bool cq_entry_inserted = false; int entries = attr->cqe; int err; @@ -1013,15 +960,13 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, goto err_destroy_cq; } - cq_entry_inserted = true; - if (udata->outlen) { err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen)); if (err) { ibdev_dbg(ibdev, "Failed to copy udata for create_cq\n"); - goto err_destroy_cq; + goto err_remove_mmap; } } @@ -1030,13 +975,16 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, return 0; +err_remove_mmap: + rdma_user_mmap_entry_remove(cq->mmap_entry); err_destroy_cq: efa_destroy_cq_idx(dev, cq->cq_idx); err_free_mapped: dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size, DMA_FROM_DEVICE); - if (!cq_entry_inserted) + if (!cq->mmap_entry) free_pages_exact(cq->cpu_addr, cq->size); + err_out: atomic64_inc(&dev->stats.sw_stats.create_cq_err); return err; @@ -1556,7 +1504,6 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata) goto err_out; ucontext->uarn = result.uarn; - xa_init(&ucontext->mmap_xa); resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE; resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_CREATE_AH; @@ -1585,40 +1532,56 @@ void efa_dealloc_ucontext(struct ib_ucontext *ibucontext) struct efa_ucontext *ucontext = to_eucontext(ibucontext); struct efa_dev *dev = to_edev(ibucontext->device); - mmap_entries_remove_free(dev, ucontext); efa_dealloc_uar(dev, ucontext->uarn); } +void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry) +{ + struct efa_user_mmap_entry *entry = to_emmap(rdma_entry); + + /* DMA mapping is already gone, now free the pages */ + if (entry->mmap_flag == EFA_MMAP_DMA_PAGE) + free_pages_exact(phys_to_virt(entry->address), + entry->rdma_entry.npages * PAGE_SIZE); + kfree(entry); +} + static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext, - struct vm_area_struct *vma, u64 key, u64 length) + struct vm_area_struct *vma) { - struct efa_mmap_entry *entry; + struct rdma_user_mmap_entry *rdma_entry; + struct efa_user_mmap_entry *entry; unsigned long va; + int err = 0; u64 pfn; - int err; - entry = mmap_entry_get(dev, ucontext, key, length); - if (!entry) { - ibdev_dbg(&dev->ibdev, "key[%#llx] does not have valid entry\n", - key); + rdma_entry = rdma_user_mmap_entry_get(&ucontext->ibucontext, vma); + if (!rdma_entry) { + ibdev_dbg(&dev->ibdev, + "pgoff[%#lx] does not have valid entry\n", + vma->vm_pgoff); return -EINVAL; } + entry = to_emmap(rdma_entry); ibdev_dbg(&dev->ibdev, - "Mapping address[%#llx], length[%#llx], mmap_flag[%d]\n", - entry->address, length, entry->mmap_flag); + "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n", + entry->address, rdma_entry->npages * PAGE_SIZE, + entry->mmap_flag); pfn = entry->address >> PAGE_SHIFT; switch (entry->mmap_flag) { case EFA_MMAP_IO_NC: - err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length, + err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, + entry->rdma_entry.npages * PAGE_SIZE, pgprot_noncached(vma->vm_page_prot), - NULL); + rdma_entry); break; case EFA_MMAP_IO_WC: - err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length, + err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, + entry->rdma_entry.npages * PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot), - NULL); + rdma_entry); break; case EFA_MMAP_DMA_PAGE: for (va = vma->vm_start; va < vma->vm_end; @@ -1635,12 +1598,13 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext, if (err) { ibdev_dbg( &dev->ibdev, - "Couldn't mmap address[%#llx] length[%#llx] mmap_flag[%d] err[%d]\n", - entry->address, length, entry->mmap_flag, err); - return err; + "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n", + entry->address, rdma_entry->npages * PAGE_SIZE, + entry->mmap_flag, err); } - return 0; + rdma_user_mmap_entry_put(rdma_entry); + return err; } int efa_mmap(struct ib_ucontext *ibucontext, @@ -1648,26 +1612,13 @@ int efa_mmap(struct ib_ucontext *ibucontext, { struct efa_ucontext *ucontext = to_eucontext(ibucontext); struct efa_dev *dev = to_edev(ibucontext->device); - u64 length = vma->vm_end - vma->vm_start; - u64 key = vma->vm_pgoff << PAGE_SHIFT; + size_t length = vma->vm_end - vma->vm_start; ibdev_dbg(&dev->ibdev, - "start %#lx, end %#lx, length = %#llx, key = %#llx\n", - vma->vm_start, vma->vm_end, length, key); - - if (length % PAGE_SIZE != 0 || !(vma->vm_flags & VM_SHARED)) { - ibdev_dbg(&dev->ibdev, - "length[%#llx] is not page size aligned[%#lx] or VM_SHARED is not set [%#lx]\n", - length, PAGE_SIZE, vma->vm_flags); - return -EINVAL; - } - - if (vma->vm_flags & VM_EXEC) { - ibdev_dbg(&dev->ibdev, "Mapping executable pages is not permitted\n"); - return -EPERM; - } + "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n", + vma->vm_start, vma->vm_end, length, vma->vm_pgoff); - return __efa_mmap(dev, ucontext, vma, key, length); + return __efa_mmap(dev, ucontext, vma); } static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah) -- cgit From 4c6bb02d598003e525e83532280ae895213aab20 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Wed, 30 Oct 2019 11:44:15 +0200 Subject: RDMA/qedr: Use the common mmap API Remove all functions related to mmap from qedr and use the common API. Link: https://lore.kernel.org/r/20191030094417.16866-7-michal.kalderon@marvell.com Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/main.c | 1 + drivers/infiniband/hw/qedr/qedr.h | 30 +++--- drivers/infiniband/hw/qedr/verbs.c | 185 ++++++++++++++++--------------------- drivers/infiniband/hw/qedr/verbs.h | 3 +- 4 files changed, 98 insertions(+), 121 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 1ff5407270d2..03fd168cccc0 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -212,6 +212,7 @@ static const struct ib_device_ops qedr_dev_ops = { .get_link_layer = qedr_link_layer, .map_mr_sg = qedr_map_mr_sg, .mmap = qedr_mmap, + .mmap_free = qedr_mmap_free, .modify_port = qedr_modify_port, .modify_qp = qedr_modify_qp, .modify_srq = qedr_modify_srq, diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 8e927f6c1520..6ef39dccba21 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -231,14 +231,10 @@ struct qedr_ucontext { struct qedr_dev *dev; struct qedr_pd *pd; void __iomem *dpi_addr; + struct rdma_user_mmap_entry *db_mmap_entry; u64 dpi_phys_addr; u32 dpi_size; u16 dpi; - - struct list_head mm_head; - - /* Lock to protect mm list */ - struct mutex mm_list_lock; }; union db_prod64 { @@ -301,14 +297,6 @@ struct qedr_pd { struct qedr_ucontext *uctx; }; -struct qedr_mm { - struct { - u64 phy_addr; - unsigned long len; - } key; - struct list_head entry; -}; - union db_prod32 { struct rdma_pwm_val16_data data; u32 raw; @@ -491,6 +479,15 @@ struct qedr_mr { u32 npages; }; +struct qedr_user_mmap_entry { + struct rdma_user_mmap_entry rdma_entry; + struct qedr_dev *dev; + u64 io_address; + size_t length; + u16 dpi; + u8 mmap_flag; +}; + #define SET_FIELD2(value, name, flag) ((value) |= ((flag) << (name ## _SHIFT))) #define QEDR_RESP_IMM (RDMA_CQE_RESPONDER_IMM_FLG_MASK << \ @@ -589,4 +586,11 @@ static inline struct qedr_srq *get_qedr_srq(struct ib_srq *ibsrq) { return container_of(ibsrq, struct qedr_srq, ibsrq); } + +static inline struct qedr_user_mmap_entry * +get_qedr_mmap_entry(struct rdma_user_mmap_entry *rdma_entry) +{ + return container_of(rdma_entry, struct qedr_user_mmap_entry, + rdma_entry); +} #endif diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 8b4240c1cc76..903cd934eb53 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -59,6 +59,10 @@ #define DB_ADDR_SHIFT(addr) ((addr) << DB_PWM_ADDR_OFFSET_SHIFT) +enum { + QEDR_USER_MMAP_IO_WC = 0, +}; + static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len) { @@ -257,60 +261,6 @@ int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask, return 0; } -static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr, - unsigned long len) -{ - struct qedr_mm *mm; - - mm = kzalloc(sizeof(*mm), GFP_KERNEL); - if (!mm) - return -ENOMEM; - - mm->key.phy_addr = phy_addr; - /* This function might be called with a length which is not a multiple - * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel - * forces this granularity by increasing the requested size if needed. - * When qedr_mmap is called, it will search the list with the updated - * length as a key. To prevent search failures, the length is rounded up - * in advance to PAGE_SIZE. - */ - mm->key.len = roundup(len, PAGE_SIZE); - INIT_LIST_HEAD(&mm->entry); - - mutex_lock(&uctx->mm_list_lock); - list_add(&mm->entry, &uctx->mm_head); - mutex_unlock(&uctx->mm_list_lock); - - DP_DEBUG(uctx->dev, QEDR_MSG_MISC, - "added (addr=0x%llx,len=0x%lx) for ctx=%p\n", - (unsigned long long)mm->key.phy_addr, - (unsigned long)mm->key.len, uctx); - - return 0; -} - -static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr, - unsigned long len) -{ - bool found = false; - struct qedr_mm *mm; - - mutex_lock(&uctx->mm_list_lock); - list_for_each_entry(mm, &uctx->mm_head, entry) { - if (len != mm->key.len || phy_addr != mm->key.phy_addr) - continue; - - found = true; - break; - } - mutex_unlock(&uctx->mm_list_lock); - DP_DEBUG(uctx->dev, QEDR_MSG_MISC, - "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n", - mm->key.phy_addr, mm->key.len, uctx, found); - - return found; -} - int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { struct ib_device *ibdev = uctx->device; @@ -319,6 +269,7 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) struct qedr_alloc_ucontext_resp uresp = {}; struct qedr_dev *dev = get_qedr_dev(ibdev); struct qed_rdma_add_user_out_params oparams; + struct qedr_user_mmap_entry *entry; if (!udata) return -EFAULT; @@ -335,13 +286,29 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) ctx->dpi_addr = oparams.dpi_addr; ctx->dpi_phys_addr = oparams.dpi_phys_addr; ctx->dpi_size = oparams.dpi_size; - INIT_LIST_HEAD(&ctx->mm_head); - mutex_init(&ctx->mm_list_lock); + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + rc = -ENOMEM; + goto err; + } + + entry->io_address = ctx->dpi_phys_addr; + entry->length = ctx->dpi_size; + entry->mmap_flag = QEDR_USER_MMAP_IO_WC; + entry->dpi = ctx->dpi; + entry->dev = dev; + rc = rdma_user_mmap_entry_insert(uctx, &entry->rdma_entry, + ctx->dpi_size); + if (rc) { + kfree(entry); + goto err; + } + ctx->db_mmap_entry = &entry->rdma_entry; uresp.dpm_enabled = dev->user_dpm_enabled; uresp.wids_enabled = 1; uresp.wid_count = oparams.wid_count; - uresp.db_pa = ctx->dpi_phys_addr; + uresp.db_pa = rdma_user_mmap_get_offset(ctx->db_mmap_entry); uresp.db_size = ctx->dpi_size; uresp.max_send_wr = dev->attr.max_sqe; uresp.max_recv_wr = dev->attr.max_rqe; @@ -353,82 +320,86 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (rc) - return rc; + goto err; ctx->dev = dev; - rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size); - if (rc) - return rc; - DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n", &ctx->ibucontext); return 0; + +err: + if (!ctx->db_mmap_entry) + dev->ops->rdma_remove_user(dev->rdma_ctx, ctx->dpi); + else + rdma_user_mmap_entry_remove(ctx->db_mmap_entry); + + return rc; } void qedr_dealloc_ucontext(struct ib_ucontext *ibctx) { struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx); - struct qedr_mm *mm, *tmp; DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n", uctx); - uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi); - - list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) { - DP_DEBUG(uctx->dev, QEDR_MSG_MISC, - "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n", - mm->key.phy_addr, mm->key.len, uctx); - list_del(&mm->entry); - kfree(mm); - } + + rdma_user_mmap_entry_remove(uctx->db_mmap_entry); } -int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry) { - struct qedr_ucontext *ucontext = get_qedr_ucontext(context); - struct qedr_dev *dev = get_qedr_dev(context->device); - unsigned long phys_addr = vma->vm_pgoff << PAGE_SHIFT; - unsigned long len = (vma->vm_end - vma->vm_start); - unsigned long dpi_start; + struct qedr_user_mmap_entry *entry = get_qedr_mmap_entry(rdma_entry); + struct qedr_dev *dev = entry->dev; - dpi_start = dev->db_phys_addr + (ucontext->dpi * ucontext->dpi_size); + if (entry->mmap_flag == QEDR_USER_MMAP_IO_WC) + dev->ops->rdma_remove_user(dev->rdma_ctx, entry->dpi); - DP_DEBUG(dev, QEDR_MSG_INIT, - "mmap invoked with vm_start=0x%pK, vm_end=0x%pK,vm_pgoff=0x%pK; dpi_start=0x%pK dpi_size=0x%x\n", - (void *)vma->vm_start, (void *)vma->vm_end, - (void *)vma->vm_pgoff, (void *)dpi_start, ucontext->dpi_size); + kfree(entry); +} - if ((vma->vm_start & (PAGE_SIZE - 1)) || (len & (PAGE_SIZE - 1))) { - DP_ERR(dev, - "failed mmap, addresses must be page aligned: start=0x%pK, end=0x%pK\n", - (void *)vma->vm_start, (void *)vma->vm_end); - return -EINVAL; - } +int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma) +{ + struct ib_device *dev = ucontext->device; + size_t length = vma->vm_end - vma->vm_start; + struct rdma_user_mmap_entry *rdma_entry; + struct qedr_user_mmap_entry *entry; + int rc = 0; + u64 pfn; - if (!qedr_search_mmap(ucontext, phys_addr, len)) { - DP_ERR(dev, "failed mmap, vm_pgoff=0x%lx is not authorized\n", - vma->vm_pgoff); - return -EINVAL; - } + ibdev_dbg(dev, + "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n", + vma->vm_start, vma->vm_end, length, vma->vm_pgoff); - if (phys_addr < dpi_start || - ((phys_addr + len) > (dpi_start + ucontext->dpi_size))) { - DP_ERR(dev, - "failed mmap, pages are outside of dpi; page address=0x%pK, dpi_start=0x%pK, dpi_size=0x%x\n", - (void *)phys_addr, (void *)dpi_start, - ucontext->dpi_size); + rdma_entry = rdma_user_mmap_entry_get(ucontext, vma); + if (!rdma_entry) { + ibdev_dbg(dev, "pgoff[%#lx] does not have valid entry\n", + vma->vm_pgoff); return -EINVAL; } - - if (vma->vm_flags & VM_READ) { - DP_ERR(dev, "failed mmap, cannot map doorbell bar for read\n"); - return -EINVAL; + entry = get_qedr_mmap_entry(rdma_entry); + ibdev_dbg(dev, + "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n", + entry->io_address, length, entry->mmap_flag); + + switch (entry->mmap_flag) { + case QEDR_USER_MMAP_IO_WC: + pfn = entry->io_address >> PAGE_SHIFT; + rc = rdma_user_mmap_io(ucontext, vma, pfn, length, + pgprot_writecombine(vma->vm_page_prot), + rdma_entry); + break; + default: + rc = -EINVAL; } - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); - return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len, - vma->vm_page_prot); + if (rc) + ibdev_dbg(dev, + "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n", + entry->io_address, length, entry->mmap_flag, rc); + + rdma_user_mmap_entry_put(rdma_entry); + return rc; } int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 9aaa90283d6e..3606e97e95da 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -46,7 +46,8 @@ int qedr_query_pkey(struct ib_device *, u8 port, u16 index, u16 *pkey); int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata); void qedr_dealloc_ucontext(struct ib_ucontext *uctx); -int qedr_mmap(struct ib_ucontext *, struct vm_area_struct *vma); +int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma); +void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry); int qedr_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); void qedr_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); -- cgit From 97f612509294aadabb8b431782794544df10cd13 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Wed, 30 Oct 2019 11:44:16 +0200 Subject: RDMA/qedr: Add doorbell overflow recovery support Use the doorbell recovery mechanism to register rdma related doorbells that will be restored in case there is a doorbell overflow attention. Link: https://lore.kernel.org/r/20191030094417.16866-8-michal.kalderon@marvell.com Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/qedr.h | 11 +- drivers/infiniband/hw/qedr/verbs.c | 314 +++++++++++++++++++++++++++++++------ 2 files changed, 275 insertions(+), 50 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 6ef39dccba21..639d521ed319 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -235,6 +235,7 @@ struct qedr_ucontext { u64 dpi_phys_addr; u32 dpi_size; u16 dpi; + bool db_rec; }; union db_prod64 { @@ -262,6 +263,11 @@ struct qedr_userq { struct qedr_pbl *pbl_tbl; u64 buf_addr; size_t buf_len; + + /* doorbell recovery */ + void __iomem *db_addr; + struct qedr_user_db_rec *db_rec_data; + struct rdma_user_mmap_entry *db_mmap_entry; }; struct qedr_cq { @@ -482,7 +488,10 @@ struct qedr_mr { struct qedr_user_mmap_entry { struct rdma_user_mmap_entry rdma_entry; struct qedr_dev *dev; - u64 io_address; + union { + u64 io_address; + void *address; + }; size_t length; u16 dpi; u8 mmap_flag; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 903cd934eb53..bc4a35b5581c 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -61,6 +61,7 @@ enum { QEDR_USER_MMAP_IO_WC = 0, + QEDR_USER_MMAP_PHYS_PAGE, }; static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src, @@ -267,6 +268,7 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) int rc; struct qedr_ucontext *ctx = get_qedr_ucontext(uctx); struct qedr_alloc_ucontext_resp uresp = {}; + struct qedr_alloc_ucontext_req ureq = {}; struct qedr_dev *dev = get_qedr_dev(ibdev); struct qed_rdma_add_user_out_params oparams; struct qedr_user_mmap_entry *entry; @@ -274,6 +276,17 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) if (!udata) return -EFAULT; + if (udata->inlen) { + rc = ib_copy_from_udata(&ureq, udata, + min(sizeof(ureq), udata->inlen)); + if (rc) { + DP_ERR(dev, "Problem copying data from user space\n"); + return -EFAULT; + } + + ctx->db_rec = !!(ureq.context_flags & QEDR_ALLOC_UCTX_DB_REC); + } + rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams); if (rc) { DP_ERR(dev, @@ -352,7 +365,9 @@ void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry) struct qedr_user_mmap_entry *entry = get_qedr_mmap_entry(rdma_entry); struct qedr_dev *dev = entry->dev; - if (entry->mmap_flag == QEDR_USER_MMAP_IO_WC) + if (entry->mmap_flag == QEDR_USER_MMAP_PHYS_PAGE) + free_page((unsigned long)entry->address); + else if (entry->mmap_flag == QEDR_USER_MMAP_IO_WC) dev->ops->rdma_remove_user(dev->rdma_ctx, entry->dpi); kfree(entry); @@ -389,6 +404,10 @@ int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma) pgprot_writecombine(vma->vm_page_prot), rdma_entry); break; + case QEDR_USER_MMAP_PHYS_PAGE: + rc = vm_insert_page(vma, vma->vm_start, + virt_to_page(entry->address)); + break; default: rc = -EINVAL; } @@ -629,16 +648,48 @@ static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem, } } +static int qedr_db_recovery_add(struct qedr_dev *dev, + void __iomem *db_addr, + void *db_data, + enum qed_db_rec_width db_width, + enum qed_db_rec_space db_space) +{ + if (!db_data) { + DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n"); + return 0; + } + + return dev->ops->common->db_recovery_add(dev->cdev, db_addr, db_data, + db_width, db_space); +} + +static void qedr_db_recovery_del(struct qedr_dev *dev, + void __iomem *db_addr, + void *db_data) +{ + if (!db_data) { + DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n"); + return; + } + + /* Ignore return code as there is not much we can do about it. Error + * log will be printed inside. + */ + dev->ops->common->db_recovery_del(dev->cdev, db_addr, db_data); +} + static int qedr_copy_cq_uresp(struct qedr_dev *dev, - struct qedr_cq *cq, struct ib_udata *udata) + struct qedr_cq *cq, struct ib_udata *udata, + u32 db_offset) { struct qedr_create_cq_uresp uresp; int rc; memset(&uresp, 0, sizeof(uresp)); - uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT); + uresp.db_offset = db_offset; uresp.icid = cq->icid; + uresp.db_rec_addr = rdma_user_mmap_get_offset(cq->q.db_mmap_entry); rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (rc) @@ -666,10 +717,58 @@ static inline int qedr_align_cq_entries(int entries) return aligned_size / QEDR_CQE_SIZE; } +static int qedr_init_user_db_rec(struct ib_udata *udata, + struct qedr_dev *dev, struct qedr_userq *q, + bool requires_db_rec) +{ + struct qedr_ucontext *uctx = + rdma_udata_to_drv_context(udata, struct qedr_ucontext, + ibucontext); + struct qedr_user_mmap_entry *entry; + int rc; + + /* Aborting for non doorbell userqueue (SRQ) or non-supporting lib */ + if (requires_db_rec == 0 || !uctx->db_rec) + return 0; + + /* Allocate a page for doorbell recovery, add to mmap */ + q->db_rec_data = (void *)get_zeroed_page(GFP_USER); + if (!q->db_rec_data) { + DP_ERR(dev, "get_zeroed_page failed\n"); + return -ENOMEM; + } + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + goto err_free_db_data; + + entry->address = q->db_rec_data; + entry->length = PAGE_SIZE; + entry->mmap_flag = QEDR_USER_MMAP_PHYS_PAGE; + rc = rdma_user_mmap_entry_insert(&uctx->ibucontext, + &entry->rdma_entry, + PAGE_SIZE); + if (rc) + goto err_free_entry; + + q->db_mmap_entry = &entry->rdma_entry; + + return 0; + +err_free_entry: + kfree(entry); + +err_free_db_data: + free_page((unsigned long)q->db_rec_data); + q->db_rec_data = NULL; + return -ENOMEM; +} + static inline int qedr_init_user_queue(struct ib_udata *udata, struct qedr_dev *dev, struct qedr_userq *q, u64 buf_addr, - size_t buf_len, int access, int dmasync, + size_t buf_len, bool requires_db_rec, + int access, int dmasync, int alloc_and_init) { u32 fw_pages; @@ -707,7 +806,8 @@ static inline int qedr_init_user_queue(struct ib_udata *udata, } } - return 0; + /* mmap the user address used to store doorbell data for recovery */ + return qedr_init_user_db_rec(udata, dev, q, requires_db_rec); err0: ib_umem_release(q->umem); @@ -793,6 +893,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, int entries = attr->cqe; struct qedr_cq *cq = get_qedr_cq(ibcq); int chain_entries; + u32 db_offset; int page_cnt; u64 pbl_ptr; u16 icid; @@ -812,8 +913,12 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, chain_entries = qedr_align_cq_entries(entries); chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES); + /* calc db offset. user will add DPI base, kernel will add db addr */ + db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT); + if (udata) { - if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) { + if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), + udata->inlen))) { DP_ERR(dev, "create cq: problem copying data from user space\n"); goto err0; @@ -828,8 +933,9 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, cq->cq_type = QEDR_CQ_TYPE_USER; rc = qedr_init_user_queue(udata, dev, &cq->q, ureq.addr, - ureq.len, IB_ACCESS_LOCAL_WRITE, 1, - 1); + ureq.len, true, + IB_ACCESS_LOCAL_WRITE, + 1, 1); if (rc) goto err0; @@ -837,6 +943,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, page_cnt = cq->q.pbl_info.num_pbes; cq->ibcq.cqe = chain_entries; + cq->q.db_addr = ctx->dpi_addr + db_offset; } else { cq->cq_type = QEDR_CQ_TYPE_KERNEL; @@ -848,7 +955,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, sizeof(union rdma_cqe), &cq->pbl, NULL); if (rc) - goto err1; + goto err0; page_cnt = qed_chain_get_page_cnt(&cq->pbl); pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl); @@ -860,21 +967,28 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, rc = dev->ops->rdma_create_cq(dev->rdma_ctx, ¶ms, &icid); if (rc) - goto err2; + goto err1; cq->icid = icid; cq->sig = QEDR_CQ_MAGIC_NUMBER; spin_lock_init(&cq->cq_lock); if (udata) { - rc = qedr_copy_cq_uresp(dev, cq, udata); + rc = qedr_copy_cq_uresp(dev, cq, udata, db_offset); if (rc) - goto err3; + goto err2; + + rc = qedr_db_recovery_add(dev, cq->q.db_addr, + &cq->q.db_rec_data->db_data, + DB_REC_WIDTH_64B, + DB_REC_USER); + if (rc) + goto err2; + } else { /* Generate doorbell address. */ - cq->db_addr = dev->db_addr + - DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT); cq->db.data.icid = cq->icid; + cq->db_addr = dev->db_addr + db_offset; cq->db.data.params = DB_AGG_CMD_SET << RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT; @@ -884,6 +998,11 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, cq->latest_cqe = NULL; consume_cqe(cq); cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl); + + rc = qedr_db_recovery_add(dev, cq->db_addr, &cq->db.data, + DB_REC_WIDTH_64B, DB_REC_KERNEL); + if (rc) + goto err2; } DP_DEBUG(dev, QEDR_MSG_CQ, @@ -892,18 +1011,19 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, return 0; -err3: +err2: destroy_iparams.icid = cq->icid; dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams, &destroy_oparams); -err2: - if (udata) - qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl); - else - dev->ops->common->chain_free(dev->cdev, &cq->pbl); err1: - if (udata) + if (udata) { + qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl); ib_umem_release(cq->q.umem); + if (ctx) + rdma_user_mmap_entry_remove(cq->q.db_mmap_entry); + } else { + dev->ops->common->chain_free(dev->cdev, &cq->pbl); + } err0: return -EINVAL; } @@ -934,8 +1054,10 @@ void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) cq->destroyed = 1; /* GSIs CQs are handled by driver, so they don't exist in the FW */ - if (cq->cq_type == QEDR_CQ_TYPE_GSI) + if (cq->cq_type == QEDR_CQ_TYPE_GSI) { + qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data); return; + } iparams.icid = cq->icid; dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams); @@ -944,6 +1066,14 @@ void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) if (udata) { qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl); ib_umem_release(cq->q.umem); + + if (cq->q.db_rec_data) { + qedr_db_recovery_del(dev, cq->q.db_addr, + &cq->q.db_rec_data->db_data); + rdma_user_mmap_entry_remove(cq->q.db_mmap_entry); + } + } else { + qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data); } /* We don't want the IRQ handler to handle a non-existing CQ so we @@ -1108,8 +1238,8 @@ static int qedr_copy_srq_uresp(struct qedr_dev *dev, } static void qedr_copy_rq_uresp(struct qedr_dev *dev, - struct qedr_create_qp_uresp *uresp, - struct qedr_qp *qp) + struct qedr_create_qp_uresp *uresp, + struct qedr_qp *qp) { /* iWARP requires two doorbells per RQ. */ if (rdma_protocol_iwarp(&dev->ibdev, 1)) { @@ -1122,6 +1252,7 @@ static void qedr_copy_rq_uresp(struct qedr_dev *dev, } uresp->rq_icid = qp->icid; + uresp->rq_db_rec_addr = rdma_user_mmap_get_offset(qp->urq.db_mmap_entry); } static void qedr_copy_sq_uresp(struct qedr_dev *dev, @@ -1135,22 +1266,25 @@ static void qedr_copy_sq_uresp(struct qedr_dev *dev, uresp->sq_icid = qp->icid; else uresp->sq_icid = qp->icid + 1; + + uresp->sq_db_rec_addr = + rdma_user_mmap_get_offset(qp->usq.db_mmap_entry); } static int qedr_copy_qp_uresp(struct qedr_dev *dev, - struct qedr_qp *qp, struct ib_udata *udata) + struct qedr_qp *qp, struct ib_udata *udata, + struct qedr_create_qp_uresp *uresp) { - struct qedr_create_qp_uresp uresp; int rc; - memset(&uresp, 0, sizeof(uresp)); - qedr_copy_sq_uresp(dev, &uresp, qp); - qedr_copy_rq_uresp(dev, &uresp, qp); + memset(uresp, 0, sizeof(*uresp)); + qedr_copy_sq_uresp(dev, uresp, qp); + qedr_copy_rq_uresp(dev, uresp, qp); - uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE; - uresp.qp_id = qp->qp_id; + uresp->atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE; + uresp->qp_id = qp->qp_id; - rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + rc = qedr_ib_copy_to_udata(udata, uresp, sizeof(*uresp)); if (rc) DP_ERR(dev, "create qp: failed a copy to user space with qp icid=0x%x.\n", @@ -1197,16 +1331,35 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev, qp->sq.max_sges, qp->sq_cq->icid); } -static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp) +static int qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp) { + int rc; + qp->sq.db = dev->db_addr + DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); qp->sq.db_data.data.icid = qp->icid + 1; + rc = qedr_db_recovery_add(dev, qp->sq.db, + &qp->sq.db_data, + DB_REC_WIDTH_32B, + DB_REC_KERNEL); + if (rc) + return rc; + if (!qp->srq) { qp->rq.db = dev->db_addr + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD); qp->rq.db_data.data.icid = qp->icid; + + rc = qedr_db_recovery_add(dev, qp->rq.db, + &qp->rq.db_data, + DB_REC_WIDTH_32B, + DB_REC_KERNEL); + if (rc) + qedr_db_recovery_del(dev, qp->sq.db, + &qp->sq.db_data); } + + return rc; } static int qedr_check_srq_params(struct qedr_dev *dev, @@ -1260,7 +1413,7 @@ static int qedr_init_srq_user_params(struct ib_udata *udata, int rc; rc = qedr_init_user_queue(udata, srq->dev, &srq->usrq, ureq->srq_addr, - ureq->srq_len, access, dmasync, 1); + ureq->srq_len, false, access, dmasync, 1); if (rc) return rc; @@ -1356,7 +1509,8 @@ int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, hw_srq->max_sges = init_attr->attr.max_sge; if (udata) { - if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) { + if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), + udata->inlen))) { DP_ERR(dev, "create srq: problem copying data from user space\n"); goto err0; @@ -1545,7 +1699,9 @@ qedr_iwarp_populate_user_qp(struct qedr_dev *dev, &qp->urq.pbl_info, FW_PAGE_SHIFT); } -static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp) +static void qedr_cleanup_user(struct qedr_dev *dev, + struct qedr_ucontext *ctx, + struct qedr_qp *qp) { ib_umem_release(qp->usq.umem); qp->usq.umem = NULL; @@ -1560,6 +1716,18 @@ static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp) kfree(qp->usq.pbl_tbl); kfree(qp->urq.pbl_tbl); } + + if (qp->usq.db_rec_data) { + qedr_db_recovery_del(dev, qp->usq.db_addr, + &qp->usq.db_rec_data->db_data); + rdma_user_mmap_entry_remove(qp->usq.db_mmap_entry); + } + + if (qp->urq.db_rec_data) { + qedr_db_recovery_del(dev, qp->urq.db_addr, + &qp->urq.db_rec_data->db_data); + rdma_user_mmap_entry_remove(qp->urq.db_mmap_entry); + } } static int qedr_create_user_qp(struct qedr_dev *dev, @@ -1571,13 +1739,15 @@ static int qedr_create_user_qp(struct qedr_dev *dev, struct qed_rdma_create_qp_in_params in_params; struct qed_rdma_create_qp_out_params out_params; struct qedr_pd *pd = get_qedr_pd(ibpd); + struct qedr_create_qp_uresp uresp; + struct qedr_ucontext *ctx = NULL; struct qedr_create_qp_ureq ureq; int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1); int rc = -EINVAL; qp->create_type = QEDR_QP_CREATE_USER; memset(&ureq, 0, sizeof(ureq)); - rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq)); + rc = ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), udata->inlen)); if (rc) { DP_ERR(dev, "Problem copying data from user space\n"); return rc; @@ -1585,14 +1755,16 @@ static int qedr_create_user_qp(struct qedr_dev *dev, /* SQ - read access only (0), dma sync not required (0) */ rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr, - ureq.sq_len, 0, 0, alloc_and_init); + ureq.sq_len, true, 0, 0, + alloc_and_init); if (rc) return rc; if (!qp->srq) { /* RQ - read access only (0), dma sync not required (0) */ rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr, - ureq.rq_len, 0, 0, alloc_and_init); + ureq.rq_len, true, + 0, 0, alloc_and_init); if (rc) return rc; } @@ -1622,29 +1794,57 @@ static int qedr_create_user_qp(struct qedr_dev *dev, qp->qp_id = out_params.qp_id; qp->icid = out_params.icid; - rc = qedr_copy_qp_uresp(dev, qp, udata); + rc = qedr_copy_qp_uresp(dev, qp, udata, &uresp); + if (rc) + goto err; + + /* db offset was calculated in copy_qp_uresp, now set in the user q */ + ctx = pd->uctx; + qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset; + qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset; + + rc = qedr_db_recovery_add(dev, qp->usq.db_addr, + &qp->usq.db_rec_data->db_data, + DB_REC_WIDTH_32B, + DB_REC_USER); + if (rc) + goto err; + + rc = qedr_db_recovery_add(dev, qp->urq.db_addr, + &qp->urq.db_rec_data->db_data, + DB_REC_WIDTH_32B, + DB_REC_USER); if (rc) goto err; qedr_qp_user_print(dev, qp); - return 0; + return rc; err: rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp); if (rc) DP_ERR(dev, "create qp: fatal fault. rc=%d", rc); err1: - qedr_cleanup_user(dev, qp); + qedr_cleanup_user(dev, ctx, qp); return rc; } -static void qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp) +static int qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp) { + int rc; + qp->sq.db = dev->db_addr + DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); qp->sq.db_data.data.icid = qp->icid; + rc = qedr_db_recovery_add(dev, qp->sq.db, + &qp->sq.db_data, + DB_REC_WIDTH_32B, + DB_REC_KERNEL); + if (rc) + return rc; + qp->rq.db = dev->db_addr + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD); qp->rq.db_data.data.icid = qp->icid; @@ -1652,6 +1852,12 @@ static void qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp) DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS); qp->rq.iwarp_db2_data.data.icid = qp->icid; qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD; + + rc = qedr_db_recovery_add(dev, qp->rq.db, + &qp->rq.db_data, + DB_REC_WIDTH_32B, + DB_REC_KERNEL); + return rc; } static int @@ -1699,8 +1905,7 @@ qedr_roce_create_kernel_qp(struct qedr_dev *dev, qp->qp_id = out_params.qp_id; qp->icid = out_params.icid; - qedr_set_roce_db_info(dev, qp); - return rc; + return qedr_set_roce_db_info(dev, qp); } static int @@ -1758,8 +1963,7 @@ qedr_iwarp_create_kernel_qp(struct qedr_dev *dev, qp->qp_id = out_params.qp_id; qp->icid = out_params.icid; - qedr_set_iwarp_db_info(dev, qp); - return rc; + return qedr_set_iwarp_db_info(dev, qp); err: dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp); @@ -1774,6 +1978,15 @@ static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp) dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl); kfree(qp->rqe_wr_id); + + /* GSI qp is not registered to db mechanism so no need to delete */ + if (qp->qp_type == IB_QPT_GSI) + return; + + qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data); + + if (!qp->srq) + qedr_db_recovery_del(dev, qp->rq.db, &qp->rq.db_data); } static int qedr_create_kernel_qp(struct qedr_dev *dev, @@ -2414,7 +2627,10 @@ err: static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp, struct ib_udata *udata) { - int rc = 0; + struct qedr_ucontext *ctx = + rdma_udata_to_drv_context(udata, struct qedr_ucontext, + ibucontext); + int rc; if (qp->qp_type != IB_QPT_GSI) { rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp); @@ -2423,7 +2639,7 @@ static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp, } if (qp->create_type == QEDR_QP_CREATE_USER) - qedr_cleanup_user(dev, qp); + qedr_cleanup_user(dev, ctx, qp); else qedr_cleanup_kernel(dev, qp); -- cgit From b4bc76609722f175a257184cc17ad73226d4b716 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Wed, 30 Oct 2019 11:44:17 +0200 Subject: RDMA/qedr: Add iWARP doorbell recovery support This patch adds the iWARP specific doorbells to the doorbell recovery mechanism. Link: https://lore.kernel.org/r/20191030094417.16866-9-michal.kalderon@marvell.com Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/qedr.h | 12 +++++++----- drivers/infiniband/hw/qedr/verbs.c | 37 ++++++++++++++++++++++++++++++++++++- 2 files changed, 43 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 639d521ed319..5488dbd59d3c 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -238,6 +238,11 @@ struct qedr_ucontext { bool db_rec; }; +union db_prod32 { + struct rdma_pwm_val16_data data; + u32 raw; +}; + union db_prod64 { struct rdma_pwm_val32_data data; u64 raw; @@ -268,6 +273,8 @@ struct qedr_userq { void __iomem *db_addr; struct qedr_user_db_rec *db_rec_data; struct rdma_user_mmap_entry *db_mmap_entry; + void __iomem *db_rec_db2_addr; + union db_prod32 db_rec_db2_data; }; struct qedr_cq { @@ -303,11 +310,6 @@ struct qedr_pd { struct qedr_ucontext *uctx; }; -union db_prod32 { - struct rdma_pwm_val16_data data; - u32 raw; -}; - struct qedr_qp_hwq_info { /* WQE Elements */ struct qed_chain pbl; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index bc4a35b5581c..9a674e65c4f7 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1728,6 +1728,10 @@ static void qedr_cleanup_user(struct qedr_dev *dev, &qp->urq.db_rec_data->db_data); rdma_user_mmap_entry_remove(qp->urq.db_mmap_entry); } + + if (rdma_protocol_iwarp(&dev->ibdev, 1)) + qedr_db_recovery_del(dev, qp->urq.db_rec_db2_addr, + &qp->urq.db_rec_db2_data); } static int qedr_create_user_qp(struct qedr_dev *dev, @@ -1803,6 +1807,17 @@ static int qedr_create_user_qp(struct qedr_dev *dev, qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset; qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset; + if (rdma_protocol_iwarp(&dev->ibdev, 1)) { + qp->urq.db_rec_db2_addr = ctx->dpi_addr + uresp.rq_db2_offset; + + /* calculate the db_rec_db2 data since it is constant so no + * need to reflect from user + */ + qp->urq.db_rec_db2_data.data.icid = cpu_to_le16(qp->icid); + qp->urq.db_rec_db2_data.data.value = + cpu_to_le16(DQ_TCM_IWARP_POST_RQ_CF_CMD); + } + rc = qedr_db_recovery_add(dev, qp->usq.db_addr, &qp->usq.db_rec_data->db_data, DB_REC_WIDTH_32B, @@ -1817,6 +1832,14 @@ static int qedr_create_user_qp(struct qedr_dev *dev, if (rc) goto err; + if (rdma_protocol_iwarp(&dev->ibdev, 1)) { + rc = qedr_db_recovery_add(dev, qp->urq.db_rec_db2_addr, + &qp->urq.db_rec_db2_data, + DB_REC_WIDTH_32B, + DB_REC_USER); + if (rc) + goto err; + } qedr_qp_user_print(dev, qp); return rc; @@ -1857,6 +1880,13 @@ static int qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp) &qp->rq.db_data, DB_REC_WIDTH_32B, DB_REC_KERNEL); + if (rc) + return rc; + + rc = qedr_db_recovery_add(dev, qp->rq.iwarp_db2, + &qp->rq.iwarp_db2_data, + DB_REC_WIDTH_32B, + DB_REC_KERNEL); return rc; } @@ -1985,8 +2015,13 @@ static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp) qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data); - if (!qp->srq) + if (!qp->srq) { qedr_db_recovery_del(dev, qp->rq.db, &qp->rq.db_data); + + if (rdma_protocol_iwarp(&dev->ibdev, 1)) + qedr_db_recovery_del(dev, qp->rq.iwarp_db2, + &qp->rq.iwarp_db2_data); + } } static int qedr_create_kernel_qp(struct qedr_dev *dev, -- cgit From 25f3b49b9288a873b8c2ed94d7f66fad74e82296 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Mon, 28 Oct 2019 17:59:29 +0200 Subject: RDMA/hns: Remove unsupported modify_port callback There is no need to return always zero for function which is not supported. Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver") Link: https://lore.kernel.org/r/20191028155931.1114-3-kamalheib1@gmail.com Signed-off-by: Kamal Heib Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_main.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 803dc6f4b496..a389f9996689 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -301,12 +301,6 @@ static int hns_roce_modify_device(struct ib_device *ib_dev, int mask, return 0; } -static int hns_roce_modify_port(struct ib_device *ib_dev, u8 port_num, int mask, - struct ib_port_modify *props) -{ - return 0; -} - static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { @@ -440,7 +434,6 @@ static const struct ib_device_ops hns_roce_dev_ops = { .get_port_immutable = hns_roce_port_immutable, .mmap = hns_roce_mmap, .modify_device = hns_roce_modify_device, - .modify_port = hns_roce_modify_port, .modify_qp = hns_roce_modify_qp, .query_ah = hns_roce_query_ah, .query_device = hns_roce_query_device, -- cgit From 6135b71159de3bc66964ad9157f1a62a77590006 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Mon, 28 Oct 2019 17:59:30 +0200 Subject: RDMA/ocrdma: Remove unsupported modify_port callback There is no need to return always zero for function which is not supported. Fixes: fe2caefcdf58 ("RDMA/ocrdma: Add driver for Emulex OneConnect IBoE RDMA adapter") Link: https://lore.kernel.org/r/20191028155931.1114-4-kamalheib1@gmail.com Signed-off-by: Kamal Heib Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 1 - drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 6 ------ drivers/infiniband/hw/ocrdma/ocrdma_verbs.h | 2 -- 3 files changed, 9 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index c15cfc6cef81..d8c47d24d6d6 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -166,7 +166,6 @@ static const struct ib_device_ops ocrdma_dev_ops = { .get_port_immutable = ocrdma_port_immutable, .map_mr_sg = ocrdma_map_mr_sg, .mmap = ocrdma_mmap, - .modify_port = ocrdma_modify_port, .modify_qp = ocrdma_modify_qp, .poll_cq = ocrdma_poll_cq, .post_recv = ocrdma_post_recv, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index e8267e590772..e72050de5734 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -190,12 +190,6 @@ int ocrdma_query_port(struct ib_device *ibdev, return 0; } -int ocrdma_modify_port(struct ib_device *ibdev, u8 port, int mask, - struct ib_port_modify *props) -{ - return 0; -} - static int ocrdma_add_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr, unsigned long len) { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index 32488da1b752..3a5010881be5 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -54,8 +54,6 @@ int ocrdma_arm_cq(struct ib_cq *, enum ib_cq_notify_flags flags); int ocrdma_query_device(struct ib_device *, struct ib_device_attr *props, struct ib_udata *uhw); int ocrdma_query_port(struct ib_device *, u8 port, struct ib_port_attr *props); -int ocrdma_modify_port(struct ib_device *, u8 port, int mask, - struct ib_port_modify *props); enum rdma_protocol_type ocrdma_query_protocol(struct ib_device *device, u8 port_num); -- cgit From ad0593ec8930d2e3fa4252cb7ad666bdfe4ed4f1 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Mon, 28 Oct 2019 17:59:31 +0200 Subject: RDMA/qedr: Remove unsupported modify_port callback There is no need to return always zero for function which is not supported. Fixes: ac1b36e55a51 ("qedr: Add support for user context verbs") Link: https://lore.kernel.org/r/20191028155931.1114-5-kamalheib1@gmail.com Signed-off-by: Kamal Heib Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/main.c | 1 - drivers/infiniband/hw/qedr/verbs.c | 6 ------ drivers/infiniband/hw/qedr/verbs.h | 2 -- 3 files changed, 9 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 03fd168cccc0..ee47f6ce0c12 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -213,7 +213,6 @@ static const struct ib_device_ops qedr_dev_ops = { .map_mr_sg = qedr_map_mr_sg, .mmap = qedr_mmap, .mmap_free = qedr_mmap_free, - .modify_port = qedr_modify_port, .modify_qp = qedr_modify_qp, .modify_srq = qedr_modify_srq, .poll_cq = qedr_poll_cq, diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 9a674e65c4f7..8096b8fcab4e 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -256,12 +256,6 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr) return 0; } -int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask, - struct ib_port_modify *props) -{ - return 0; -} - int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { struct ib_device *ibdev = uctx->device; diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 3606e97e95da..620472116c6d 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -35,8 +35,6 @@ int qedr_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, struct ib_udata *udata); int qedr_query_port(struct ib_device *, u8 port, struct ib_port_attr *props); -int qedr_modify_port(struct ib_device *, u8 port, int mask, - struct ib_port_modify *props); int qedr_iw_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid); -- cgit From 688eec9d3dca2a3db3824e66463f086c70fa1b9f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 29 Oct 2019 08:27:43 +0200 Subject: RDMA/qib: Delete extra line Trivial cleanup to fix the following warning: drivers/infiniband/hw/qib/qib_iba6120.c:1420: warning: bad line: Fixes: f931551bafe1 ("IB/qib: Add new qib driver for QLogic PCIe InfiniBand adapters") Link: https://lore.kernel.org/r/20191029062745.7932-15-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_iba6120.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index 531d8a1db2c3..ca5ea734e3d0 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -1417,7 +1417,6 @@ static void qib_6120_quiet_serdes(struct qib_pportdata *ppd) * * The exact combo of LEDs if on is true is determined by looking * at the ibcstatus. - * These LEDs indicate the physical and logical state of IB link. * For this chip (at least with recommended board pinouts), LED1 * is Yellow (logical state) and LED2 is Green (physical state), -- cgit From 874e476ba949deb475378e10df7527921e4ff6c1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 29 Oct 2019 08:27:44 +0200 Subject: RDMA/qib: Delete empty check_cc_key function Function always returns zero, just delete it. Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_mad.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index f92faf5ec369..818ec37a3215 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -2296,18 +2296,11 @@ bail: return reply_failure((struct ib_smp *) ccp); } -static int check_cc_key(struct qib_ibport *ibp, - struct ib_cc_mad *ccp, int mad_flags) -{ - return 0; -} - static int process_cc(struct ib_device *ibdev, int mad_flags, u8 port, const struct ib_mad *in_mad, struct ib_mad *out_mad) { struct ib_cc_mad *ccp = (struct ib_cc_mad *)out_mad; - struct qib_ibport *ibp = to_iport(ibdev, port); int ret; *out_mad = *in_mad; @@ -2318,10 +2311,6 @@ static int process_cc(struct ib_device *ibdev, int mad_flags, goto bail; } - ret = check_cc_key(ibp, ccp, mad_flags); - if (ret) - goto bail; - switch (ccp->method) { case IB_MGMT_METHOD_GET: switch (ccp->attr_id) { -- cgit From be4a8d46732a45e78aa27c34b58431a917cd644d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 29 Oct 2019 08:27:31 +0200 Subject: RDMA/mad: Allocate zeroed MAD buffer Ensure that MAD output buffer is zero-based allocated in all the callers of process_mad and remove the various memset()'s from the drivers. Link: https://lore.kernel.org/r/20191029062745.7932-3-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx4/mad.c | 1 - drivers/infiniband/hw/mlx5/mad.c | 2 -- drivers/infiniband/hw/ocrdma/ocrdma_stats.c | 1 - drivers/infiniband/hw/qib/qib_mad.c | 8 -------- 4 files changed, 12 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 57079110af9b..985cced5d509 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -966,7 +966,6 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, } mutex_unlock(&dev->counters_table[port_num - 1].mutex); if (stats_avail) { - memset(out_mad->data, 0, sizeof out_mad->data); switch (counter_stats.counter_mode & 0xf) { case 0: edit_counter(&counter_stats, diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 348c1df69cdc..0a5eb6e1798c 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -284,8 +284,6 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, *out_mad_size != sizeof(*out_mad))) return IB_MAD_RESULT_FAILURE; - memset(out_mad->data, 0, sizeof(out_mad->data)); - if (MLX5_CAP_GEN(dev->mdev, vport_counters) && in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && in_mad->mad_hdr.method == IB_MGMT_METHOD_GET) { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c index a902942adb5d..dda9b61f53b6 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c @@ -675,7 +675,6 @@ int ocrdma_pma_counters(struct ocrdma_dev *dev, { struct ib_pma_portcounters *pma_cnt; - memset(out_mad->data, 0, sizeof out_mad->data); pma_cnt = (void *)(out_mad->data + 40); ocrdma_update_stats(dev); diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 818ec37a3215..6fbba0d54269 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -2098,8 +2098,6 @@ static int cc_get_classportinfo(struct ib_cc_mad *ccp, struct ib_cc_classportinfo_attr *p = (struct ib_cc_classportinfo_attr *)ccp->mgmt_data; - memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); - p->base_version = 1; p->class_version = 1; p->cap_mask = 0; @@ -2120,8 +2118,6 @@ static int cc_get_congestion_info(struct ib_cc_mad *ccp, struct qib_ibport *ibp = to_iport(ibdev, port); struct qib_pportdata *ppd = ppd_from_ibp(ibp); - memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); - p->congestion_info = 0; p->control_table_cap = ppd->cc_max_table_entries; @@ -2138,8 +2134,6 @@ static int cc_get_congestion_setting(struct ib_cc_mad *ccp, struct qib_pportdata *ppd = ppd_from_ibp(ibp); struct ib_cc_congestion_entry_shadow *entries; - memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); - spin_lock(&ppd->cc_shadow_lock); entries = ppd->congestion_entries_shadow->entries; @@ -2176,8 +2170,6 @@ static int cc_get_congestion_control_table(struct ib_cc_mad *ccp, if (cct_block_index > IB_CC_TABLE_CAP_DEFAULT - 1) goto bail; - memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); - spin_lock(&ppd->cc_shadow_lock); max_cct_block = -- cgit From 6a42265c9116a2ff8b94b52525c95f177cb15db2 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 29 Oct 2019 08:27:34 +0200 Subject: RDMA/ocrdma: Make ocrdma_pma_counters() return void This function always returns 0, so just use void and remove the bogus checking at the only call site. Link: https://lore.kernel.org/r/20191029062745.7932-6-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 6 ++---- drivers/infiniband/hw/ocrdma/ocrdma_stats.c | 4 +--- drivers/infiniband/hw/ocrdma/ocrdma_stats.h | 3 +-- 3 files changed, 4 insertions(+), 9 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 8d3e36d548aa..f8ebdf7086a1 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -268,10 +268,8 @@ int ocrdma_process_mad(struct ib_device *ibdev, switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_PERF_MGMT: dev = get_ocrdma_dev(ibdev); - if (!ocrdma_pma_counters(dev, out_mad)) - status = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; - else - status = IB_MAD_RESULT_SUCCESS; + ocrdma_pma_counters(dev, out_mad); + status = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; break; default: status = IB_MAD_RESULT_SUCCESS; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c index dda9b61f53b6..95e8c1560cc2 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c @@ -670,8 +670,7 @@ err: return -EFAULT; } -int ocrdma_pma_counters(struct ocrdma_dev *dev, - struct ib_mad *out_mad) +void ocrdma_pma_counters(struct ocrdma_dev *dev, struct ib_mad *out_mad) { struct ib_pma_portcounters *pma_cnt; @@ -682,7 +681,6 @@ int ocrdma_pma_counters(struct ocrdma_dev *dev, pma_cnt->port_rcv_data = cpu_to_be32(ocrdma_sysfs_rcv_data(dev)); pma_cnt->port_xmit_packets = cpu_to_be32(ocrdma_sysfs_xmit_pkts(dev)); pma_cnt->port_rcv_packets = cpu_to_be32(ocrdma_sysfs_rcv_pkts(dev)); - return 0; } static ssize_t ocrdma_dbgfs_ops_read(struct file *filp, char __user *buffer, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.h b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h index bba1fec4f11f..98feca26ac55 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h @@ -69,7 +69,6 @@ bool ocrdma_alloc_stats_resources(struct ocrdma_dev *dev); void ocrdma_release_stats_resources(struct ocrdma_dev *dev); void ocrdma_rem_port_stats(struct ocrdma_dev *dev); void ocrdma_add_port_stats(struct ocrdma_dev *dev); -int ocrdma_pma_counters(struct ocrdma_dev *dev, - struct ib_mad *out_mad); +void ocrdma_pma_counters(struct ocrdma_dev *dev, struct ib_mad *out_mad); #endif /* __OCRDMA_STATS_H__ */ -- cgit From dd0b0159f7b31439679879fcf2574d7ad744b6f1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 29 Oct 2019 08:27:37 +0200 Subject: RDMA/mad: Do not check MAD sizes in roce and ib drivers All callers for process_mad allocate MAD structures with proper sizes, there is no need to recheck it. Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx4/mad.c | 4 ---- drivers/infiniband/hw/mlx5/mad.c | 4 ---- drivers/infiniband/hw/mthca/mthca_mad.c | 4 ---- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 4 ---- drivers/infiniband/hw/qib/qib_mad.c | 4 ---- 5 files changed, 20 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 985cced5d509..c6ea4c50da1e 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -992,10 +992,6 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, struct ib_mad *out_mad = (struct ib_mad *)out; enum rdma_link_layer link = rdma_port_get_link_layer(ibdev, port_num); - if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad))) - return IB_MAD_RESULT_FAILURE; - /* iboe_process_mad() which uses the HCA flow-counters to implement IB PMA * queries, should be called only by VFs and for that specific purpose */ diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 0a5eb6e1798c..f49d9c70246e 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -280,10 +280,6 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, struct ib_mad *out_mad = (struct ib_mad *)out; int ret; - if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad))) - return IB_MAD_RESULT_FAILURE; - if (MLX5_CAP_GEN(dev->mdev, vport_counters) && in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && in_mad->mad_hdr.method == IB_MGMT_METHOD_GET) { diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 7ad517da4917..0893604d2a62 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -212,10 +212,6 @@ int mthca_process_mad(struct ib_device *ibdev, const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad))) - return IB_MAD_RESULT_FAILURE; - /* Forward locally generated traps to the SM */ if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index f8ebdf7086a1..0dc74ef42f8c 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -261,10 +261,6 @@ int ocrdma_process_mad(struct ib_device *ibdev, const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad))) - return IB_MAD_RESULT_FAILURE; - switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_PERF_MGMT: dev = get_ocrdma_dev(ibdev); diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 6fbba0d54269..b259aaf85d4a 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -2396,10 +2396,6 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad))) - return IB_MAD_RESULT_FAILURE; - switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: case IB_MGMT_CLASS_SUBN_LID_ROUTED: -- cgit From 84b56d57cf3cc9640b69cbb3a5d023d010ccb62a Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 29 Oct 2019 08:27:40 +0200 Subject: RDMA/ocrdma: Simplify process_mad function Change the switch with one case into a simple if statement so the code is less confusing. Link: https://lore.kernel.org/r/20191029062745.7932-12-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 0dc74ef42f8c..4098508b9240 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -256,20 +256,16 @@ int ocrdma_process_mad(struct ib_device *ibdev, struct ib_mad_hdr *out, size_t *out_mad_size, u16 *out_mad_pkey_index) { - int status; + int status = IB_MAD_RESULT_SUCCESS; struct ocrdma_dev *dev; const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - switch (in_mad->mad_hdr.mgmt_class) { - case IB_MGMT_CLASS_PERF_MGMT: + if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) { dev = get_ocrdma_dev(ibdev); ocrdma_pma_counters(dev, out_mad); - status = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; - break; - default: - status = IB_MAD_RESULT_SUCCESS; - break; + status |= IB_MAD_RESULT_REPLY; } + return status; } -- cgit From ffa2fd1323e32093abc23e7d71ed92b85e566ffe Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 29 Oct 2019 08:27:42 +0200 Subject: RDMA/mlx5: Rewrite MAD processing logic to be readable Multiple "if"s and "||" make extension of process_mad() function as a tedious task, rewrite that function to be more readable. Link: https://lore.kernel.org/r/20191029062745.7932-14-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mad.c | 116 +++++++++++++++++++-------------------- 1 file changed, 55 insertions(+), 61 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index f49d9c70246e..1c87412a162f 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -74,58 +74,6 @@ static int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, port); } -static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad *in_mad, struct ib_mad *out_mad) -{ - u16 slid; - int err; - - slid = in_wc ? ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); - - if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) - return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; - - if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || - in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { - if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && - in_mad->mad_hdr.method != IB_MGMT_METHOD_SET && - in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS) - return IB_MAD_RESULT_SUCCESS; - - /* Don't process SMInfo queries -- the SMA can't handle them. - */ - if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO) - return IB_MAD_RESULT_SUCCESS; - } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || - in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS1 || - in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS2 || - in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CONG_MGMT) { - if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && - in_mad->mad_hdr.method != IB_MGMT_METHOD_SET) - return IB_MAD_RESULT_SUCCESS; - } else { - return IB_MAD_RESULT_SUCCESS; - } - - err = mlx5_MAD_IFC(to_mdev(ibdev), - mad_flags & IB_MAD_IGNORE_MKEY, - mad_flags & IB_MAD_IGNORE_BKEY, - port_num, in_wc, in_grh, in_mad, out_mad); - if (err) - return IB_MAD_RESULT_FAILURE; - - /* set return bit in status of directed route responses */ - if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) - out_mad->mad_hdr.status |= cpu_to_be16(1 << 15); - - if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) - /* no response for trap repress */ - return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; - - return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; -} - static void pma_cnt_ext_assign(struct ib_pma_portcounters_ext *pma_cnt_ext, void *out) { @@ -278,17 +226,63 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, struct mlx5_ib_dev *dev = to_mdev(ibdev); const struct ib_mad *in_mad = (const struct ib_mad *)in; struct ib_mad *out_mad = (struct ib_mad *)out; - int ret; + u8 mgmt_class = in_mad->mad_hdr.mgmt_class; + u8 method = in_mad->mad_hdr.method; + u16 slid; + int err; - if (MLX5_CAP_GEN(dev->mdev, vport_counters) && - in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && - in_mad->mad_hdr.method == IB_MGMT_METHOD_GET) { - ret = process_pma_cmd(dev, port_num, in_mad, out_mad); - } else { - ret = process_mad(ibdev, mad_flags, port_num, in_wc, in_grh, - in_mad, out_mad); + slid = in_wc ? ib_lid_cpu16(in_wc->slid) : + be16_to_cpu(IB_LID_PERMISSIVE); + + if (method == IB_MGMT_METHOD_TRAP && !slid) + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; + + switch (mgmt_class) { + case IB_MGMT_CLASS_SUBN_LID_ROUTED: + case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: { + if (method != IB_MGMT_METHOD_GET && + method != IB_MGMT_METHOD_SET && + method != IB_MGMT_METHOD_TRAP_REPRESS) + return IB_MAD_RESULT_SUCCESS; + + /* Don't process SMInfo queries -- the SMA can't handle them. + */ + if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO) + return IB_MAD_RESULT_SUCCESS; + } break; + case IB_MGMT_CLASS_PERF_MGMT: + if (MLX5_CAP_GEN(dev->mdev, vport_counters) && + method == IB_MGMT_METHOD_GET) + return process_pma_cmd(dev, port_num, in_mad, out_mad); + /* fallthrough */ + case MLX5_IB_VENDOR_CLASS1: + /* fallthrough */ + case MLX5_IB_VENDOR_CLASS2: + case IB_MGMT_CLASS_CONG_MGMT: { + if (method != IB_MGMT_METHOD_GET && + method != IB_MGMT_METHOD_SET) + return IB_MAD_RESULT_SUCCESS; + } break; + default: + return IB_MAD_RESULT_SUCCESS; } - return ret; + + err = mlx5_MAD_IFC(to_mdev(ibdev), + mad_flags & IB_MAD_IGNORE_MKEY, + mad_flags & IB_MAD_IGNORE_BKEY, + port_num, in_wc, in_grh, in_mad, out_mad); + if (err) + return IB_MAD_RESULT_FAILURE; + + /* set return bit in status of directed route responses */ + if (mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + out_mad->mad_hdr.status |= cpu_to_be16(1 << 15); + + if (method == IB_MGMT_METHOD_TRAP_REPRESS) + /* no response for trap repress */ + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; + + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; } int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port) -- cgit From ec6adad0a1e3ef3064c12146b00c2bd1e6835b0c Mon Sep 17 00:00:00 2001 From: Yixian Liu Date: Tue, 5 Nov 2019 19:07:54 +0800 Subject: RDMA/hns: Delete unnecessary variable max_post There is no need to define max_post in hns_roce_wq, as it does same thing as wqe_cnt. Link: https://lore.kernel.org/r/1572952082-6681-2-git-send-email-liweihang@hisilicon.com Signed-off-by: Yixian Liu Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 1 - drivers/infiniband/hw/hns/hns_roce_qp.c | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 940761310430..f8f6f052bab5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -424,7 +424,6 @@ struct hns_roce_wq { u64 *wrid; /* Work request ID */ spinlock_t lock; int wqe_cnt; /* WQE num */ - u32 max_post; int max_gs; int offset; int wqe_shift; /* WQE size */ diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 6aa27d6ea3a6..308d15b98ee0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -318,7 +318,7 @@ static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev, * hr_qp->rq.max_gs); } - cap->max_recv_wr = hr_qp->rq.max_post = hr_qp->rq.wqe_cnt; + cap->max_recv_wr = hr_qp->rq.wqe_cnt; cap->max_recv_sge = hr_qp->rq.max_gs; return 0; @@ -610,7 +610,7 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, hr_qp->buff_size = size; /* Get wr and sge number which send */ - cap->max_send_wr = hr_qp->sq.max_post = hr_qp->sq.wqe_cnt; + cap->max_send_wr = hr_qp->sq.wqe_cnt; cap->max_send_sge = hr_qp->sq.max_gs; /* We don't support inline sends for kernel QPs (yet) */ @@ -1291,7 +1291,7 @@ bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, u32 cur; cur = hr_wq->head - hr_wq->tail; - if (likely(cur + nreq < hr_wq->max_post)) + if (likely(cur + nreq < hr_wq->wqe_cnt)) return false; hr_cq = to_hr_cq(ib_cq); @@ -1299,7 +1299,7 @@ bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, cur = hr_wq->head - hr_wq->tail; spin_unlock(&hr_cq->lock); - return cur + nreq >= hr_wq->max_post; + return cur + nreq >= hr_wq->wqe_cnt; } int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev) -- cgit From 16a11e0bffcab729bd2e8f315e1252e7ef3ddf33 Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Tue, 5 Nov 2019 19:07:55 +0800 Subject: RDMA/hns: Remove unnecessary structure hns_roce_sqp Special QP have no differences with normal qp in data structure, so definition of struct hns_roce_sqp should be removed and replaced by struct hns_roce_qp. Link: https://lore.kernel.org/r/1572952082-6681-3-git-send-email-liweihang@hisilicon.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 9 --------- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 5 +---- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 5 +---- drivers/infiniband/hw/hns/hns_roce_qp.c | 8 +++----- 4 files changed, 5 insertions(+), 22 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index f8f6f052bab5..409c626708b7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -692,10 +692,6 @@ struct hns_roce_qp { struct hns_roce_rinl_buf rq_inl_buf; }; -struct hns_roce_sqp { - struct hns_roce_qp hr_qp; -}; - struct hns_roce_ib_iboe { spinlock_t lock; struct net_device *netdevs[HNS_ROCE_MAX_PORTS]; @@ -1089,11 +1085,6 @@ static inline struct hns_roce_srq *to_hr_srq(struct ib_srq *ibsrq) return container_of(ibsrq, struct hns_roce_srq, ibsrq); } -static inline struct hns_roce_sqp *hr_to_hr_sqp(struct hns_roce_qp *hr_qp) -{ - return container_of(hr_qp, struct hns_roce_sqp, hr_qp); -} - static inline void hns_roce_write64_k(__le32 val[2], void __iomem *dest) { __raw_writeq(*(u64 *) val, dest); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 5f74bf55f471..bfe9cee8c969 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -3644,10 +3644,7 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); } - if (hr_qp->ibqp.qp_type == IB_QPT_RC) - kfree(hr_qp); - else - kfree(hr_to_hr_sqp(hr_qp)); + kfree(hr_qp); return 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 7218f6d4101a..0883fce0e400 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4727,10 +4727,7 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) ibdev_err(&hr_dev->ib_dev, "Destroy qp 0x%06lx failed(%d)\n", hr_qp->qpn, ret); - if (hr_qp->ibqp.qp_type == IB_QPT_GSI) - kfree(hr_to_hr_sqp(hr_qp)); - else - kfree(hr_qp); + kfree(hr_qp); return 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 308d15b98ee0..6fac4666ae8a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -1019,7 +1019,6 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, { struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_sqp *hr_sqp; struct hns_roce_qp *hr_qp; int ret; @@ -1049,11 +1048,10 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, return ERR_PTR(-EINVAL); } - hr_sqp = kzalloc(sizeof(*hr_sqp), GFP_KERNEL); - if (!hr_sqp) + hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL); + if (!hr_qp) return ERR_PTR(-ENOMEM); - hr_qp = &hr_sqp->hr_qp; hr_qp->port = init_attr->port_num - 1; hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port]; @@ -1068,7 +1066,7 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, hr_qp->ibqp.qp_num, hr_qp); if (ret) { ibdev_err(ibdev, "Create GSI QP failed!\n"); - kfree(hr_sqp); + kfree(hr_qp); return ERR_PTR(ret); } -- cgit From 03ccba5c2cf7ec5149c59039a5644233a2f60ca9 Mon Sep 17 00:00:00 2001 From: Yixian Liu Date: Tue, 5 Nov 2019 19:07:56 +0800 Subject: RDMA/hns: Delete unnecessary uar from hns_roce_cq The uar information is already recorded in priv_uar of hns_roce_dev, there is no need to record it in hns_roce_cq again. Link: https://lore.kernel.org/r/1572952082-6681-4-git-send-email-liweihang@hisilicon.com Signed-off-by: Yixian Liu Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_cq.c | 4 +--- drivers/infiniband/hw/hns/hns_roce_device.h | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 22541d19cd09..d1d773958c27 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -347,7 +347,6 @@ static int create_kernel_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, int cq_entries) { struct device *dev = hr_dev->dev; - struct hns_roce_uar *uar; int ret; if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) { @@ -367,9 +366,8 @@ static int create_kernel_cq(struct hns_roce_dev *hr_dev, goto err_db; } - uar = &hr_dev->priv_uar; hr_cq->cq_db_l = hr_dev->reg_base + hr_dev->odb_offset + - DB_REG_OFFSET * uar->index; + DB_REG_OFFSET * hr_dev->priv_uar.index; return 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 409c626708b7..c461cffa9721 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -494,7 +494,6 @@ struct hns_roce_cq { void (*comp)(struct hns_roce_cq *cq); void (*event)(struct hns_roce_cq *cq, enum hns_roce_event event_type); - struct hns_roce_uar *uar; u32 cq_depth; u32 cons_index; u32 *set_ci_db; -- cgit From d938d7856f4253f81906af0b9ca67c6a6e51cbaf Mon Sep 17 00:00:00 2001 From: Yixian Liu Date: Tue, 5 Nov 2019 19:07:57 +0800 Subject: RDMA/hns: Modify fields of struct hns_roce_srq Use wqe_cnt instead of max which means the queue size of srq, and remove wqe_ctr which is not used. Link: https://lore.kernel.org/r/1572952082-6681-5-git-send-email-liweihang@hisilicon.com Signed-off-by: Yixian Liu Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 5 ++--- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 12 ++++++------ drivers/infiniband/hw/hns/hns_roce_srq.c | 26 +++++++++++++------------- 3 files changed, 21 insertions(+), 22 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index c461cffa9721..ce777ce06ea2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -517,9 +517,8 @@ struct hns_roce_idx_que { struct hns_roce_srq { struct ib_srq ibsrq; - void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event); unsigned long srqn; - int max; + u32 wqe_cnt; int max_gs; int wqe_shift; void __iomem *db_reg_l; @@ -535,8 +534,8 @@ struct hns_roce_srq { spinlock_t lock; int head; int tail; - u16 wqe_ctr; struct mutex mutex; + void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event); }; struct hns_roce_uar_table { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 0883fce0e400..4700bac1c6e2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -6040,7 +6040,7 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, hr_dev->caps.srqwqe_hop_num)); roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQ_SHIFT_M, SRQC_BYTE_4_SRQ_SHIFT_S, - ilog2(srq->max)); + ilog2(srq->wqe_cnt)); roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQN_M, SRQC_BYTE_4_SRQN_S, srq->srqn); @@ -6126,7 +6126,7 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, int ret; if (srq_attr_mask & IB_SRQ_LIMIT) { - if (srq_attr->srq_limit >= srq->max) + if (srq_attr->srq_limit >= srq->wqe_cnt) return -EINVAL; mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); @@ -6186,7 +6186,7 @@ static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) SRQC_BYTE_8_SRQ_LIMIT_WL_S); attr->srq_limit = limit_wl; - attr->max_wr = srq->max - 1; + attr->max_wr = srq->wqe_cnt - 1; attr->max_sge = srq->max_gs; memcpy(srq_context, mailbox->buf, sizeof(*srq_context)); @@ -6239,7 +6239,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, spin_lock_irqsave(&srq->lock, flags); - ind = srq->head & (srq->max - 1); + ind = srq->head & (srq->wqe_cnt - 1); for (nreq = 0; wr; ++nreq, wr = wr->next) { if (unlikely(wr->num_sge > srq->max_gs)) { @@ -6254,7 +6254,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, break; } - wqe_idx = find_empty_entry(&srq->idx_que, srq->max); + wqe_idx = find_empty_entry(&srq->idx_que, srq->wqe_cnt); if (wqe_idx < 0) { ret = -ENOMEM; *bad_wr = wr; @@ -6278,7 +6278,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, } srq->wrid[wqe_idx] = wr->wr_id; - ind = (ind + 1) & (srq->max - 1); + ind = (ind + 1) & (srq->wqe_cnt - 1); } if (likely(nreq)) { diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index d96041d806f6..6f9d1d250e4a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -255,7 +255,7 @@ static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq, struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); struct hns_roce_idx_que *idx_que = &srq->idx_que; - idx_que->bitmap = bitmap_zalloc(srq->max, GFP_KERNEL); + idx_que->bitmap = bitmap_zalloc(srq->wqe_cnt, GFP_KERNEL); if (!idx_que->bitmap) return -ENOMEM; @@ -281,7 +281,7 @@ static int create_kernel_srq(struct hns_roce_srq *srq, int srq_buf_size) return -ENOMEM; srq->head = 0; - srq->tail = srq->max - 1; + srq->tail = srq->wqe_cnt - 1; ret = hns_roce_mtt_init(hr_dev, srq->buf.npages, srq->buf.page_shift, &srq->mtt); @@ -312,7 +312,7 @@ static int create_kernel_srq(struct hns_roce_srq *srq, int srq_buf_size) if (ret) goto err_kernel_idx_buf; - srq->wrid = kvmalloc_array(srq->max, sizeof(u64), GFP_KERNEL); + srq->wrid = kvmalloc_array(srq->wqe_cnt, sizeof(u64), GFP_KERNEL); if (!srq->wrid) { ret = -ENOMEM; goto err_kernel_idx_buf; @@ -358,7 +358,7 @@ static void destroy_kernel_srq(struct hns_roce_dev *hr_dev, } int hns_roce_create_srq(struct ib_srq *ib_srq, - struct ib_srq_init_attr *srq_init_attr, + struct ib_srq_init_attr *init_attr, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ib_srq->device); @@ -370,24 +370,24 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, u32 cqn; /* Check the actual SRQ wqe and SRQ sge num */ - if (srq_init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs || - srq_init_attr->attr.max_sge > hr_dev->caps.max_srq_sges) + if (init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs || + init_attr->attr.max_sge > hr_dev->caps.max_srq_sges) return -EINVAL; mutex_init(&srq->mutex); spin_lock_init(&srq->lock); - srq->max = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1); - srq->max_gs = srq_init_attr->attr.max_sge; + srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr + 1); + srq->max_gs = init_attr->attr.max_sge; srq_desc_size = max(16, 16 * srq->max_gs); srq->wqe_shift = ilog2(srq_desc_size); - srq_buf_size = srq->max * srq_desc_size; + srq_buf_size = srq->wqe_cnt * srq_desc_size; srq->idx_que.entry_sz = HNS_ROCE_IDX_QUE_ENTRY_SZ; - srq->idx_que.buf_size = srq->max * srq->idx_que.entry_sz; + srq->idx_que.buf_size = srq->wqe_cnt * srq->idx_que.entry_sz; srq->mtt.mtt_type = MTT_TYPE_SRQWQE; srq->idx_que.mtt.mtt_type = MTT_TYPE_IDX; @@ -405,8 +405,8 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, } } - cqn = ib_srq_has_cq(srq_init_attr->srq_type) ? - to_hr_cq(srq_init_attr->ext.cq)->cqn : 0; + cqn = ib_srq_has_cq(init_attr->srq_type) ? + to_hr_cq(init_attr->ext.cq)->cqn : 0; srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG; @@ -453,7 +453,7 @@ void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); } else { kvfree(srq->wrid); - hns_roce_buf_free(hr_dev, srq->max << srq->wqe_shift, + hns_roce_buf_free(hr_dev, srq->wqe_cnt << srq->wqe_shift, &srq->buf); } ib_umem_release(srq->idx_que.umem); -- cgit From 6eef524201deaaaf980bd21b80aac1b052cd56a7 Mon Sep 17 00:00:00 2001 From: Yixing Liu Date: Tue, 5 Nov 2019 19:07:58 +0800 Subject: RDMA/hns: Replace not intuitive function/macro names Replace "sw2hw" and "hw2sw" which is hard to understand with "create" and "destroy". Link: https://lore.kernel.org/r/1572952082-6681-6-git-send-email-liweihang@hisilicon.com Signed-off-by: Yixing Liu Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_cmd.h | 14 ++++---- drivers/infiniband/hw/hns/hns_roce_cq.c | 23 ++++++------- drivers/infiniband/hw/hns/hns_roce_device.h | 6 ++-- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 7 ++-- drivers/infiniband/hw/hns/hns_roce_mr.c | 50 +++++++++++++++-------------- drivers/infiniband/hw/hns/hns_roce_srq.c | 22 ++++++------- 6 files changed, 63 insertions(+), 59 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h index 2b6ac646ca9a..cd3ed2b462bd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.h +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h @@ -115,12 +115,12 @@ enum { enum { /* TPT commands */ - HNS_ROCE_CMD_SW2HW_MPT = 0xd, - HNS_ROCE_CMD_HW2SW_MPT = 0xf, + HNS_ROCE_CMD_CREATE_MPT = 0xd, + HNS_ROCE_CMD_DESTROY_MPT = 0xf, /* CQ commands */ - HNS_ROCE_CMD_SW2HW_CQ = 0x16, - HNS_ROCE_CMD_HW2SW_CQ = 0x17, + HNS_ROCE_CMD_CREATE_CQ = 0x16, + HNS_ROCE_CMD_DESTROY_CQ = 0x17, /* QP/EE commands */ HNS_ROCE_CMD_RST2INIT_QP = 0x19, @@ -129,14 +129,14 @@ enum { HNS_ROCE_CMD_RTS2RTS_QP = 0x1c, HNS_ROCE_CMD_2ERR_QP = 0x1e, HNS_ROCE_CMD_RTS2SQD_QP = 0x1f, - HNS_ROCE_CMD_SQD2SQD_QP = 0x38, HNS_ROCE_CMD_SQD2RTS_QP = 0x20, HNS_ROCE_CMD_2RST_QP = 0x21, HNS_ROCE_CMD_QUERY_QP = 0x22, - HNS_ROCE_CMD_SW2HW_SRQ = 0x70, + HNS_ROCE_CMD_SQD2SQD_QP = 0x38, + HNS_ROCE_CMD_CREATE_SRQ = 0x70, HNS_ROCE_CMD_MODIFY_SRQC = 0x72, HNS_ROCE_CMD_QUERY_SRQC = 0x73, - HNS_ROCE_CMD_HW2SW_SRQ = 0x74, + HNS_ROCE_CMD_DESTROY_SRQ = 0x74, }; int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index d1d773958c27..713df1f78388 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -73,12 +73,13 @@ static void hns_roce_ib_cq_event(struct hns_roce_cq *hr_cq, } } -static int hns_roce_sw2hw_cq(struct hns_roce_dev *dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long cq_num) +static int hns_roce_hw_create_cq(struct hns_roce_dev *dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long cq_num) { return hns_roce_cmd_mbox(dev, mailbox->dma, 0, cq_num, 0, - HNS_ROCE_CMD_SW2HW_CQ, HNS_ROCE_CMD_TIMEOUT_MSECS); + HNS_ROCE_CMD_CREATE_CQ, + HNS_ROCE_CMD_TIMEOUT_MSECS); } static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, @@ -144,7 +145,7 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, nent, vector); /* Send mailbox to hw */ - ret = hns_roce_sw2hw_cq(hr_dev, mailbox, hr_cq->cqn); + ret = hns_roce_hw_create_cq(hr_dev, mailbox, hr_cq->cqn); hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) { dev_err(dev, "CQ alloc.Failed to cmd mailbox.\n"); @@ -170,12 +171,12 @@ err_out: return ret; } -static int hns_roce_hw2sw_cq(struct hns_roce_dev *dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long cq_num) +static int hns_roce_hw_destroy_cq(struct hns_roce_dev *dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long cq_num) { return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, cq_num, - mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_CQ, + mailbox ? 0 : 1, HNS_ROCE_CMD_DESTROY_CQ, HNS_ROCE_CMD_TIMEOUT_MSECS); } @@ -185,9 +186,9 @@ void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) struct device *dev = hr_dev->dev; int ret; - ret = hns_roce_hw2sw_cq(hr_dev, NULL, hr_cq->cqn); + ret = hns_roce_hw_destroy_cq(hr_dev, NULL, hr_cq->cqn); if (ret) - dev_err(dev, "HW2SW_CQ failed (%d) for CQN %06lx\n", ret, + dev_err(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n", ret, hr_cq->cqn); xa_erase(&cq_table->array, hr_cq->cqn); diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index ce777ce06ea2..a1b712e9d454 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1184,9 +1184,9 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); -int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long mpt_index); +int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long mpt_index); unsigned long key_to_hw_index(u32 key); struct ib_mw *hns_roce_alloc_mw(struct ib_pd *pd, enum ib_mw_type, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index bfe9cee8c969..89a4c3afdda1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -1114,9 +1114,10 @@ static int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev, free_mr = &priv->free_mr; if (mr->enabled) { - if (hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mr->key) - & (hr_dev->caps.num_mtpts - 1))) - dev_warn(dev, "HW2SW_MPT failed!\n"); + if (hns_roce_hw_destroy_mpt(hr_dev, NULL, + key_to_hw_index(mr->key) & + (hr_dev->caps.num_mtpts - 1))) + dev_warn(dev, "DESTROY_MPT failed!\n"); } mr_work = kzalloc(sizeof(*mr_work), GFP_KERNEL); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 5f8416ba09a9..577946bdd574 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -48,21 +48,21 @@ unsigned long key_to_hw_index(u32 key) return (key << 24) | (key >> 8); } -static int hns_roce_sw2hw_mpt(struct hns_roce_dev *hr_dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long mpt_index) +static int hns_roce_hw_create_mpt(struct hns_roce_dev *hr_dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long mpt_index) { return hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, mpt_index, 0, - HNS_ROCE_CMD_SW2HW_MPT, + HNS_ROCE_CMD_CREATE_MPT, HNS_ROCE_CMD_TIMEOUT_MSECS); } -int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long mpt_index) +int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long mpt_index) { return hns_roce_cmd_mbox(hr_dev, 0, mailbox ? mailbox->dma : 0, - mpt_index, !mailbox, HNS_ROCE_CMD_HW2SW_MPT, + mpt_index, !mailbox, HNS_ROCE_CMD_DESTROY_MPT, HNS_ROCE_CMD_TIMEOUT_MSECS); } @@ -707,10 +707,11 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, int ret; if (mr->enabled) { - ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mr->key) - & (hr_dev->caps.num_mtpts - 1)); + ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, + key_to_hw_index(mr->key) & + (hr_dev->caps.num_mtpts - 1)); if (ret) - dev_warn(dev, "HW2SW_MPT failed (%d)\n", ret); + dev_warn(dev, "DESTROY_MPT failed (%d)\n", ret); } if (mr->size != ~0ULL) { @@ -763,10 +764,10 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, goto err_page; } - ret = hns_roce_sw2hw_mpt(hr_dev, mailbox, - mtpt_idx & (hr_dev->caps.num_mtpts - 1)); + ret = hns_roce_hw_create_mpt(hr_dev, mailbox, + mtpt_idx & (hr_dev->caps.num_mtpts - 1)); if (ret) { - dev_err(dev, "SW2HW_MPT failed (%d)\n", ret); + dev_err(dev, "CREATE_MPT failed (%d)\n", ret); goto err_page; } @@ -1308,9 +1309,9 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, if (ret) goto free_cmd_mbox; - ret = hns_roce_hw2sw_mpt(hr_dev, NULL, mtpt_idx); + ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, mtpt_idx); if (ret) - dev_warn(dev, "HW2SW_MPT failed (%d)\n", ret); + dev_warn(dev, "DESTROY_MPT failed (%d)\n", ret); mr->enabled = 0; @@ -1332,9 +1333,9 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, goto free_cmd_mbox; } - ret = hns_roce_sw2hw_mpt(hr_dev, mailbox, mtpt_idx); + ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx); if (ret) { - dev_err(dev, "SW2HW_MPT failed (%d)\n", ret); + dev_err(dev, "CREATE_MPT failed (%d)\n", ret); ib_umem_release(mr->umem); goto free_cmd_mbox; } @@ -1448,10 +1449,11 @@ static void hns_roce_mw_free(struct hns_roce_dev *hr_dev, int ret; if (mw->enabled) { - ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mw->rkey) - & (hr_dev->caps.num_mtpts - 1)); + ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, + key_to_hw_index(mw->rkey) & + (hr_dev->caps.num_mtpts - 1)); if (ret) - dev_warn(dev, "MW HW2SW_MPT failed (%d)\n", ret); + dev_warn(dev, "MW DESTROY_MPT failed (%d)\n", ret); hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table, key_to_hw_index(mw->rkey)); @@ -1487,10 +1489,10 @@ static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev, goto err_page; } - ret = hns_roce_sw2hw_mpt(hr_dev, mailbox, - mtpt_idx & (hr_dev->caps.num_mtpts - 1)); + ret = hns_roce_hw_create_mpt(hr_dev, mailbox, + mtpt_idx & (hr_dev->caps.num_mtpts - 1)); if (ret) { - dev_err(dev, "MW sw2hw_mpt failed (%d)\n", ret); + dev_err(dev, "MW CREATE_MPT failed (%d)\n", ret); goto err_page; } diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 6f9d1d250e4a..d275818554a6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -59,21 +59,21 @@ static void hns_roce_ib_srq_event(struct hns_roce_srq *srq, } } -static int hns_roce_sw2hw_srq(struct hns_roce_dev *dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long srq_num) +static int hns_roce_hw_create_srq(struct hns_roce_dev *dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long srq_num) { return hns_roce_cmd_mbox(dev, mailbox->dma, 0, srq_num, 0, - HNS_ROCE_CMD_SW2HW_SRQ, + HNS_ROCE_CMD_CREATE_SRQ, HNS_ROCE_CMD_TIMEOUT_MSECS); } -static int hns_roce_hw2sw_srq(struct hns_roce_dev *dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long srq_num) +static int hns_roce_hw_destroy_srq(struct hns_roce_dev *dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long srq_num) { return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, srq_num, - mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_SRQ, + mailbox ? 0 : 1, HNS_ROCE_CMD_DESTROY_SRQ, HNS_ROCE_CMD_TIMEOUT_MSECS); } @@ -134,7 +134,7 @@ static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, mtts_wqe, mtts_idx, dma_handle_wqe, dma_handle_idx); - ret = hns_roce_sw2hw_srq(hr_dev, mailbox, srq->srqn); + ret = hns_roce_hw_create_srq(hr_dev, mailbox, srq->srqn); hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) goto err_xa; @@ -160,9 +160,9 @@ static void hns_roce_srq_free(struct hns_roce_dev *hr_dev, struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; int ret; - ret = hns_roce_hw2sw_srq(hr_dev, NULL, srq->srqn); + ret = hns_roce_hw_destroy_srq(hr_dev, NULL, srq->srqn); if (ret) - dev_err(hr_dev->dev, "HW2SW_SRQ failed (%d) for CQN %06lx\n", + dev_err(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n", ret, srq->srqn); xa_erase(&srq_table->xa, srq->srqn); -- cgit From 880f133c6026bee250220b773f556cc27bb62457 Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Tue, 5 Nov 2019 19:07:59 +0800 Subject: RDMA/hns: Simplify doorbell initialization code If a variable needs to be set to 0 before use, it can be directly initialized to 0. Link: https://lore.kernel.org/r/1572952082-6681-7-git-send-email-liweihang@hisilicon.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 4700bac1c6e2..a8ce3716a63e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4944,10 +4944,7 @@ static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev, static void set_eq_cons_index_v2(struct hns_roce_eq *eq) { struct hns_roce_dev *hr_dev = eq->hr_dev; - __le32 doorbell[2]; - - doorbell[0] = 0; - doorbell[1] = 0; + __le32 doorbell[2] = {}; if (eq->type_flag == HNS_ROCE_AEQ) { roce_set_field(doorbell[0], HNS_ROCE_V2_EQ_DB_CMD_M, -- cgit From 301cc7eb2cd9b0292658b9bfd45e5226cd1fef27 Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Tue, 5 Nov 2019 19:08:00 +0800 Subject: RDMA/hns: Modify hns_roce_hw_v2_get_cfg to simplify the code Merge base configuration of hr_dev into hns_roce_hw_v2_get_cfg(). In addition, there is no need to return 0 at last, so we change return type of it to void. Link: https://lore.kernel.org/r/1572952082-6681-8-git-send-email-liweihang@hisilicon.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index a8ce3716a63e..907c95149cb9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -6370,12 +6370,14 @@ static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = { MODULE_DEVICE_TABLE(pci, hns_roce_hw_v2_pci_tbl); -static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, +static void hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, struct hnae3_handle *handle) { struct hns_roce_v2_priv *priv = hr_dev->priv; int i; + hr_dev->pci_dev = handle->pdev; + hr_dev->dev = &handle->pdev->dev; hr_dev->hw = &hns_roce_hw_v2; hr_dev->dfx = &hns_roce_dfx_hw_v2; hr_dev->sdb_offset = ROCEE_DB_SQ_L_0_REG; @@ -6400,8 +6402,6 @@ static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, hr_dev->reset_cnt = handle->ae_algo->ops->ae_dev_reset_cnt(handle); priv->handle = handle; - - return 0; } static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) @@ -6419,14 +6419,7 @@ static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) goto error_failed_kzalloc; } - hr_dev->pci_dev = handle->pdev; - hr_dev->dev = &handle->pdev->dev; - - ret = hns_roce_hw_v2_get_cfg(hr_dev, handle); - if (ret) { - dev_err(hr_dev->dev, "Get Configuration failed!\n"); - goto error_failed_get_cfg; - } + hns_roce_hw_v2_get_cfg(hr_dev, handle); ret = hns_roce_init(hr_dev); if (ret) { -- cgit From 1ceb0b11a8a2363db3bf44f3e0ae4615134733e9 Mon Sep 17 00:00:00 2001 From: Yixian Liu Date: Tue, 5 Nov 2019 19:08:01 +0800 Subject: RDMA/hns: Fix non-standard error codes It is better to return a linux error code than define a private constant. Link: https://lore.kernel.org/r/1572952082-6681-9-git-send-email-liweihang@hisilicon.com Signed-off-by: Yixian Liu Signed-off-by: Wenpeng Liang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_alloc.c | 4 ++-- drivers/infiniband/hw/hns/hns_roce_cq.c | 4 ++-- drivers/infiniband/hw/hns/hns_roce_mr.c | 15 ++++++++------- drivers/infiniband/hw/hns/hns_roce_pd.c | 2 +- drivers/infiniband/hw/hns/hns_roce_srq.c | 2 +- 5 files changed, 14 insertions(+), 13 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index 8c063c598d2a..da574c26e063 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -55,7 +55,7 @@ int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj) bitmap->last = 0; *obj |= bitmap->top; } else { - ret = -1; + ret = -EINVAL; } spin_unlock(&bitmap->lock); @@ -100,7 +100,7 @@ int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt, } *obj |= bitmap->top; } else { - ret = -1; + ret = -EINVAL; } spin_unlock(&bitmap->lock); diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 713df1f78388..699c987f8da4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -116,9 +116,9 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, hr_cq->vector = vector; ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn); - if (ret == -1) { + if (ret) { dev_err(dev, "CQ alloc.Failed to alloc index.\n"); - return -ENOMEM; + return ret; } /* Get CQC memory HEM(Hardware Entry Memory) table */ diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 577946bdd574..6589e283cc77 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -83,7 +83,7 @@ static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order, } } spin_unlock(&buddy->lock); - return -1; + return -EINVAL; found: clear_bit(*seg, buddy->bits[o]); @@ -206,13 +206,14 @@ static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order, } ret = hns_roce_buddy_alloc(buddy, order, seg); - if (ret == -1) - return -1; + if (ret) + return ret; - if (hns_roce_table_get_range(hr_dev, table, *seg, - *seg + (1 << order) - 1)) { + ret = hns_roce_table_get_range(hr_dev, table, *seg, + *seg + (1 << order) - 1); + if (ret) { hns_roce_buddy_free(buddy, *seg, order); - return -1; + return ret; } return 0; @@ -578,7 +579,7 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova, /* Allocate a key for mr from mr_table */ ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index); - if (ret == -1) + if (ret) return -ENOMEM; mr->iova = iova; /* MR va starting addr */ diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index 912b89b4da34..780c780fdb22 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -96,7 +96,7 @@ int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar) /* Using bitmap to manager UAR index */ ret = hns_roce_bitmap_alloc(&hr_dev->uar_table.bitmap, &uar->logic_idx); - if (ret == -1) + if (ret) return -ENOMEM; if (uar->logic_idx > 0 && hr_dev->caps.phy_num_uars > 1) diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index d275818554a6..96ff782e0fdf 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -111,7 +111,7 @@ static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, } ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn); - if (ret == -1) { + if (ret) { dev_err(hr_dev->dev, "SRQ alloc.Failed to alloc index.\n"); return -ENOMEM; } -- cgit From d11769fdc1bbf6664083c0caabd9f2f864dd72ec Mon Sep 17 00:00:00 2001 From: Wenpeng Liang Date: Tue, 5 Nov 2019 19:08:02 +0800 Subject: RDMA/hns: Modify appropriate printings Modify some printings that is not in uniformed style, non-standard or with spelling errors. Link: https://lore.kernel.org/r/1572952082-6681-10-git-send-email-liweihang@hisilicon.com Signed-off-by: Wenpeng Liang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_cq.c | 20 ++++++++++++-------- drivers/infiniband/hw/hns/hns_roce_main.c | 4 ++-- drivers/infiniband/hw/hns/hns_roce_qp.c | 2 +- drivers/infiniband/hw/hns/hns_roce_srq.c | 8 ++++---- 4 files changed, 19 insertions(+), 15 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 699c987f8da4..e25fa19c249c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -105,32 +105,34 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, mtts = hns_roce_table_find(hr_dev, mtt_table, hr_mtt->first_seg, &dma_handle); if (!mtts) { - dev_err(dev, "CQ alloc.Failed to find cq buf addr.\n"); + dev_err(dev, "Failed to find mtt for CQ buf.\n"); return -EINVAL; } if (vector >= hr_dev->caps.num_comp_vectors) { - dev_err(dev, "CQ alloc.Invalid vector.\n"); + dev_err(dev, "Invalid vector(0x%x) for CQ alloc.\n", vector); return -EINVAL; } hr_cq->vector = vector; ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn); if (ret) { - dev_err(dev, "CQ alloc.Failed to alloc index.\n"); + dev_err(dev, "Num of CQ out of range.\n"); return ret; } /* Get CQC memory HEM(Hardware Entry Memory) table */ ret = hns_roce_table_get(hr_dev, &cq_table->table, hr_cq->cqn); if (ret) { - dev_err(dev, "CQ alloc.Failed to get context mem.\n"); + dev_err(dev, + "Get context mem failed(%d) when CQ(0x%lx) alloc.\n", + ret, hr_cq->cqn); goto err_out; } ret = xa_err(xa_store(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL)); if (ret) { - dev_err(dev, "CQ alloc failed xa_store.\n"); + dev_err(dev, "Failed to xa_store CQ.\n"); goto err_put; } @@ -148,7 +150,9 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, ret = hns_roce_hw_create_cq(hr_dev, mailbox, hr_cq->cqn); hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) { - dev_err(dev, "CQ alloc.Failed to cmd mailbox.\n"); + dev_err(dev, + "Send cmd mailbox failed(%d) when CQ(0x%lx) alloc.\n", + ret, hr_cq->cqn); goto err_xa; } @@ -418,7 +422,7 @@ int hns_roce_ib_create_cq(struct ib_cq *ib_cq, int ret; if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) { - dev_err(dev, "Creat CQ failed. entries=%d, max=%d\n", + dev_err(dev, "Create CQ failed. entries=%d, max=%d\n", cq_entries, hr_dev->caps.max_cqes); return -EINVAL; } @@ -448,7 +452,7 @@ int hns_roce_ib_create_cq(struct ib_cq *ib_cq, ret = hns_roce_cq_alloc(hr_dev, cq_entries, &hr_cq->hr_buf.hr_mtt, hr_cq, vector); if (ret) { - dev_err(dev, "Creat CQ .Failed to cq_alloc.\n"); + dev_err(dev, "Alloc CQ failed(%d).\n", ret); goto err_dbmap; } diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index a389f9996689..066f01c45b5a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -111,7 +111,7 @@ static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port, netdev = hr_dev->iboe.netdevs[port]; if (!netdev) { - dev_err(dev, "port(%d) can't find netdev\n", port); + dev_err(dev, "Can't find netdev on port(%u)!\n", port); return -ENODEV; } @@ -253,7 +253,7 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num, net_dev = hr_dev->iboe.netdevs[port]; if (!net_dev) { spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); - dev_err(dev, "find netdev %d failed!\r\n", port); + dev_err(dev, "Find netdev %u failed!\n", port); return -EINVAL; } diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 6fac4666ae8a..9442f017db02 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -1031,7 +1031,7 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, 0, hr_qp); if (ret) { - ibdev_err(ibdev, "Create RC QP 0x%06lx failed(%d)\n", + ibdev_err(ibdev, "Create QP 0x%06lx failed(%d)\n", hr_qp->qpn, ret); kfree(hr_qp); return ERR_PTR(ret); diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 96ff782e0fdf..a1bfa51c67fe 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -95,8 +95,7 @@ static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, srq->mtt.first_seg, &dma_handle_wqe); if (!mtts_wqe) { - dev_err(hr_dev->dev, - "SRQ alloc.Failed to find srq buf addr.\n"); + dev_err(hr_dev->dev, "Failed to find mtt for srq buf.\n"); return -EINVAL; } @@ -106,13 +105,14 @@ static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, &dma_handle_idx); if (!mtts_idx) { dev_err(hr_dev->dev, - "SRQ alloc.Failed to find idx que buf addr.\n"); + "Failed to find mtt for srq idx queue buf.\n"); return -EINVAL; } ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn); if (ret) { - dev_err(hr_dev->dev, "SRQ alloc.Failed to alloc index.\n"); + dev_err(hr_dev->dev, + "Failed to alloc a bit from srq bitmap.\n"); return -ENOMEM; } -- cgit From 333ee7e2d0b6590cde2bea3e17f30262e0dc0706 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 29 Oct 2019 08:27:36 +0200 Subject: RDMA/hfi1: Delete unreachable code All callers allocate MAD structures with proper sizes, there is no need to recheck it. Link: https://lore.kernel.org/r/20191029062745.7932-8-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/mad.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index d8ff063a5419..a54746f4a0ae 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -4921,10 +4921,6 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, { switch (in_mad->base_version) { case OPA_MGMT_BASE_VERSION: - if (unlikely(in_mad_size != sizeof(struct opa_mad))) { - dev_err(ibdev->dev.parent, "invalid in_mad_size\n"); - return IB_MAD_RESULT_FAILURE; - } return hfi1_process_opa_mad(ibdev, mad_flags, port, in_wc, in_grh, (struct opa_mad *)in_mad, -- cgit From e26e7b88f6b7482cbff633c6fc9eaee3ecbd41b1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 29 Oct 2019 08:27:45 +0200 Subject: RDMA: Change MAD processing function to remove extra casting and parameter All users of process_mad() converts input pointers from ib_mad_hdr to be ib_mad, update the function declaration to use ib_mad directly. Also remove not used input MAD size parameter. Link: https://lore.kernel.org/r/20191029062745.7932-17-leon@kernel.org Signed-off-by: Leon Romanovsky Tested-By: Mike Marciniszyn Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/mad.c | 13 +++--- drivers/infiniband/hw/hfi1/verbs.h | 5 +-- drivers/infiniband/hw/mlx4/mad.c | 25 +++++------- drivers/infiniband/hw/mlx4/mlx4_ib.h | 7 ++-- drivers/infiniband/hw/mlx5/mad.c | 24 +++++------ drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 +-- drivers/infiniband/hw/mthca/mthca_dev.h | 12 ++---- drivers/infiniband/hw/mthca/mthca_mad.c | 70 ++++++++++++++------------------ drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 17 +++----- drivers/infiniband/hw/ocrdma/ocrdma_ah.h | 11 ++--- drivers/infiniband/hw/qedr/verbs.c | 17 ++------ drivers/infiniband/hw/qedr/verbs.h | 7 ++-- drivers/infiniband/hw/qib/qib_mad.c | 15 +++---- drivers/infiniband/hw/qib/qib_verbs.h | 5 +-- 14 files changed, 93 insertions(+), 140 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index a54746f4a0ae..a51bcd2b4391 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -4915,11 +4915,10 @@ static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port, */ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in_mad, size_t in_mad_size, - struct ib_mad_hdr *out_mad, size_t *out_mad_size, - u16 *out_mad_pkey_index) + const struct ib_mad *in_mad, struct ib_mad *out_mad, + size_t *out_mad_size, u16 *out_mad_pkey_index) { - switch (in_mad->base_version) { + switch (in_mad->mad_hdr.base_version) { case OPA_MGMT_BASE_VERSION: return hfi1_process_opa_mad(ibdev, mad_flags, port, in_wc, in_grh, @@ -4928,10 +4927,8 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, out_mad_size, out_mad_pkey_index); case IB_MGMT_BASE_VERSION: - return hfi1_process_ib_mad(ibdev, mad_flags, port, - in_wc, in_grh, - (const struct ib_mad *)in_mad, - (struct ib_mad *)out_mad); + return hfi1_process_ib_mad(ibdev, mad_flags, port, in_wc, + in_grh, in_mad, out_mad); default: break; } diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index ae9582ddbc8f..b0e9bf7cd150 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -330,9 +330,8 @@ void hfi1_sys_guid_chg(struct hfi1_ibport *ibp); void hfi1_node_desc_chg(struct hfi1_ibport *ibp); int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in_mad, size_t in_mad_size, - struct ib_mad_hdr *out_mad, size_t *out_mad_size, - u16 *out_mad_pkey_index); + const struct ib_mad *in_mad, struct ib_mad *out_mad, + size_t *out_mad_size, u16 *out_mad_pkey_index); /* * The PSN_MASK and PSN_SHIFT allow for diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index c6ea4c50da1e..abe68708d6d6 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -983,13 +983,10 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index) + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index) { struct mlx4_ib_dev *dev = to_mdev(ibdev); - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; enum rdma_link_layer link = rdma_port_get_link_layer(ibdev, port_num); /* iboe_process_mad() which uses the HCA flow-counters to implement IB PMA @@ -997,20 +994,20 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, */ if (link == IB_LINK_LAYER_INFINIBAND) { if (mlx4_is_slave(dev->dev) && - (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && - (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS || - in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT || - in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO))) - return iboe_process_mad(ibdev, mad_flags, port_num, in_wc, - in_grh, in_mad, out_mad); + (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && + (in->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS || + in->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT || + in->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO))) + return iboe_process_mad(ibdev, mad_flags, port_num, + in_wc, in_grh, in, out); - return ib_process_mad(ibdev, mad_flags, port_num, in_wc, - in_grh, in_mad, out_mad); + return ib_process_mad(ibdev, mad_flags, port_num, in_wc, in_grh, + in, out); } if (link == IB_LINK_LAYER_ETHERNET) return iboe_process_mad(ibdev, mad_flags, port_num, in_wc, - in_grh, in_mad, out_mad); + in_grh, in, out); return -EINVAL; } diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index eb53bb4c0c91..0d846fea8fdc 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -786,11 +786,10 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags, int port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const void *in_mad, void *response_mad); -int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, +int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index); + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index); int mlx4_ib_mad_init(struct mlx4_ib_dev *dev); void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev); diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 1c87412a162f..14e0c17de6a9 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -219,15 +219,12 @@ done: int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index) + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; - u8 mgmt_class = in_mad->mad_hdr.mgmt_class; - u8 method = in_mad->mad_hdr.method; + u8 mgmt_class = in->mad_hdr.mgmt_class; + u8 method = in->mad_hdr.method; u16 slid; int err; @@ -247,13 +244,13 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, /* Don't process SMInfo queries -- the SMA can't handle them. */ - if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO) + if (in->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO) return IB_MAD_RESULT_SUCCESS; } break; case IB_MGMT_CLASS_PERF_MGMT: if (MLX5_CAP_GEN(dev->mdev, vport_counters) && method == IB_MGMT_METHOD_GET) - return process_pma_cmd(dev, port_num, in_mad, out_mad); + return process_pma_cmd(dev, port_num, in, out); /* fallthrough */ case MLX5_IB_VENDOR_CLASS1: /* fallthrough */ @@ -267,16 +264,15 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, return IB_MAD_RESULT_SUCCESS; } - err = mlx5_MAD_IFC(to_mdev(ibdev), - mad_flags & IB_MAD_IGNORE_MKEY, - mad_flags & IB_MAD_IGNORE_BKEY, - port_num, in_wc, in_grh, in_mad, out_mad); + err = mlx5_MAD_IFC(to_mdev(ibdev), mad_flags & IB_MAD_IGNORE_MKEY, + mad_flags & IB_MAD_IGNORE_BKEY, port_num, in_wc, + in_grh, in, out); if (err) return IB_MAD_RESULT_FAILURE; /* set return bit in status of directed route responses */ if (mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) - out_mad->mad_hdr.status |= cpu_to_be16(1 << 15); + out->mad_hdr.status |= cpu_to_be16(1 << 15); if (method == IB_MGMT_METHOD_TRAP_REPRESS) /* no response for trap repress */ diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 0bdb8b45ea15..933a2059886b 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1192,9 +1192,8 @@ int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, unsigned int *meta_sg_offset); int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index); + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index); struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, struct ib_udata *udata); int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index bfd4eebc1182..599794c5a78f 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -576,14 +576,10 @@ enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u8 port); int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); -int mthca_process_mad(struct ib_device *ibdev, - int mad_flags, - u8 port_num, - const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index); +int mthca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index); int mthca_create_agents(struct mthca_dev *dev); void mthca_free_agents(struct mthca_dev *dev); diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 0893604d2a62..99aa8183a7f2 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -196,26 +196,19 @@ static void forward_trap(struct mthca_dev *dev, } } -int mthca_process_mad(struct ib_device *ibdev, - int mad_flags, - u8 port_num, - const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index) +int mthca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index) { int err; u16 slid = in_wc ? ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); u16 prev_lid = 0; struct ib_port_attr pattr; - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; /* Forward locally generated traps to the SM */ - if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && - slid == 0) { - forward_trap(to_mdev(ibdev), port_num, in_mad); + if (in->mad_hdr.method == IB_MGMT_METHOD_TRAP && !slid) { + forward_trap(to_mdev(ibdev), port_num, in); return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; } @@ -225,40 +218,39 @@ int mthca_process_mad(struct ib_device *ibdev, * Only handle PMA and Mellanox vendor-specific class gets and * sets for other classes. */ - if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || - in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { - if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && - in_mad->mad_hdr.method != IB_MGMT_METHOD_SET && - in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS) + if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + if (in->mad_hdr.method != IB_MGMT_METHOD_GET && + in->mad_hdr.method != IB_MGMT_METHOD_SET && + in->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS) return IB_MAD_RESULT_SUCCESS; /* * Don't process SMInfo queries or vendor-specific * MADs -- the SMA can't handle them. */ - if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO || - ((in_mad->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) == + if (in->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO || + ((in->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) == IB_SMP_ATTR_VENDOR_MASK)) return IB_MAD_RESULT_SUCCESS; - } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || - in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS1 || - in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS2) { - if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && - in_mad->mad_hdr.method != IB_MGMT_METHOD_SET) + } else if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || + in->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS1 || + in->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS2) { + if (in->mad_hdr.method != IB_MGMT_METHOD_GET && + in->mad_hdr.method != IB_MGMT_METHOD_SET) return IB_MAD_RESULT_SUCCESS; } else return IB_MAD_RESULT_SUCCESS; - if ((in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || - in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && - in_mad->mad_hdr.method == IB_MGMT_METHOD_SET && - in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO && + if ((in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && + in->mad_hdr.method == IB_MGMT_METHOD_SET && + in->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO && !ib_query_port(ibdev, port_num, &pattr)) prev_lid = ib_lid_cpu16(pattr.lid); - err = mthca_MAD_IFC(to_mdev(ibdev), - mad_flags & IB_MAD_IGNORE_MKEY, - mad_flags & IB_MAD_IGNORE_BKEY, - port_num, in_wc, in_grh, in_mad, out_mad); + err = mthca_MAD_IFC(to_mdev(ibdev), mad_flags & IB_MAD_IGNORE_MKEY, + mad_flags & IB_MAD_IGNORE_BKEY, port_num, in_wc, + in_grh, in, out); if (err == -EBADMSG) return IB_MAD_RESULT_SUCCESS; else if (err) { @@ -266,16 +258,16 @@ int mthca_process_mad(struct ib_device *ibdev, return IB_MAD_RESULT_FAILURE; } - if (!out_mad->mad_hdr.status) { - smp_snoop(ibdev, port_num, in_mad, prev_lid); - node_desc_override(ibdev, out_mad); + if (!out->mad_hdr.status) { + smp_snoop(ibdev, port_num, in, prev_lid); + node_desc_override(ibdev, out); } /* set return bit in status of directed route responses */ - if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) - out_mad->mad_hdr.status |= cpu_to_be16(1 << 15); + if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + out->mad_hdr.status |= cpu_to_be16(1 << 15); - if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) + if (in->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) /* no response for trap repress */ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 4098508b9240..2b7f00ac41b0 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -247,23 +247,18 @@ int ocrdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) return 0; } -int ocrdma_process_mad(struct ib_device *ibdev, - int process_mad_flags, - u8 port_num, - const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, +int ocrdma_process_mad(struct ib_device *ibdev, int process_mad_flags, + u8 port_num, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, const struct ib_mad *in, + struct ib_mad *out, size_t *out_mad_size, u16 *out_mad_pkey_index) { int status = IB_MAD_RESULT_SUCCESS; struct ocrdma_dev *dev; - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; - if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) { + if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) { dev = get_ocrdma_dev(ibdev); - ocrdma_pma_counters(dev, out_mad); + ocrdma_pma_counters(dev, out); status |= IB_MAD_RESULT_REPLY; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index 64cb82c08664..9780afcde780 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -56,12 +56,9 @@ int ocrdma_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, void ocrdma_destroy_ah(struct ib_ah *ah, u32 flags); int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); -int ocrdma_process_mad(struct ib_device *, - int process_mad_flags, - u8 port_num, - const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, +int ocrdma_process_mad(struct ib_device *dev, int process_mad_flags, + u8 port_num, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, const struct ib_mad *in, + struct ib_mad *out, size_t *out_mad_size, u16 *out_mad_pkey_index); #endif /* __OCRDMA_AH_H__ */ diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 8096b8fcab4e..fea95faa3b21 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -4346,19 +4346,10 @@ int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) } int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags, - u8 port_num, - const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *mad_hdr, - size_t in_mad_size, struct ib_mad_hdr *out_mad, - size_t *out_mad_size, u16 *out_mad_pkey_index) + u8 port_num, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, const struct ib_mad *in, + struct ib_mad *out_mad, size_t *out_mad_size, + u16 *out_mad_pkey_index) { - struct qedr_dev *dev = get_qedr_dev(ibdev); - - DP_DEBUG(dev, QEDR_MSG_GSI, - "QEDR_PROCESS_MAD in_mad %x %x %x %x %x %x %x %x\n", - mad_hdr->attr_id, mad_hdr->base_version, mad_hdr->attr_mod, - mad_hdr->class_specific, mad_hdr->class_version, - mad_hdr->method, mad_hdr->mgmt_class, mad_hdr->status); return IB_MAD_RESULT_SUCCESS; } diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 620472116c6d..18027844eb87 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -92,10 +92,9 @@ int qedr_post_recv(struct ib_qp *, const struct ib_recv_wr *, const struct ib_recv_wr **bad_wr); int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags, u8 port_num, const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in_mad, - size_t in_mad_size, struct ib_mad_hdr *out_mad, - size_t *out_mad_size, u16 *out_mad_pkey_index); + const struct ib_grh *in_grh, const struct ib_mad *in_mad, + struct ib_mad *out_mad, size_t *out_mad_size, + u16 *out_mad_pkey_index); int qedr_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable); diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index b259aaf85d4a..79bb83222e8d 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -2386,24 +2386,21 @@ bail: */ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index) + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index) { int ret; struct qib_ibport *ibp = to_iport(ibdev, port); struct qib_pportdata *ppd = ppd_from_ibp(ibp); - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; - switch (in_mad->mad_hdr.mgmt_class) { + switch (in->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: case IB_MGMT_CLASS_SUBN_LID_ROUTED: - ret = process_subn(ibdev, mad_flags, port, in_mad, out_mad); + ret = process_subn(ibdev, mad_flags, port, in, out); goto bail; case IB_MGMT_CLASS_PERF_MGMT: - ret = process_perf(ibdev, port, in_mad, out_mad); + ret = process_perf(ibdev, port, in, out); goto bail; case IB_MGMT_CLASS_CONG_MGMT: @@ -2412,7 +2409,7 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, ret = IB_MAD_RESULT_SUCCESS; goto bail; } - ret = process_cc(ibdev, mad_flags, port, in_mad, out_mad); + ret = process_cc(ibdev, mad_flags, port, in, out); goto bail; default: diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 17bdf8acee2f..8bf414b47b96 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -245,9 +245,8 @@ void qib_sys_guid_chg(struct qib_ibport *ibp); void qib_node_desc_chg(struct qib_ibport *ibp); int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index); + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index); void qib_notify_create_mad_agent(struct rvt_dev_info *rdi, int port_idx); void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx); -- cgit From 208d70f562e563226df178ff8f969364972e9e99 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Sun, 3 Nov 2019 16:07:23 +0200 Subject: IB/mlx5: Support flow counters offset for bulk counters Add support for flow steering counters action with a non-base counter ID (offset) for bulk counters. When creating a flow counter object, save the bulk value. This value is used when a flow action with a non-base counter ID is requested - to validate that the required offset is in the range of the allocated bulk. Link: https://lore.kernel.org/r/20191103140723.77411-1-leon@kernel.org Signed-off-by: Yevgeny Kliteynik Signed-off-by: Leon Romanovsky Reviewed-by: Mark Bloch Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 15 ++++++++++++++- drivers/infiniband/hw/mlx5/flow.c | 29 +++++++++++++++++++++++++++-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 +- 3 files changed, 42 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 6b1fca91d7d3..2a5812a4c3bb 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -100,6 +100,7 @@ struct devx_obj { struct mlx5_ib_devx_mr devx_mr; struct mlx5_core_dct core_dct; struct mlx5_core_cq core_cq; + u32 flow_counter_bulk_size; }; struct list_head event_sub; /* holds devx_event_subscription entries */ }; @@ -192,15 +193,20 @@ bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type) } } -bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id) +bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id) { struct devx_obj *devx_obj = obj; u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode); if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) { + + if (offset && offset >= devx_obj->flow_counter_bulk_size) + return false; + *counter_id = MLX5_GET(dealloc_flow_counter_in, devx_obj->dinbox, flow_counter_id); + *counter_id += offset; return true; } @@ -1463,6 +1469,13 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( if (err) goto obj_free; + if (opcode == MLX5_CMD_OP_ALLOC_FLOW_COUNTER) { + u8 bulk = MLX5_GET(alloc_flow_counter_in, + cmd_in, + flow_counter_bulk); + obj->flow_counter_bulk_size = 128UL * bulk; + } + uobj->object = obj; INIT_LIST_HEAD(&obj->event_sub); obj->ib_dev = dev; diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index b198ff10cde9..dbee17d22d50 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -85,6 +85,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata); int len, ret, i; u32 counter_id = 0; + u32 *offset_attr; + u32 offset = 0; if (!capable(CAP_NET_RAW)) return -EPERM; @@ -151,8 +153,27 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( if (len) { devx_obj = arr_flow_actions[0]->object; - if (!mlx5_ib_devx_is_flow_counter(devx_obj, &counter_id)) + if (uverbs_attr_is_valid(attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET)) { + + int num_offsets = uverbs_attr_ptr_get_array_size( + attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET, + sizeof(u32)); + + if (num_offsets != 1) + return -EINVAL; + + offset_attr = uverbs_attr_get_alloced_ptr( + attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET); + offset = *offset_attr; + } + + if (!mlx5_ib_devx_is_flow_counter(devx_obj, offset, + &counter_id)) return -EINVAL; + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; } @@ -598,7 +619,11 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, MLX5_IB_OBJECT_DEVX_OBJ, UVERBS_ACCESS_READ, 1, 1, - UA_OPTIONAL)); + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET, + UVERBS_ATTR_MIN_SIZE(sizeof(u32)), + UA_OPTIONAL, + UA_ALLOC_AND_COPY)); DECLARE_UVERBS_NAMED_METHOD_DESTROY( MLX5_IB_METHOD_DESTROY_FLOW, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 933a2059886b..d14fe43837e7 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1366,7 +1366,7 @@ struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add( struct mlx5_flow_act *flow_act, u32 counter_id, void *cmd_in, int inlen, int dest_id, int dest_type); bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type); -bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id); +bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id); int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root); void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction); #else -- cgit From 960657b732e1ce21b07be5ab48a7ad3913d72ba4 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Wed, 6 Nov 2019 14:23:54 +0800 Subject: RDMA/qedr: Fix potential use after free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the release operation after error log to avoid possible use after free. Link: https://lore.kernel.org/r/1573021434-18768-1-git-send-email-bianpan2016@163.com Signed-off-by: Pan Bian Acked-by: Michal Kalderon  Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/qedr_iw_cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index 5e9732990be5..d3c13f4b136e 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -481,10 +481,10 @@ qedr_addr6_resolve(struct qedr_dev *dev, if ((!dst) || dst->error) { if (dst) { - dst_release(dst); DP_ERR(dev, "ip6_route_output returned dst->error = %d\n", dst->error); + dst_release(dst); } return -EINVAL; } -- cgit From da046d5f895fca18d63b15ac8faebd5bf784e23a Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Wed, 6 Nov 2019 14:44:11 +0800 Subject: RDMA/i40iw: Fix potential use after free Release variable dst after logging dst->error to avoid possible use after free. Link: https://lore.kernel.org/r/1573022651-37171-1-git-send-email-bianpan2016@163.com Signed-off-by: Pan Bian Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 2d6a378e8560..bb78d3280acc 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -2079,9 +2079,9 @@ static int i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev, dst = i40iw_get_dst_ipv6(&src_addr, &dst_addr); if (!dst || dst->error) { if (dst) { - dst_release(dst); i40iw_pr_err("ip6_route_output returned dst->error = %d\n", dst->error); + dst_release(dst); } return rc; } -- cgit From 7ee23491b39259ae83899dd93b2a29ef0f22f0a7 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 7 Nov 2019 08:50:25 +0530 Subject: RDMA/qib: Validate ->show()/store() callbacks before calling them The permissions of the read-only or write-only sysfs files can be changed (as root) and the user can then try to read a write-only file or write to a read-only file which will lead to kernel crash here. Protect against that by always validating the show/store callbacks. Link: https://lore.kernel.org/r/d45cc26361a174ae12dbb86c994ef334d257924b.1573096807.git.viresh.kumar@linaro.org Signed-off-by: Viresh Kumar Reviewed-by: Greg Kroah-Hartman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_sysfs.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index 3926be78036e..568b21eb6ea1 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -301,6 +301,9 @@ static ssize_t qib_portattr_show(struct kobject *kobj, struct qib_pportdata *ppd = container_of(kobj, struct qib_pportdata, pport_kobj); + if (!pattr->show) + return -EIO; + return pattr->show(ppd, buf); } @@ -312,6 +315,9 @@ static ssize_t qib_portattr_store(struct kobject *kobj, struct qib_pportdata *ppd = container_of(kobj, struct qib_pportdata, pport_kobj); + if (!pattr->store) + return -EIO; + return pattr->store(ppd, buf, len); } -- cgit From 6296665cee88a76a875ddcd7031bb1633b4adac8 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 7 Nov 2019 22:48:55 +0000 Subject: RDMA/ocrdma: Fix spelling mistake in variable name There is a spelling mistake in the variable nak_invalid_requst_errors, rename it to nak_invalid_request_errors. Link: https://lore.kernel.org/r/20191107224855.417647-1-colin.king@canonical.com Signed-off-by: Colin Ian King Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 2 +- drivers/infiniband/hw/ocrdma/ocrdma_stats.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 6ef89c226ad8..c2e0d0fa44be 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -2034,7 +2034,7 @@ struct ocrdma_rx_stats { }; struct ocrdma_rx_qp_err_stats { - u32 nak_invalid_requst_errors; + u32 nak_invalid_request_errors; u32 nak_remote_operation_errors; u32 nak_count_remote_access_errors; u32 local_length_errors; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c index 95e8c1560cc2..5f831e3bdbad 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c @@ -423,8 +423,8 @@ static char *ocrdma_rxqp_errstats(struct ocrdma_dev *dev) memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); pcur = stats; - pcur += ocrdma_add_stat(stats, pcur, "nak_invalid_requst_errors", - (u64)rx_qp_err_stats->nak_invalid_requst_errors); + pcur += ocrdma_add_stat(stats, pcur, "nak_invalid_request_errors", + (u64)rx_qp_err_stats->nak_invalid_request_errors); pcur += ocrdma_add_stat(stats, pcur, "nak_remote_operation_errors", (u64)rx_qp_err_stats->nak_remote_operation_errors); pcur += ocrdma_add_stat(stats, pcur, "nak_count_remote_access_errors", -- cgit From 9a5407d74c22821f7944e2be4209bdfc5faf8143 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Sun, 10 Nov 2019 13:36:45 +0200 Subject: RDMA/qedr: Make qedr_iw_load_qp() static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function qedr_iw_load_qp() is only used in qedr_iw_cm.c Fixes: 82af6d19d8d9 ("RDMA/qedr: Fix synchronization methods and memory leaks in qedr") Link: https://lore.kernel.org/r/20191110113645.20058-1-kamalheib1@gmail.com Signed-off-by: Kamal Heib Acked-by: Michal Kalderon  Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/qedr_iw_cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index d3c13f4b136e..792eecd206b6 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -506,7 +506,7 @@ qedr_addr6_resolve(struct qedr_dev *dev, return rc; } -struct qedr_qp *qedr_iw_load_qp(struct qedr_dev *dev, u32 qpn) +static struct qedr_qp *qedr_iw_load_qp(struct qedr_dev *dev, u32 qpn) { struct qedr_qp *qp; -- cgit From 64c264872b8879e2ab9017eefe9514d4c045c60e Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 12 Nov 2019 11:26:08 +0200 Subject: RDMA/efa: Clear the admin command buffer prior to its submission We cannot rely on the entry memcpy as we only copy the actual size of the command, the rest of the bytes must be memset to zero. Currently providing non-zero memory will not have any user visible impact. However, since admin commands are extendable (in a backwards compatible way) everything beyond the size of the command must be cleared to prevent issues in the future. Fixes: 0420e542569b ("RDMA/efa: Implement functions that submit and complete admin commands") Link: https://lore.kernel.org/r/20191112092608.46964-1-galpress@amazon.com Reviewed-by: Daniel Kranzdorf Reviewed-by: Firas JahJah Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_com.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c index 3c412bc5b94f..0778f4f7dccd 100644 --- a/drivers/infiniband/hw/efa/efa_com.c +++ b/drivers/infiniband/hw/efa/efa_com.c @@ -317,6 +317,7 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu struct efa_admin_acq_entry *comp, size_t comp_size_in_bytes) { + struct efa_admin_aq_entry *aqe; struct efa_comp_ctx *comp_ctx; u16 queue_size_mask; u16 cmd_id; @@ -350,7 +351,9 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu reinit_completion(&comp_ctx->wait_event); - memcpy(&aq->sq.entries[pi], cmd, cmd_size_in_bytes); + aqe = &aq->sq.entries[pi]; + memset(aqe, 0, sizeof(*aqe)); + memcpy(aqe, cmd, cmd_size_in_bytes); aq->sq.pc++; atomic64_inc(&aq->stats.submitted_cmd); -- cgit From 72b894b09a96b741c92562709f6629310f2b34a1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Nov 2019 08:32:14 +0100 Subject: IB/umem: remove the dmasync argument to ib_umem_get The argument is always ignored, so remove it. Link: https://lore.kernel.org/r/20191113073214.9514-3-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Acked-by: Michal Kalderon Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 10 ++++----- drivers/infiniband/hw/cxgb4/mem.c | 2 +- drivers/infiniband/hw/efa/efa_verbs.c | 2 +- drivers/infiniband/hw/hns/hns_roce_cq.c | 2 +- drivers/infiniband/hw/hns/hns_roce_db.c | 2 +- drivers/infiniband/hw/hns/hns_roce_mr.c | 4 ++-- drivers/infiniband/hw/hns/hns_roce_qp.c | 2 +- drivers/infiniband/hw/hns/hns_roce_srq.c | 4 ++-- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 2 +- drivers/infiniband/hw/mlx4/cq.c | 2 +- drivers/infiniband/hw/mlx4/doorbell.c | 2 +- drivers/infiniband/hw/mlx4/mr.c | 2 +- drivers/infiniband/hw/mlx4/qp.c | 5 ++--- drivers/infiniband/hw/mlx4/srq.c | 2 +- drivers/infiniband/hw/mlx5/cq.c | 4 ++-- drivers/infiniband/hw/mlx5/devx.c | 2 +- drivers/infiniband/hw/mlx5/doorbell.c | 2 +- drivers/infiniband/hw/mlx5/mr.c | 2 +- drivers/infiniband/hw/mlx5/qp.c | 4 ++-- drivers/infiniband/hw/mlx5/srq.c | 2 +- drivers/infiniband/hw/mthca/mthca_provider.c | 4 +--- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 2 +- drivers/infiniband/hw/qedr/verbs.c | 29 ++++++++++++--------------- drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c | 2 +- drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c | 2 +- drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 4 ++-- drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c | 2 +- 27 files changed, 49 insertions(+), 55 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 8afd7d93cfe4..9b6ca15a183c 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -837,7 +837,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, bytes += (qplib_qp->sq.max_wqe * psn_sz); } bytes = PAGE_ALIGN(bytes); - umem = ib_umem_get(udata, ureq.qpsva, bytes, IB_ACCESS_LOCAL_WRITE, 1); + umem = ib_umem_get(udata, ureq.qpsva, bytes, IB_ACCESS_LOCAL_WRITE); if (IS_ERR(umem)) return PTR_ERR(umem); @@ -851,7 +851,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, bytes = (qplib_qp->rq.max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE); bytes = PAGE_ALIGN(bytes); umem = ib_umem_get(udata, ureq.qprva, bytes, - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(umem)) goto rqfail; qp->rumem = umem; @@ -1304,7 +1304,7 @@ static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev, bytes = (qplib_srq->max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE); bytes = PAGE_ALIGN(bytes); - umem = ib_umem_get(udata, ureq.srqva, bytes, IB_ACCESS_LOCAL_WRITE, 1); + umem = ib_umem_get(udata, ureq.srqva, bytes, IB_ACCESS_LOCAL_WRITE); if (IS_ERR(umem)) return PTR_ERR(umem); @@ -2547,7 +2547,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, cq->umem = ib_umem_get(udata, req.cq_va, entries * sizeof(struct cq_base), - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(cq->umem)) { rc = PTR_ERR(cq->umem); goto fail; @@ -3512,7 +3512,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, /* The fixed portion of the rkey is the same as the lkey */ mr->ib_mr.rkey = mr->qplib_mr.rkey; - umem = ib_umem_get(udata, start, length, mr_access_flags, 0); + umem = ib_umem_get(udata, start, length, mr_access_flags); if (IS_ERR(umem)) { dev_err(rdev_to_dev(rdev), "Failed to get umem"); rc = -EFAULT; diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 35c284af574d..fe3a7e8561df 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -543,7 +543,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mhp->rhp = rhp; - mhp->umem = ib_umem_get(udata, start, length, acc, 0); + mhp->umem = ib_umem_get(udata, start, length, acc); if (IS_ERR(mhp->umem)) goto err_free_skb; diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index b242ea7a3bc8..657baa63faec 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -1371,7 +1371,7 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, goto err_out; } - mr->umem = ib_umem_get(udata, start, length, access_flags, 0); + mr->umem = ib_umem_get(udata, start, length, access_flags); if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); ibdev_dbg(&dev->ibdev, diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index e25fa19c249c..cde2960328df 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -219,7 +219,7 @@ static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev, u32 npages; *umem = ib_umem_get(udata, buf_addr, cqe * hr_dev->caps.cq_entry_sz, - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(*umem)) return PTR_ERR(*umem); diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c index c00714c2f16a..10af6958ab69 100644 --- a/drivers/infiniband/hw/hns/hns_roce_db.c +++ b/drivers/infiniband/hw/hns/hns_roce_db.c @@ -31,7 +31,7 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, refcount_set(&page->refcount, 1); page->user_virt = page_addr; - page->umem = ib_umem_get(udata, page_addr, PAGE_SIZE, 0, 0); + page->umem = ib_umem_get(udata, page_addr, PAGE_SIZE, 0); if (IS_ERR(page->umem)) { ret = PTR_ERR(page->umem); kfree(page); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 6589e283cc77..9ad19170c3f9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -1145,7 +1145,7 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mr) return ERR_PTR(-ENOMEM); - mr->umem = ib_umem_get(udata, start, length, access_flags, 0); + mr->umem = ib_umem_get(udata, start, length, access_flags); if (IS_ERR(mr->umem)) { ret = PTR_ERR(mr->umem); goto err_free; @@ -1230,7 +1230,7 @@ static int rereg_mr_trans(struct ib_mr *ibmr, int flags, } ib_umem_release(mr->umem); - mr->umem = ib_umem_get(udata, start, length, mr_access_flags, 0); + mr->umem = ib_umem_get(udata, start, length, mr_access_flags); if (IS_ERR(mr->umem)) { ret = PTR_ERR(mr->umem); mr->umem = NULL; diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 9442f017db02..a6565b674801 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -745,7 +745,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, } hr_qp->umem = ib_umem_get(udata, ucmd.buf_addr, - hr_qp->buff_size, 0, 0); + hr_qp->buff_size, 0); if (IS_ERR(hr_qp->umem)) { dev_err(dev, "ib_umem_get error for create qp\n"); ret = PTR_ERR(hr_qp->umem); diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index a1bfa51c67fe..0cceae0fb4af 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -186,7 +186,7 @@ static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata, if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) return -EFAULT; - srq->umem = ib_umem_get(udata, ucmd.buf_addr, srq_buf_size, 0, 0); + srq->umem = ib_umem_get(udata, ucmd.buf_addr, srq_buf_size, 0); if (IS_ERR(srq->umem)) return PTR_ERR(srq->umem); @@ -206,7 +206,7 @@ static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata, /* config index queue BA */ srq->idx_que.umem = ib_umem_get(udata, ucmd.que_addr, - srq->idx_que.buf_size, 0, 0); + srq->idx_que.buf_size, 0); if (IS_ERR(srq->idx_que.umem)) { dev_err(hr_dev->dev, "ib_umem_get error for index queue\n"); ret = PTR_ERR(srq->idx_que.umem); diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index cd9ee1664a69..86375947bc67 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -1763,7 +1763,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, if (length > I40IW_MAX_MR_SIZE) return ERR_PTR(-EINVAL); - region = ib_umem_get(udata, start, length, acc, 0); + region = ib_umem_get(udata, start, length, acc); if (IS_ERR(region)) return (struct ib_mr *)region; diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index a7d238d312f0..306b21281fa2 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -145,7 +145,7 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_udata *udata, int n; *umem = ib_umem_get(udata, buf_addr, cqe * cqe_size, - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(*umem)) return PTR_ERR(*umem); diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c index 0f390351cef0..714f9df5bf39 100644 --- a/drivers/infiniband/hw/mlx4/doorbell.c +++ b/drivers/infiniband/hw/mlx4/doorbell.c @@ -64,7 +64,7 @@ int mlx4_ib_db_map_user(struct ib_udata *udata, unsigned long virt, page->user_virt = (virt & PAGE_MASK); page->refcnt = 0; - page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0, 0); + page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0); if (IS_ERR(page->umem)) { err = PTR_ERR(page->umem); kfree(page); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 6ae503cfc526..dfa17bcdcdbc 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -398,7 +398,7 @@ static struct ib_umem *mlx4_get_umem_mr(struct ib_udata *udata, u64 start, up_read(¤t->mm->mmap_sem); } - return ib_umem_get(udata, start, length, access_flags, 0); + return ib_umem_get(udata, start, length, access_flags); } struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index bd4aa04416c6..85f57b76e446 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -916,7 +916,7 @@ static int create_rq(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + (qp->sq.wqe_cnt << qp->sq.wqe_shift); - qp->umem = ib_umem_get(udata, wq.buf_addr, qp->buf_size, 0, 0); + qp->umem = ib_umem_get(udata, wq.buf_addr, qp->buf_size, 0); if (IS_ERR(qp->umem)) { err = PTR_ERR(qp->umem); goto err; @@ -1110,8 +1110,7 @@ static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, if (err) goto err; - qp->umem = - ib_umem_get(udata, ucmd.buf_addr, qp->buf_size, 0, 0); + qp->umem = ib_umem_get(udata, ucmd.buf_addr, qp->buf_size, 0); if (IS_ERR(qp->umem)) { err = PTR_ERR(qp->umem); goto err; diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 848db7264cc9..8dcf6e3d9ae2 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -110,7 +110,7 @@ int mlx4_ib_create_srq(struct ib_srq *ib_srq, if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) return -EFAULT; - srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0, 0); + srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0); if (IS_ERR(srq->umem)) return PTR_ERR(srq->umem); diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index cc938d27f913..dd8d24ee8e1d 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -709,7 +709,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, cq->buf.umem = ib_umem_get(udata, ucmd.buf_addr, entries * ucmd.cqe_size, - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(cq->buf.umem)) { err = PTR_ERR(cq->buf.umem); return err; @@ -1110,7 +1110,7 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, umem = ib_umem_get(udata, ucmd.buf_addr, (size_t)ucmd.cqe_size * entries, - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(umem)) { err = PTR_ERR(umem); return err; diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 2a5812a4c3bb..9d0a18cf9e5e 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -2134,7 +2134,7 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, if (err) return err; - obj->umem = ib_umem_get(&attrs->driver_udata, addr, size, access, 0); + obj->umem = ib_umem_get(&attrs->driver_udata, addr, size, access); if (IS_ERR(obj->umem)) return PTR_ERR(obj->umem); diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c index 8f4e5f22b84c..12737c509aa2 100644 --- a/drivers/infiniband/hw/mlx5/doorbell.c +++ b/drivers/infiniband/hw/mlx5/doorbell.c @@ -64,7 +64,7 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, page->user_virt = (virt & PAGE_MASK); page->refcnt = 0; - page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0, 0); + page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0); if (IS_ERR(page->umem)) { err = PTR_ERR(page->umem); kfree(page); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 077ca10d9ed9..60d39b9ec41c 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -764,7 +764,7 @@ static int mr_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, if (order) *order = ilog2(roundup_pow_of_two(*ncont)); } else { - u = ib_umem_get(udata, start, length, access_flags, 0); + u = ib_umem_get(udata, start, length, access_flags); if (IS_ERR(u)) { mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u)); return PTR_ERR(u); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index c831b455ffa8..26fb0227a514 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -749,7 +749,7 @@ static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, { int err; - *umem = ib_umem_get(udata, addr, size, 0, 0); + *umem = ib_umem_get(udata, addr, size, 0); if (IS_ERR(*umem)) { mlx5_ib_dbg(dev, "umem_get failed\n"); return PTR_ERR(*umem); @@ -806,7 +806,7 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (!ucmd->buf_addr) return -EINVAL; - rwq->umem = ib_umem_get(udata, ucmd->buf_addr, rwq->buf_size, 0, 0); + rwq->umem = ib_umem_get(udata, ucmd->buf_addr, rwq->buf_size, 0); if (IS_ERR(rwq->umem)) { mlx5_ib_dbg(dev, "umem_get failed\n"); err = PTR_ERR(rwq->umem); diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 4e7fde86c96b..62939df3c692 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -80,7 +80,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE); - srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0, 0); + srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0); if (IS_ERR(srq->umem)) { mlx5_ib_dbg(dev, "failed umem get, size %d\n", buf_size); err = PTR_ERR(srq->umem); diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 23554d8bf241..33002530fee7 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -880,9 +880,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mr) return ERR_PTR(-ENOMEM); - mr->umem = ib_umem_get(udata, start, length, acc, - ucmd.mr_attrs & MTHCA_MR_DMASYNC); - + mr->umem = ib_umem_get(udata, start, length, acc); if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); goto err; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index e72050de5734..9bc1ca6f6f9e 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -869,7 +869,7 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(status); - mr->umem = ib_umem_get(udata, start, len, acc, 0); + mr->umem = ib_umem_get(udata, start, len, acc); if (IS_ERR(mr->umem)) { status = -EFAULT; goto umem_err; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index fea95faa3b21..55b77d753d12 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -762,7 +762,7 @@ static inline int qedr_init_user_queue(struct ib_udata *udata, struct qedr_dev *dev, struct qedr_userq *q, u64 buf_addr, size_t buf_len, bool requires_db_rec, - int access, int dmasync, + int access, int alloc_and_init) { u32 fw_pages; @@ -770,7 +770,7 @@ static inline int qedr_init_user_queue(struct ib_udata *udata, q->buf_addr = buf_addr; q->buf_len = buf_len; - q->umem = ib_umem_get(udata, q->buf_addr, q->buf_len, access, dmasync); + q->umem = ib_umem_get(udata, q->buf_addr, q->buf_len, access); if (IS_ERR(q->umem)) { DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n", PTR_ERR(q->umem)); @@ -927,9 +927,8 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, cq->cq_type = QEDR_CQ_TYPE_USER; rc = qedr_init_user_queue(udata, dev, &cq->q, ureq.addr, - ureq.len, true, - IB_ACCESS_LOCAL_WRITE, - 1, 1); + ureq.len, true, IB_ACCESS_LOCAL_WRITE, + 1); if (rc) goto err0; @@ -1401,19 +1400,19 @@ static void qedr_free_srq_kernel_params(struct qedr_srq *srq) static int qedr_init_srq_user_params(struct ib_udata *udata, struct qedr_srq *srq, struct qedr_create_srq_ureq *ureq, - int access, int dmasync) + int access) { struct scatterlist *sg; int rc; rc = qedr_init_user_queue(udata, srq->dev, &srq->usrq, ureq->srq_addr, - ureq->srq_len, false, access, dmasync, 1); + ureq->srq_len, false, access, 1); if (rc) return rc; srq->prod_umem = ib_umem_get(udata, ureq->prod_pair_addr, - sizeof(struct rdma_srq_producers), access, dmasync); + sizeof(struct rdma_srq_producers), access); if (IS_ERR(srq->prod_umem)) { qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl); ib_umem_release(srq->usrq.umem); @@ -1510,7 +1509,7 @@ int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, goto err0; } - rc = qedr_init_srq_user_params(udata, srq, &ureq, 0, 0); + rc = qedr_init_srq_user_params(udata, srq, &ureq, 0); if (rc) goto err0; @@ -1751,18 +1750,16 @@ static int qedr_create_user_qp(struct qedr_dev *dev, return rc; } - /* SQ - read access only (0), dma sync not required (0) */ + /* SQ - read access only (0) */ rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr, - ureq.sq_len, true, 0, 0, - alloc_and_init); + ureq.sq_len, true, 0, alloc_and_init); if (rc) return rc; if (!qp->srq) { - /* RQ - read access only (0), dma sync not required (0) */ + /* RQ - read access only (0) */ rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr, - ureq.rq_len, true, - 0, 0, alloc_and_init); + ureq.rq_len, true, 0, alloc_and_init); if (rc) return rc; } @@ -2837,7 +2834,7 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, mr->type = QEDR_MR_USER; - mr->umem = ib_umem_get(udata, start, len, acc, 0); + mr->umem = ib_umem_get(udata, start, len, acc); if (IS_ERR(mr->umem)) { rc = -EFAULT; goto err0; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index 7800e6930502..a26a4fd86bf4 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -136,7 +136,7 @@ int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, } cq->umem = ib_umem_get(udata, ucmd.buf_addr, ucmd.buf_size, - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(cq->umem)) { ret = PTR_ERR(cq->umem); goto err_cq; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c index f3a3d22ee8d7..c61e665ff261 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c @@ -126,7 +126,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return ERR_PTR(-EINVAL); } - umem = ib_umem_get(udata, start, length, access_flags, 0); + umem = ib_umem_get(udata, start, length, access_flags); if (IS_ERR(umem)) { dev_warn(&dev->pdev->dev, "could not get umem for mem region\n"); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index 22daf2389d95..f15809c28f67 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -277,7 +277,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, if (!is_srq) { /* set qp->sq.wqe_cnt, shift, buf_size.. */ qp->rumem = ib_umem_get(udata, ucmd.rbuf_addr, - ucmd.rbuf_size, 0, 0); + ucmd.rbuf_size, 0); if (IS_ERR(qp->rumem)) { ret = PTR_ERR(qp->rumem); goto err_qp; @@ -289,7 +289,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, } qp->sumem = ib_umem_get(udata, ucmd.sbuf_addr, - ucmd.sbuf_size, 0, 0); + ucmd.sbuf_size, 0); if (IS_ERR(qp->sumem)) { if (!is_srq) ib_umem_release(qp->rumem); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index 36cdfbdbd325..98c8be71d91d 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -146,7 +146,7 @@ int pvrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, goto err_srq; } - srq->umem = ib_umem_get(udata, ucmd.buf_addr, ucmd.buf_size, 0, 0); + srq->umem = ib_umem_get(udata, ucmd.buf_addr, ucmd.buf_size, 0); if (IS_ERR(srq->umem)) { ret = PTR_ERR(srq->umem); goto err_srq; -- cgit From 9067f2f0b41d7e817fc8c5259bab1f17512b0147 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 23 Sep 2019 21:07:46 +0200 Subject: RDMA/iw_cgxb4: Fix an error handling path in 'c4iw_connect()' We should jump to fail3 in order to undo the 'xa_insert_irq()' call. Link: https://lore.kernel.org/r/20190923190746.10964-1-christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/cm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index e87fc0408470..81440eaf0a00 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3381,7 +3381,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (raddr->sin_addr.s_addr == htonl(INADDR_ANY)) { err = pick_local_ipaddrs(dev, cm_id); if (err) - goto fail2; + goto fail3; } /* find a route */ @@ -3403,7 +3403,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) { err = pick_local_ip6addrs(dev, cm_id); if (err) - goto fail2; + goto fail3; } /* find a route */ -- cgit From ff3195b3ed85d0068fb9a80eaa2a1471a04df76a Mon Sep 17 00:00:00 2001 From: Danit Goldberg Date: Fri, 15 Nov 2019 17:44:57 +0200 Subject: IB/mlx4: Update HW GID table while adding vlan GID When adding a new GID compare the vlan along with the GID and type. This allows vlan's to have GIDs that alias each other, such as the default GID. Otherwise they the GID cache view can become inconsistent with the HW view. Link: https://lore.kernel.org/r/20191115154457.247763-1-leon@kernel.org Signed-off-by: Danit Goldberg Reviewed-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx4/main.c | 9 ++++++++- drivers/infiniband/hw/mlx4/mlx4_ib.h | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index f89b129b7e3a..0b5dc1d5928f 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -256,6 +256,8 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context) int hw_update = 0; int i; struct gid_entry *gids = NULL; + u16 vlan_id = 0xffff; + u8 mac[ETH_ALEN]; if (!rdma_cap_roce_gid_table(attr->device, attr->port_num)) return -EINVAL; @@ -266,12 +268,16 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context) if (!context) return -EINVAL; + ret = rdma_read_gid_l2_fields(attr, &vlan_id, &mac[0]); + if (ret) + return ret; port_gid_table = &iboe->gids[attr->port_num - 1]; spin_lock_bh(&iboe->lock); for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) { if (!memcmp(&port_gid_table->gids[i].gid, &attr->gid, sizeof(attr->gid)) && - port_gid_table->gids[i].gid_type == attr->gid_type) { + port_gid_table->gids[i].gid_type == attr->gid_type && + port_gid_table->gids[i].vlan_id == vlan_id) { found = i; break; } @@ -291,6 +297,7 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context) memcpy(&port_gid_table->gids[free].gid, &attr->gid, sizeof(attr->gid)); port_gid_table->gids[free].gid_type = attr->gid_type; + port_gid_table->gids[free].vlan_id = vlan_id; port_gid_table->gids[free].ctx->real_index = free; port_gid_table->gids[free].ctx->refcount = 1; hw_update = 1; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 0d846fea8fdc..d188573187fa 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -508,6 +508,7 @@ struct gid_entry { union ib_gid gid; enum ib_gid_type gid_type; struct gid_cache_context *ctx; + u16 vlan_id; }; struct mlx4_port_gid_table { -- cgit From c16339b69c0d58f456154b43fc396b1f1c3dc055 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Fri, 15 Nov 2019 17:45:55 +0200 Subject: IB/mlx5: Support extended number of strides for Striding RQ Extends the minimum single WQE strides from 64 to 8, which is exposed by the "min_single_wqe_log_num_of_strides" field of striding_rq_caps. Choose right number of strides based on FW capability. Link: https://lore.kernel.org/r/20191115154555.247856-1-leon@kernel.org Signed-off-by: Mark Zhang Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 10 ++++++-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + drivers/infiniband/hw/mlx5/qp.c | 44 ++++++++++++++++++++++++++++-------- 3 files changed, 43 insertions(+), 12 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 2a510de4ae0f..4ced72185172 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1149,8 +1149,14 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES; resp.striding_rq_caps.max_single_stride_log_num_of_bytes = MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES; - resp.striding_rq_caps.min_single_wqe_log_num_of_strides = - MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES; + if (MLX5_CAP_GEN(dev->mdev, ext_stride_num_range)) + resp.striding_rq_caps + .min_single_wqe_log_num_of_strides = + MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES; + else + resp.striding_rq_caps + .min_single_wqe_log_num_of_strides = + MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES; resp.striding_rq_caps.max_single_wqe_log_num_of_strides = MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES; resp.striding_rq_caps.supported_qpts = diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index d14fe43837e7..5e826eb7e4fe 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -291,6 +291,7 @@ enum mlx5_ib_wq_flags { #define MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES 16 #define MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES 6 #define MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES 13 +#define MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES 3 struct mlx5_ib_rwq { struct ib_wq ibwq; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 26fb0227a514..82814c6d15a3 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -5957,12 +5957,21 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, } MLX5_SET(wq, wq, log_wq_stride, rwq->log_rq_stride); if (rwq->create_flags & MLX5_IB_WQ_FLAGS_STRIDING_RQ) { + /* + * In Firmware number of strides in each WQE is: + * "512 * 2^single_wqe_log_num_of_strides" + * Values 3 to 8 are accepted as 10 to 15, 9 to 18 are + * accepted as 0 to 9 + */ + static const u8 fw_map[] = { 10, 11, 12, 13, 14, 15, 0, 1, + 2, 3, 4, 5, 6, 7, 8, 9 }; MLX5_SET(wq, wq, two_byte_shift_en, rwq->two_byte_shift_en); MLX5_SET(wq, wq, log_wqe_stride_size, rwq->single_stride_log_num_of_bytes - MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES); - MLX5_SET(wq, wq, log_wqe_num_of_strides, rwq->log_num_strides - - MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES); + MLX5_SET(wq, wq, log_wqe_num_of_strides, + fw_map[rwq->log_num_strides - + MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES]); } MLX5_SET(wq, wq, log_wq_sz, rwq->log_rq_size); MLX5_SET(wq, wq, pd, to_mpd(pd)->pdn); @@ -6037,6 +6046,19 @@ static int set_user_rq_size(struct mlx5_ib_dev *dev, return 0; } +static bool log_of_strides_valid(struct mlx5_ib_dev *dev, u32 log_num_strides) +{ + if ((log_num_strides > MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES) || + (log_num_strides < MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES)) + return false; + + if (!MLX5_CAP_GEN(dev->mdev, ext_stride_num_range) && + (log_num_strides < MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES)) + return false; + + return true; +} + static int prepare_user_rq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, struct ib_udata *udata, @@ -6084,14 +6106,16 @@ static int prepare_user_rq(struct ib_pd *pd, MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES); return -EINVAL; } - if ((ucmd.single_wqe_log_num_of_strides > - MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES) || - (ucmd.single_wqe_log_num_of_strides < - MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES)) { - mlx5_ib_dbg(dev, "Invalid log num strides (%u. Range is %u - %u)\n", - ucmd.single_wqe_log_num_of_strides, - MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES, - MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES); + if (!log_of_strides_valid(dev, + ucmd.single_wqe_log_num_of_strides)) { + mlx5_ib_dbg( + dev, + "Invalid log num strides (%u. Range is %u - %u)\n", + ucmd.single_wqe_log_num_of_strides, + MLX5_CAP_GEN(dev->mdev, ext_stride_num_range) ? + MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES : + MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES, + MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES); return -EINVAL; } rwq->single_stride_log_num_of_bytes = -- cgit From a25984f3baaa97079ce3e75c5a1d362155915a31 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 18 Nov 2019 17:06:45 +0200 Subject: RDMA/qedr: Fix null-pointer dereference when calling rdma_user_mmap_get_offset When running against rdma-core that doesn't support doorbell recovery, the rdma_user_mmap_entry won't be allocated for doorbell recovery related mappings. We have a flag indicating whether rdma-core supports doorbell recovery or not which was used during initialization, however some cases didn't check that the rdma_user_mmap_entry exists before attempting to acquire it's offset. Fixes: 97f612509294 ("RDMA/qedr: Add doorbell overflow recovery support") Link: https://lore.kernel.org/r/20191118150645.26602-1-michal.kalderon@marvell.com Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/verbs.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 55b77d753d12..4cd292966aa9 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -683,7 +683,9 @@ static int qedr_copy_cq_uresp(struct qedr_dev *dev, uresp.db_offset = db_offset; uresp.icid = cq->icid; - uresp.db_rec_addr = rdma_user_mmap_get_offset(cq->q.db_mmap_entry); + if (cq->q.db_mmap_entry) + uresp.db_rec_addr = + rdma_user_mmap_get_offset(cq->q.db_mmap_entry); rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (rc) @@ -1012,7 +1014,7 @@ err1: if (udata) { qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl); ib_umem_release(cq->q.umem); - if (ctx) + if (cq->q.db_mmap_entry) rdma_user_mmap_entry_remove(cq->q.db_mmap_entry); } else { dev->ops->common->chain_free(dev->cdev, &cq->pbl); @@ -1245,7 +1247,9 @@ static void qedr_copy_rq_uresp(struct qedr_dev *dev, } uresp->rq_icid = qp->icid; - uresp->rq_db_rec_addr = rdma_user_mmap_get_offset(qp->urq.db_mmap_entry); + if (qp->urq.db_mmap_entry) + uresp->rq_db_rec_addr = + rdma_user_mmap_get_offset(qp->urq.db_mmap_entry); } static void qedr_copy_sq_uresp(struct qedr_dev *dev, @@ -1260,8 +1264,9 @@ static void qedr_copy_sq_uresp(struct qedr_dev *dev, else uresp->sq_icid = qp->icid + 1; - uresp->sq_db_rec_addr = - rdma_user_mmap_get_offset(qp->usq.db_mmap_entry); + if (qp->usq.db_mmap_entry) + uresp->sq_db_rec_addr = + rdma_user_mmap_get_offset(qp->usq.db_mmap_entry); } static int qedr_copy_qp_uresp(struct qedr_dev *dev, -- cgit From 9c0015ef092879f61129cdffbbe22fd30201d39b Mon Sep 17 00:00:00 2001 From: Danit Goldberg Date: Wed, 6 Nov 2019 15:18:12 +0200 Subject: IB/mlx5: Implement callbacks for getting VFs GUID attributes Implement the IB defined callback mlx5_ib_get_vf_guid used to query FW for VFs attributes and return node and port GUIDs. Signed-off-by: Danit Goldberg Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/ib_virt.c | 24 ++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/main.c | 1 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 +++ 3 files changed, 28 insertions(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/mlx5/ib_virt.c b/drivers/infiniband/hw/mlx5/ib_virt.c index 649a3364f838..4f0edd4832bd 100644 --- a/drivers/infiniband/hw/mlx5/ib_virt.c +++ b/drivers/infiniband/hw/mlx5/ib_virt.c @@ -201,3 +201,27 @@ int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port, return -EINVAL; } + +int mlx5_ib_get_vf_guid(struct ib_device *device, int vf, u8 port, + struct ifla_vf_guid *node_guid, + struct ifla_vf_guid *port_guid) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_hca_vport_context *rep; + int err; + + rep = kzalloc(sizeof(*rep), GFP_KERNEL); + if (!rep) + return -ENOMEM; + + err = mlx5_query_hca_vport_context(mdev, 1, 1, vf+1, rep); + if (err) + goto ex; + + port_guid->guid = rep->port_guid; + node_guid->guid = rep->node_guid; +ex: + kfree(rep); + return err; +} diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 46ea4f0b9b51..b0d7e2d2991e 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6310,6 +6310,7 @@ static const struct ib_device_ops mlx5_ib_dev_ipoib_enhanced_ops = { static const struct ib_device_ops mlx5_ib_dev_sriov_ops = { .get_vf_config = mlx5_ib_get_vf_config, + .get_vf_guid = mlx5_ib_get_vf_guid, .get_vf_stats = mlx5_ib_get_vf_stats, .set_vf_guid = mlx5_ib_set_vf_guid, .set_vf_link_state = mlx5_ib_set_vf_link_state, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 2ceaef3ea3fb..96811f6ca15a 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1301,6 +1301,9 @@ int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf, u8 port, int state); int mlx5_ib_get_vf_stats(struct ib_device *device, int vf, u8 port, struct ifla_vf_stats *stats); +int mlx5_ib_get_vf_guid(struct ib_device *device, int vf, u8 port, + struct ifla_vf_guid *node_guid, + struct ifla_vf_guid *port_guid); int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, int type); -- cgit From 6e419e35e68ad7327a383cd52053071b8eb9843b Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 20 Nov 2019 21:41:38 +0800 Subject: RDMA/bnxt_re: Fix Kconfig indentation Adjust indentation from spaces to tab (+optional two spaces) as in coding style with command like: $ sed -e 's/^ /\t/' -i */Kconfig Link: https://lore.kernel.org/r/20191120134138.15245-1-krzk@kernel.org Signed-off-by: Krzysztof Kozlowski Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/Kconfig | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/bnxt_re/Kconfig b/drivers/infiniband/hw/bnxt_re/Kconfig index ab8779d23382..b83f1cc38c52 100644 --- a/drivers/infiniband/hw/bnxt_re/Kconfig +++ b/drivers/infiniband/hw/bnxt_re/Kconfig @@ -1,11 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-only config INFINIBAND_BNXT_RE - tristate "Broadcom Netxtreme HCA support" - depends on 64BIT - depends on ETHERNET && NETDEVICES && PCI && INET && DCB - select NET_VENDOR_BROADCOM - select BNXT - ---help--- + tristate "Broadcom Netxtreme HCA support" + depends on 64BIT + depends on ETHERNET && NETDEVICES && PCI && INET && DCB + select NET_VENDOR_BROADCOM + select BNXT + ---help--- This driver supports Broadcom NetXtreme-E 10/25/40/50 gigabit RoCE HCAs. To compile this driver as a module, choose M here: the module will be called bnxt_re. -- cgit From e284b159c6881c8bec9713daba2653268f4c4948 Mon Sep 17 00:00:00 2001 From: Luke Starrett Date: Thu, 21 Nov 2019 01:22:21 -0500 Subject: RDMA/bnxt_re: Fix chip number validation Broadcom's Gen P5 series In the first version of Gen P5 ASIC, chip-id was always set to 0x1750 for all adaptor port configurations. This has been fixed in the new chip rev. Due to this missing fix users are not able to use adaptors based on latest chip rev of Broadcom's Gen P5 adaptors. Fixes: ae8637e13185 ("RDMA/bnxt_re: Add chip context to identify 57500 series") Link: https://lore.kernel.org/r/1574317343-23300-2-git-send-email-devesh.sharma@broadcom.com Signed-off-by: Naresh Kumar PBS Signed-off-by: Selvin Xavier Signed-off-by: Luke Starrett Signed-off-by: Devesh Sharma Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/qplib_res.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index fbda11a7ab1a..aaa76d792185 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -186,7 +186,9 @@ struct bnxt_qplib_chip_ctx { u8 chip_metal; }; -#define CHIP_NUM_57500 0x1750 +#define CHIP_NUM_57508 0x1750 +#define CHIP_NUM_57504 0x1751 +#define CHIP_NUM_57502 0x1752 struct bnxt_qplib_res { struct pci_dev *pdev; @@ -203,7 +205,9 @@ struct bnxt_qplib_res { static inline bool bnxt_qplib_is_chip_gen_p5(struct bnxt_qplib_chip_ctx *cctx) { - return (cctx->chip_num == CHIP_NUM_57500); + return (cctx->chip_num == CHIP_NUM_57508 || + cctx->chip_num == CHIP_NUM_57504 || + cctx->chip_num == CHIP_NUM_57502); } static inline u8 bnxt_qplib_get_hwq_type(struct bnxt_qplib_res *res) -- cgit From 98998ffe5216c7fa2c0225bb5b049ca5cdf8d195 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Thu, 21 Nov 2019 01:22:22 -0500 Subject: RDMA/bnxt_re: Fix stat push into dma buffer on gen p5 devices Due to recent advances in the firmware for Broadcom's gen p5 series of adaptors the driver code to report hardware counters has been broken w.r.t. roce devices. The new firmware command expects dma length to be specified during stat dma buffer allocation. Fixes: 2792b5b95ed5 ("bnxt_en: Update firmware interface spec. to 1.10.0.89.") Link: https://lore.kernel.org/r/1574317343-23300-3-git-send-email-devesh.sharma@broadcom.com Signed-off-by: Devesh Sharma Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index d6785b8339d2..a2ac88e01a99 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -494,6 +494,7 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev, bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, -1); req.update_period_ms = cpu_to_le32(1000); req.stats_dma_addr = cpu_to_le64(dma_map); + req.stats_dma_length = cpu_to_le16(sizeof(struct ctx_hw_stats_ext)); req.stat_ctx_flags = STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE; bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); -- cgit From fca5b9dc0986aa49b3f0a7cfe24b6c82422ac1d7 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Thu, 21 Nov 2019 01:22:23 -0500 Subject: RDMA/bnxt_re: Fix missing le16_to_cpu From sparse: drivers/infiniband/hw/bnxt_re/main.c:1274:18: warning: cast from restricted __le16 drivers/infiniband/hw/bnxt_re/main.c:1275:18: warning: cast from restricted __le16 drivers/infiniband/hw/bnxt_re/main.c:1276:18: warning: cast from restricted __le16 drivers/infiniband/hw/bnxt_re/main.c:1277:21: warning: restricted __le16 degrades to integer Fixes: 2b827ea1926b ("RDMA/bnxt_re: Query HWRM Interface version from FW") Link: https://lore.kernel.org/r/1574317343-23300-4-git-send-email-devesh.sharma@broadcom.com Signed-off-by: Devesh Sharma Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index a2ac88e01a99..e7e8a0f49464 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1291,10 +1291,10 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev) return; } rdev->qplib_ctx.hwrm_intf_ver = - (u64)resp.hwrm_intf_major << 48 | - (u64)resp.hwrm_intf_minor << 32 | - (u64)resp.hwrm_intf_build << 16 | - resp.hwrm_intf_patch; + (u64)le16_to_cpu(resp.hwrm_intf_major) << 48 | + (u64)le16_to_cpu(resp.hwrm_intf_minor) << 32 | + (u64)le16_to_cpu(resp.hwrm_intf_build) << 16 | + le16_to_cpu(resp.hwrm_intf_patch); } static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev) -- cgit From 25d24f4241f7bccdc1978daa026d8847e04a73a6 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 22 Nov 2019 15:48:14 +0000 Subject: IB/hfi1: remove redundant assignment to variable ret The variable ret is being initialized with a value that is never read and it is being updated later with a new value. The initialization is redundant and can be removed. Link: https://lore.kernel.org/r/20191122154814.87257-1-colin.king@canonical.com Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Acked-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index cbf7faa5038c..36593f2efe26 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -634,7 +634,7 @@ static void apply_tx_lanes(struct hfi1_pportdata *ppd, u8 field_id, u32 config_data, const char *message) { u8 i; - int ret = HCMD_SUCCESS; + int ret; for (i = 0; i < 4; i++) { ret = load_8051_config(ppd->dd, field_id, i, config_data); -- cgit From bcf7cc534cd40f8098850274e8459fb0d088fcda Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 21 Nov 2019 16:15:07 +0200 Subject: RDMA/efa: Store network attributes in device attributes There's no reason to separate the network attributes from all other device attributes. Embed the fields inside the device attributes and query them all in one function. Link: https://lore.kernel.org/r/20191121141509.59297-2-galpress@amazon.com Reviewed-by: Daniel Kranzdorf Reviewed-by: Yossi Leybovich Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa.h | 2 -- drivers/infiniband/hw/efa/efa_com_cmd.c | 34 ++++++++++++--------------------- drivers/infiniband/hw/efa/efa_com_cmd.h | 9 ++------- drivers/infiniband/hw/efa/efa_main.c | 16 ---------------- drivers/infiniband/hw/efa/efa_verbs.c | 8 ++++---- 5 files changed, 18 insertions(+), 51 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h index 2bda07078b97..aa7396a1588a 100644 --- a/drivers/infiniband/hw/efa/efa.h +++ b/drivers/infiniband/hw/efa/efa.h @@ -60,8 +60,6 @@ struct efa_dev { u64 mem_bar_len; u64 db_bar_addr; u64 db_bar_len; - u8 addr[EFA_GID_SIZE]; - u32 mtu; int admin_msix_vector_idx; struct efa_irq admin_irq; diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index c079f1332082..4713c2756ad3 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -423,28 +423,6 @@ static int efa_com_get_feature(struct efa_com_dev *edev, return efa_com_get_feature_ex(edev, get_resp, feature_id, 0, 0); } -int efa_com_get_network_attr(struct efa_com_dev *edev, - struct efa_com_get_network_attr_result *result) -{ - struct efa_admin_get_feature_resp resp; - int err; - - err = efa_com_get_feature(edev, &resp, - EFA_ADMIN_NETWORK_ATTR); - if (err) { - ibdev_err_ratelimited(edev->efa_dev, - "Failed to get network attributes %d\n", - err); - return err; - } - - memcpy(result->addr, resp.u.network_attr.addr, - sizeof(resp.u.network_attr.addr)); - result->mtu = resp.u.network_attr.mtu; - - return 0; -} - int efa_com_get_device_attr(struct efa_com_dev *edev, struct efa_com_get_device_attr_result *result) { @@ -501,6 +479,18 @@ int efa_com_get_device_attr(struct efa_com_dev *edev, result->max_llq_size = resp.u.queue_attr.max_llq_size; result->sub_cqs_per_cq = resp.u.queue_attr.sub_cqs_per_cq; + err = efa_com_get_feature(edev, &resp, EFA_ADMIN_NETWORK_ATTR); + if (err) { + ibdev_err_ratelimited(edev->efa_dev, + "Failed to get network attributes %d\n", + err); + return err; + } + + memcpy(result->addr, resp.u.network_attr.addr, + sizeof(resp.u.network_attr.addr)); + result->mtu = resp.u.network_attr.mtu; + return 0; } diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h index 7f6c13052f49..6134d13ecc6f 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.h +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -100,14 +100,11 @@ struct efa_com_destroy_ah_params { u16 pdn; }; -struct efa_com_get_network_attr_result { - u8 addr[EFA_GID_SIZE]; - u32 mtu; -}; - struct efa_com_get_device_attr_result { + u8 addr[EFA_GID_SIZE]; u64 page_size_cap; u64 max_mr_pages; + u32 mtu; u32 fw_version; u32 admin_api_version; u32 device_version; @@ -271,8 +268,6 @@ int efa_com_create_ah(struct efa_com_dev *edev, struct efa_com_create_ah_result *result); int efa_com_destroy_ah(struct efa_com_dev *edev, struct efa_com_destroy_ah_params *params); -int efa_com_get_network_attr(struct efa_com_dev *edev, - struct efa_com_get_network_attr_result *result); int efa_com_get_device_attr(struct efa_com_dev *edev, struct efa_com_get_device_attr_result *result); int efa_com_get_hw_hints(struct efa_com_dev *edev, diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index 0e3050d01b75..faf3ff1bca2a 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -30,15 +30,6 @@ MODULE_DEVICE_TABLE(pci, efa_pci_tbl); (BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \ BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE)) -static void efa_update_network_attr(struct efa_dev *dev, - struct efa_com_get_network_attr_result *network_attr) -{ - memcpy(dev->addr, network_attr->addr, sizeof(network_attr->addr)); - dev->mtu = network_attr->mtu; - - dev_dbg(&dev->pdev->dev, "Full address %pI6\n", dev->addr); -} - /* This handler will called for unknown event group or unimplemented handlers */ static void unimplemented_aenq_handler(void *data, struct efa_admin_aenq_entry *aenq_e) @@ -234,7 +225,6 @@ static const struct ib_device_ops efa_dev_ops = { static int efa_ib_device_add(struct efa_dev *dev) { - struct efa_com_get_network_attr_result network_attr; struct efa_com_get_hw_hints_result hw_hints; struct pci_dev *pdev = dev->pdev; int err; @@ -250,12 +240,6 @@ static int efa_ib_device_add(struct efa_dev *dev) if (err) return err; - err = efa_com_get_network_attr(&dev->edev, &network_attr); - if (err) - goto err_release_doorbell_bar; - - efa_update_network_attr(dev, &network_attr); - err = efa_com_get_hw_hints(&dev->edev, &hw_hints); if (err) goto err_release_doorbell_bar; diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 657baa63faec..e1f1c1495da1 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -232,9 +232,9 @@ int efa_query_port(struct ib_device *ibdev, u8 port, props->pkey_tbl_len = 1; props->active_speed = IB_SPEED_EDR; props->active_width = IB_WIDTH_4X; - props->max_mtu = ib_mtu_int_to_enum(dev->mtu); - props->active_mtu = ib_mtu_int_to_enum(dev->mtu); - props->max_msg_sz = dev->mtu; + props->max_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu); + props->active_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu); + props->max_msg_sz = dev->dev_attr.mtu; props->max_vl_num = 1; return 0; @@ -295,7 +295,7 @@ int efa_query_gid(struct ib_device *ibdev, u8 port, int index, { struct efa_dev *dev = to_edev(ibdev); - memcpy(gid->raw, dev->addr, sizeof(dev->addr)); + memcpy(gid->raw, dev->dev_attr.addr, sizeof(dev->dev_attr.addr)); return 0; } -- cgit From e6c4f3ff434c8b336662a2053d48ce677c9fd608 Mon Sep 17 00:00:00 2001 From: Daniel Kranzdorf Date: Thu, 21 Nov 2019 16:15:08 +0200 Subject: RDMA/efa: Support remote read access in MR registration Enable remote read access for memory regions in order to support RDMA operations. Link: https://lore.kernel.org/r/20191121141509.59297-3-galpress@amazon.com Signed-off-by: Daniel Kranzdorf Reviewed-by: Yossi Leybovich Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_admin_cmds_defs.h | 12 +++++++++--- drivers/infiniband/hw/efa/efa_com_cmd.c | 3 +-- drivers/infiniband/hw/efa/efa_com_cmd.h | 7 +------ drivers/infiniband/hw/efa/efa_verbs.c | 5 +++-- 4 files changed, 14 insertions(+), 13 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index 2be0469d545f..7fa9d532db61 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -362,9 +362,13 @@ struct efa_admin_reg_mr_cmd { /* * permissions - * 0 : local_write_enable - Write permissions: value - * of 1 needed for RQ buffers and for RDMA write - * 7:1 : reserved1 - remote access flags, etc + * 0 : local_write_enable - Local write permissions: + * must be set for RQ buffers and buffers posted for + * RDMA Read requests + * 1 : reserved1 - MBZ + * 2 : remote_read_enable - Remote read permissions: + * must be set to enable RDMA read from the region + * 7:3 : reserved2 - MBZ */ u8 permissions; @@ -780,6 +784,8 @@ struct efa_admin_mmio_req_read_less_resp { #define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_SHIFT 7 #define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_MASK BIT(7) #define EFA_ADMIN_REG_MR_CMD_LOCAL_WRITE_ENABLE_MASK BIT(0) +#define EFA_ADMIN_REG_MR_CMD_REMOTE_READ_ENABLE_SHIFT 2 +#define EFA_ADMIN_REG_MR_CMD_REMOTE_READ_ENABLE_MASK BIT(2) /* create_cq_cmd */ #define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_SHIFT 5 diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index 4713c2756ad3..520c9d920f9e 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -230,8 +230,7 @@ int efa_com_register_mr(struct efa_com_dev *edev, mr_cmd.flags |= params->page_shift & EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT_MASK; mr_cmd.iova = params->iova; - mr_cmd.permissions |= params->permissions & - EFA_ADMIN_REG_MR_CMD_LOCAL_WRITE_ENABLE_MASK; + mr_cmd.permissions = params->permissions; if (params->inline_pbl) { memcpy(mr_cmd.pbl.inline_pbl_array, diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h index 6134d13ecc6f..d119186c41d0 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.h +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -178,12 +178,7 @@ struct efa_com_reg_mr_params { * address mapping */ u8 page_shift; - /* - * permissions - * 0: local_write_enable - Write permissions: value of 1 needed - * for RQ buffers and for RDMA write:1: reserved1 - remote - * access flags, etc - */ + /* see permissions field of struct efa_admin_reg_mr_cmd */ u8 permissions; u8 inline_pbl; u8 indirect; diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index e1f1c1495da1..9701f8e52c71 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -70,7 +70,8 @@ static const char *const efa_stats_names[] = { #define EFA_CHUNK_USED_SIZE \ ((EFA_PTRS_PER_CHUNK * EFA_CHUNK_PAYLOAD_PTR_SIZE) + EFA_CHUNK_PTR_SIZE) -#define EFA_SUPPORTED_ACCESS_FLAGS IB_ACCESS_LOCAL_WRITE +#define EFA_SUPPORTED_ACCESS_FLAGS \ + (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ) struct pbl_chunk { dma_addr_t dma_addr; @@ -1382,7 +1383,7 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, params.pd = to_epd(ibpd)->pdn; params.iova = virt_addr; params.mr_length_in_bytes = length; - params.permissions = access_flags & 0x1; + params.permissions = access_flags; pg_sz = ib_umem_find_best_pgsz(mr->umem, dev->dev_attr.page_size_cap, -- cgit From 666e8ff535d401eb286fa20446e55ae984d91049 Mon Sep 17 00:00:00 2001 From: Daniel Kranzdorf Date: Thu, 21 Nov 2019 16:15:09 +0200 Subject: RDMA/efa: Expose RDMA read related attributes Query the device attributes for RDMA operations, including maximum transfer size and maximum number of SGEs per RDMA WR, and report them back to the userspace library. Link: https://lore.kernel.org/r/20191121141509.59297-4-galpress@amazon.com Signed-off-by: Daniel Kranzdorf Reviewed-by: Yossi Leybovich Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_admin_cmds_defs.h | 17 +++++++++++++++++ drivers/infiniband/hw/efa/efa_com_cmd.c | 3 +++ drivers/infiniband/hw/efa/efa_com_cmd.h | 3 +++ drivers/infiniband/hw/efa/efa_verbs.c | 22 +++++++++++++++++----- 4 files changed, 40 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index 7fa9d532db61..e96bcb16bd2b 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -562,6 +562,16 @@ struct efa_admin_feature_device_attr_desc { /* Indicates how many bits are used virtual address access */ u8 virt_addr_width; + + /* + * 0 : rdma_read - If set, RDMA Read is supported on + * TX queues + * 31:1 : reserved - MBZ + */ + u32 device_caps; + + /* Max RDMA transfer size in bytes */ + u32 max_rdma_size; }; struct efa_admin_feature_queue_attr_desc { @@ -608,6 +618,9 @@ struct efa_admin_feature_queue_attr_desc { /* The maximum size of LLQ in bytes */ u32 max_llq_size; + + /* Maximum number of SGEs for a single RDMA read WQE */ + u16 max_wr_rdma_sges; }; struct efa_admin_feature_aenq_desc { @@ -622,6 +635,7 @@ struct efa_admin_feature_network_attr_desc { /* Raw address data in network byte order */ u8 addr[16]; + /* max packet payload size in bytes */ u32 mtu; }; @@ -797,4 +811,7 @@ struct efa_admin_mmio_req_read_less_resp { /* get_set_feature_common_desc */ #define EFA_ADMIN_GET_SET_FEATURE_COMMON_DESC_SELECT_MASK GENMASK(1, 0) +/* feature_device_attr_desc */ +#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK BIT(0) + #endif /* _EFA_ADMIN_CMDS_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index 520c9d920f9e..e20bd84a1014 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -444,6 +444,8 @@ int efa_com_get_device_attr(struct efa_com_dev *edev, result->phys_addr_width = resp.u.device_attr.phys_addr_width; result->virt_addr_width = resp.u.device_attr.virt_addr_width; result->db_bar = resp.u.device_attr.db_bar; + result->max_rdma_size = resp.u.device_attr.max_rdma_size; + result->device_caps = resp.u.device_attr.device_caps; if (result->admin_api_version < 1) { ibdev_err_ratelimited( @@ -477,6 +479,7 @@ int efa_com_get_device_attr(struct efa_com_dev *edev, result->max_ah = resp.u.queue_attr.max_ah; result->max_llq_size = resp.u.queue_attr.max_llq_size; result->sub_cqs_per_cq = resp.u.queue_attr.sub_cqs_per_cq; + result->max_wr_rdma_sge = resp.u.queue_attr.max_wr_rdma_sges; err = efa_com_get_feature(edev, &resp, EFA_ADMIN_NETWORK_ATTR); if (err) { diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h index d119186c41d0..31db5a0cbd5b 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.h +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -121,9 +121,12 @@ struct efa_com_get_device_attr_result { u32 max_pd; u32 max_ah; u32 max_llq_size; + u32 max_rdma_size; + u32 device_caps; u16 sub_cqs_per_cq; u16 max_sq_sge; u16 max_rq_sge; + u16 max_wr_rdma_sge; u8 db_bar; }; diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 9701f8e52c71..c9d294caa27a 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -70,9 +70,6 @@ static const char *const efa_stats_names[] = { #define EFA_CHUNK_USED_SIZE \ ((EFA_PTRS_PER_CHUNK * EFA_CHUNK_PAYLOAD_PTR_SIZE) + EFA_CHUNK_PTR_SIZE) -#define EFA_SUPPORTED_ACCESS_FLAGS \ - (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ) - struct pbl_chunk { dma_addr_t dma_addr; u64 *buf; @@ -142,6 +139,11 @@ to_emmap(struct rdma_user_mmap_entry *rdma_entry) return container_of(rdma_entry, struct efa_user_mmap_entry, rdma_entry); } +static inline bool is_rdma_read_cap(struct efa_dev *dev) +{ + return dev->dev_attr.device_caps & EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK; +} + #define field_avail(x, fld, sz) (offsetof(typeof(x), fld) + \ FIELD_SIZEOF(typeof(x), fld) <= (sz)) @@ -201,12 +203,17 @@ int efa_query_device(struct ib_device *ibdev, dev_attr->max_rq_depth); props->max_send_sge = dev_attr->max_sq_sge; props->max_recv_sge = dev_attr->max_rq_sge; + props->max_sge_rd = dev_attr->max_wr_rdma_sge; if (udata && udata->outlen) { resp.max_sq_sge = dev_attr->max_sq_sge; resp.max_rq_sge = dev_attr->max_rq_sge; resp.max_sq_wr = dev_attr->max_sq_depth; resp.max_rq_wr = dev_attr->max_rq_depth; + resp.max_rdma_size = dev_attr->max_rdma_size; + + if (is_rdma_read_cap(dev)) + resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ; err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen)); @@ -1345,6 +1352,7 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, struct efa_com_reg_mr_params params = {}; struct efa_com_reg_mr_result result = {}; struct pbl_context pbl; + int supp_access_flags; unsigned int pg_sz; struct efa_mr *mr; int inline_size; @@ -1358,10 +1366,14 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, goto err_out; } - if (access_flags & ~EFA_SUPPORTED_ACCESS_FLAGS) { + supp_access_flags = + IB_ACCESS_LOCAL_WRITE | + (is_rdma_read_cap(dev) ? IB_ACCESS_REMOTE_READ : 0); + + if (access_flags & ~supp_access_flags) { ibdev_dbg(&dev->ibdev, "Unsupported access flags[%#x], supported[%#x]\n", - access_flags, EFA_SUPPORTED_ACCESS_FLAGS); + access_flags, supp_access_flags); err = -EOPNOTSUPP; goto err_out; } -- cgit From e2b2744a06d35ba44f32c86e0579d986931187b3 Mon Sep 17 00:00:00 2001 From: Yixian Liu Date: Mon, 18 Nov 2019 10:34:50 +0800 Subject: RDMA/hns: Redefine interfaces used in creating cq Some interfaces defined with unnecessary input parameters, such as "nent" and "vector". This patch redefined these interfaces to make the code more readable and simple. Link: https://lore.kernel.org/r/1574044493-46984-2-git-send-email-liweihang@hisilicon.com Signed-off-by: Yixian Liu Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_cq.c | 83 ++++++++++++++--------------- drivers/infiniband/hw/hns/hns_roce_device.h | 6 +-- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 9 ++-- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 10 ++-- 4 files changed, 53 insertions(+), 55 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index cde2960328df..87d9b0fb84c8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -82,9 +82,8 @@ static int hns_roce_hw_create_cq(struct hns_roce_dev *dev, HNS_ROCE_CMD_TIMEOUT_MSECS); } -static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, - struct hns_roce_mtt *hr_mtt, - struct hns_roce_cq *hr_cq, int vector) +static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, + struct hns_roce_cq *hr_cq) { struct hns_roce_cmd_mailbox *mailbox; struct hns_roce_hem_table *mtt_table; @@ -103,18 +102,13 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, mtt_table = &hr_dev->mr_table.mtt_table; mtts = hns_roce_table_find(hr_dev, mtt_table, - hr_mtt->first_seg, &dma_handle); + hr_cq->hr_buf.hr_mtt.first_seg, + &dma_handle); if (!mtts) { dev_err(dev, "Failed to find mtt for CQ buf.\n"); return -EINVAL; } - if (vector >= hr_dev->caps.num_comp_vectors) { - dev_err(dev, "Invalid vector(0x%x) for CQ alloc.\n", vector); - return -EINVAL; - } - hr_cq->vector = vector; - ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn); if (ret) { dev_err(dev, "Num of CQ out of range.\n"); @@ -143,8 +137,7 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, goto err_xa; } - hr_dev->hw->write_cqc(hr_dev, hr_cq, mailbox->buf, mtts, dma_handle, - nent, vector); + hr_dev->hw->write_cqc(hr_dev, hr_cq, mailbox->buf, mtts, dma_handle); /* Send mailbox to hw */ ret = hns_roce_hw_create_cq(hr_dev, mailbox, hr_cq->cqn); @@ -210,15 +203,18 @@ void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) } static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev, - struct ib_udata *udata, - struct hns_roce_cq_buf *buf, - struct ib_umem **umem, u64 buf_addr, int cqe) + struct hns_roce_cq *hr_cq, + struct hns_roce_ib_create_cq ucmd, + struct ib_udata *udata) { - int ret; + struct hns_roce_cq_buf *buf = &hr_cq->hr_buf; + struct ib_umem **umem = &hr_cq->umem; u32 page_shift; u32 npages; + int ret; - *umem = ib_umem_get(udata, buf_addr, cqe * hr_dev->caps.cq_entry_sz, + *umem = ib_umem_get(udata, ucmd.buf_addr, + hr_cq->cq_depth * hr_dev->caps.cq_entry_sz, IB_ACCESS_LOCAL_WRITE); if (IS_ERR(*umem)) return PTR_ERR(*umem); @@ -257,10 +253,12 @@ err_buf: } static int hns_roce_ib_alloc_cq_buf(struct hns_roce_dev *hr_dev, - struct hns_roce_cq_buf *buf, u32 nent) + struct hns_roce_cq *hr_cq) { - int ret; + struct hns_roce_cq_buf *buf = &hr_cq->hr_buf; u32 page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz; + u32 nent = hr_cq->cq_depth; + int ret; ret = hns_roce_buf_alloc(hr_dev, nent * hr_dev->caps.cq_entry_sz, (1 << page_shift) * 2, &buf->hr_buf, @@ -295,17 +293,16 @@ out: } static void hns_roce_ib_free_cq_buf(struct hns_roce_dev *hr_dev, - struct hns_roce_cq_buf *buf, int cqe) + struct hns_roce_cq *hr_cq) { - hns_roce_buf_free(hr_dev, (cqe + 1) * hr_dev->caps.cq_entry_sz, - &buf->hr_buf); + hns_roce_buf_free(hr_dev, hr_cq->cq_depth * hr_dev->caps.cq_entry_sz, + &hr_cq->hr_buf.hr_buf); } static int create_user_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, struct ib_udata *udata, - struct hns_roce_ib_create_cq_resp *resp, - int cq_entries) + struct hns_roce_ib_create_cq_resp *resp) { struct hns_roce_ib_create_cq ucmd; struct device *dev = hr_dev->dev; @@ -319,9 +316,7 @@ static int create_user_cq(struct hns_roce_dev *hr_dev, } /* Get user space address, write it into mtt table */ - ret = hns_roce_ib_get_cq_umem(hr_dev, udata, &hr_cq->hr_buf, - &hr_cq->umem, ucmd.buf_addr, - cq_entries); + ret = hns_roce_ib_get_cq_umem(hr_dev, hr_cq, ucmd, udata); if (ret) { dev_err(dev, "Failed to get_cq_umem.\n"); return ret; @@ -349,7 +344,7 @@ err_mtt: } static int create_kernel_cq(struct hns_roce_dev *hr_dev, - struct hns_roce_cq *hr_cq, int cq_entries) + struct hns_roce_cq *hr_cq) { struct device *dev = hr_dev->dev; int ret; @@ -365,7 +360,7 @@ static int create_kernel_cq(struct hns_roce_dev *hr_dev, } /* Init mtt table and write buff address to mtt table */ - ret = hns_roce_ib_alloc_cq_buf(hr_dev, &hr_cq->hr_buf, cq_entries); + ret = hns_roce_ib_alloc_cq_buf(hr_dev, hr_cq); if (ret) { dev_err(dev, "Failed to alloc_cq_buf.\n"); goto err_db; @@ -403,7 +398,7 @@ static void destroy_kernel_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); - hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, hr_cq->ib_cq.cqe); + hns_roce_ib_free_cq_buf(hr_dev, hr_cq); if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) hns_roce_free_db(hr_dev, &hr_cq->db); @@ -414,11 +409,11 @@ int hns_roce_ib_create_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); - struct device *dev = hr_dev->dev; struct hns_roce_ib_create_cq_resp resp = {}; struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); + struct device *dev = hr_dev->dev; int vector = attr->comp_vector; - int cq_entries = attr->cqe; + u32 cq_entries = attr->cqe; int ret; if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) { @@ -427,21 +422,27 @@ int hns_roce_ib_create_cq(struct ib_cq *ib_cq, return -EINVAL; } - if (hr_dev->caps.min_cqes) - cq_entries = max(cq_entries, hr_dev->caps.min_cqes); + if (vector >= hr_dev->caps.num_comp_vectors) { + dev_err(dev, "Create CQ failed, vector=%d, max=%d\n", + vector, hr_dev->caps.num_comp_vectors); + return -EINVAL; + } - cq_entries = roundup_pow_of_two((unsigned int)cq_entries); - hr_cq->ib_cq.cqe = cq_entries - 1; + cq_entries = max(cq_entries, hr_dev->caps.min_cqes); + cq_entries = roundup_pow_of_two(cq_entries); + hr_cq->ib_cq.cqe = cq_entries - 1; /* used as cqe index */ + hr_cq->cq_depth = cq_entries; + hr_cq->vector = vector; spin_lock_init(&hr_cq->lock); if (udata) { - ret = create_user_cq(hr_dev, hr_cq, udata, &resp, cq_entries); + ret = create_user_cq(hr_dev, hr_cq, udata, &resp); if (ret) { dev_err(dev, "Create cq failed in user mode!\n"); goto err_cq; } } else { - ret = create_kernel_cq(hr_dev, hr_cq, cq_entries); + ret = create_kernel_cq(hr_dev, hr_cq); if (ret) { dev_err(dev, "Create cq failed in kernel mode!\n"); goto err_cq; @@ -449,8 +450,7 @@ int hns_roce_ib_create_cq(struct ib_cq *ib_cq, } /* Allocate cq index, fill cq_context */ - ret = hns_roce_cq_alloc(hr_dev, cq_entries, &hr_cq->hr_buf.hr_mtt, - hr_cq, vector); + ret = hns_roce_cq_alloc(hr_dev, hr_cq); if (ret) { dev_err(dev, "Alloc CQ failed(%d).\n", ret); goto err_dbmap; @@ -468,7 +468,6 @@ int hns_roce_ib_create_cq(struct ib_cq *ib_cq, /* Get created cq handler and carry out event */ hr_cq->comp = hns_roce_ib_cq_comp; hr_cq->event = hns_roce_ib_cq_event; - hr_cq->cq_depth = cq_entries; if (udata) { resp.cqn = hr_cq->cqn; @@ -515,7 +514,7 @@ void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) &hr_cq->db); } else { /* Free the buff of stored cq */ - hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, ib_cq->cqe); + hns_roce_ib_free_cq_buf(hr_dev, hr_cq); if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) hns_roce_free_db(hr_dev, &hr_cq->db); } diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index a1b712e9d454..8f628a772644 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -812,8 +812,8 @@ struct hns_roce_caps { int max_qp_init_rdma; int max_qp_dest_rdma; int num_cqs; - int max_cqes; - int min_cqes; + u32 max_cqes; + u32 min_cqes; u32 min_wqes; int reserved_cqs; int reserved_srqs; @@ -944,7 +944,7 @@ struct hns_roce_hw { int (*mw_write_mtpt)(void *mb_buf, struct hns_roce_mw *mw); void (*write_cqc)(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts, - dma_addr_t dma_handle, int nent, u32 vector); + dma_addr_t dma_handle); int (*set_hem)(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, int obj, int step_idx); int (*clear_hem)(struct hns_roce_dev *hr_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 89a4c3afdda1..600d9f91889a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -1990,7 +1990,7 @@ static void *get_sw_cqe(struct hns_roce_cq *hr_cq, int n) /* Get cqe when Owner bit is Conversely with the MSB of cons_idx */ return (roce_get_bit(hr_cqe->cqe_byte_4, CQE_BYTE_4_OWNER_S) ^ - !!(n & (hr_cq->ib_cq.cqe + 1))) ? hr_cqe : NULL; + !!(n & hr_cq->cq_depth)) ? hr_cqe : NULL; } static struct hns_roce_cqe *next_cqe_sw(struct hns_roce_cq *hr_cq) @@ -2073,8 +2073,7 @@ static void hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, void *mb_buf, - u64 *mtts, dma_addr_t dma_handle, int nent, - u32 vector) + u64 *mtts, dma_addr_t dma_handle) { struct hns_roce_cq_context *cq_context = NULL; struct hns_roce_buf_list *tptr_buf; @@ -2109,9 +2108,9 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, roce_set_field(cq_context->cqc_byte_12, CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_M, CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_S, - ilog2((unsigned int)nent)); + ilog2(hr_cq->cq_depth)); roce_set_field(cq_context->cqc_byte_12, CQ_CONTEXT_CQC_BYTE_12_CEQN_M, - CQ_CONTEXT_CQC_BYTE_12_CEQN_S, vector); + CQ_CONTEXT_CQC_BYTE_12_CEQN_S, hr_cq->vector); cq_context->cur_cqe_ba0_l = cpu_to_le32((u32)(mtts[0])); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 907c95149cb9..77c9d7fdd9bc 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2457,7 +2457,7 @@ static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n) /* Get cqe when Owner bit is Conversely with the MSB of cons_idx */ return (roce_get_bit(cqe->byte_4, V2_CQE_BYTE_4_OWNER_S) ^ - !!(n & (hr_cq->ib_cq.cqe + 1))) ? cqe : NULL; + !!(n & hr_cq->cq_depth)) ? cqe : NULL; } static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *hr_cq) @@ -2550,8 +2550,7 @@ static void hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, void *mb_buf, - u64 *mtts, dma_addr_t dma_handle, int nent, - u32 vector) + u64 *mtts, dma_addr_t dma_handle) { struct hns_roce_v2_cq_context *cq_context; @@ -2563,9 +2562,10 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_ARM_ST_M, V2_CQC_BYTE_4_ARM_ST_S, REG_NXT_CEQE); roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_SHIFT_M, - V2_CQC_BYTE_4_SHIFT_S, ilog2((unsigned int)nent)); + V2_CQC_BYTE_4_SHIFT_S, + ilog2(hr_cq->cq_depth)); roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_CEQN_M, - V2_CQC_BYTE_4_CEQN_S, vector); + V2_CQC_BYTE_4_CEQN_S, hr_cq->vector); roce_set_field(cq_context->byte_8_cqn, V2_CQC_BYTE_8_CQN_M, V2_CQC_BYTE_8_CQN_S, hr_cq->cqn); -- cgit From 18a96d25ce84a365fbe9ddcb887ade80eb3a6017 Mon Sep 17 00:00:00 2001 From: Yixian Liu Date: Mon, 18 Nov 2019 10:34:51 +0800 Subject: RDMA/hns: Redefine the member of hns_roce_cq struct There is no need to package buf and mtt into hns_roce_cq_buf, which will make code more complex, just delete this struct and move buf and mtt into hns_roce_cq. Furthermore, we add size member for hns_roce_buf to avoid repeatly calculating where needed it. Link: https://lore.kernel.org/r/1574044493-46984-3-git-send-email-liweihang@hisilicon.com Signed-off-by: Yixian Liu Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_cq.c | 73 ++++++++++++----------------- drivers/infiniband/hw/hns/hns_roce_device.h | 9 ++-- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 9 ++-- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 3 +- 4 files changed, 38 insertions(+), 56 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 87d9b0fb84c8..e9c94536c59a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -101,9 +101,9 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, else mtt_table = &hr_dev->mr_table.mtt_table; - mtts = hns_roce_table_find(hr_dev, mtt_table, - hr_cq->hr_buf.hr_mtt.first_seg, + mtts = hns_roce_table_find(hr_dev, mtt_table, hr_cq->mtt.first_seg, &dma_handle); + if (!mtts) { dev_err(dev, "Failed to find mtt for CQ buf.\n"); return -EINVAL; @@ -207,45 +207,36 @@ static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev, struct hns_roce_ib_create_cq ucmd, struct ib_udata *udata) { - struct hns_roce_cq_buf *buf = &hr_cq->hr_buf; + struct hns_roce_buf *buf = &hr_cq->buf; + struct hns_roce_mtt *mtt = &hr_cq->mtt; struct ib_umem **umem = &hr_cq->umem; - u32 page_shift; u32 npages; int ret; - *umem = ib_umem_get(udata, ucmd.buf_addr, - hr_cq->cq_depth * hr_dev->caps.cq_entry_sz, + *umem = ib_umem_get(udata, ucmd.buf_addr, buf->size, IB_ACCESS_LOCAL_WRITE); if (IS_ERR(*umem)) return PTR_ERR(*umem); if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) - buf->hr_mtt.mtt_type = MTT_TYPE_CQE; + mtt->mtt_type = MTT_TYPE_CQE; else - buf->hr_mtt.mtt_type = MTT_TYPE_WQE; - - if (hr_dev->caps.cqe_buf_pg_sz) { - npages = (ib_umem_page_count(*umem) + - (1 << hr_dev->caps.cqe_buf_pg_sz) - 1) / - (1 << hr_dev->caps.cqe_buf_pg_sz); - page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz; - ret = hns_roce_mtt_init(hr_dev, npages, page_shift, - &buf->hr_mtt); - } else { - ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(*umem), - PAGE_SHIFT, &buf->hr_mtt); - } + mtt->mtt_type = MTT_TYPE_WQE; + + npages = DIV_ROUND_UP(ib_umem_page_count(*umem), + 1 << hr_dev->caps.cqe_buf_pg_sz); + ret = hns_roce_mtt_init(hr_dev, npages, buf->page_shift, mtt); if (ret) goto err_buf; - ret = hns_roce_ib_umem_write_mtt(hr_dev, &buf->hr_mtt, *umem); + ret = hns_roce_ib_umem_write_mtt(hr_dev, mtt, *umem); if (ret) goto err_mtt; return 0; err_mtt: - hns_roce_mtt_cleanup(hr_dev, &buf->hr_mtt); + hns_roce_mtt_cleanup(hr_dev, mtt); err_buf: ib_umem_release(*umem); @@ -255,39 +246,36 @@ err_buf: static int hns_roce_ib_alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { - struct hns_roce_cq_buf *buf = &hr_cq->hr_buf; - u32 page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz; - u32 nent = hr_cq->cq_depth; + struct hns_roce_buf *buf = &hr_cq->buf; + struct hns_roce_mtt *mtt = &hr_cq->mtt; int ret; - ret = hns_roce_buf_alloc(hr_dev, nent * hr_dev->caps.cq_entry_sz, - (1 << page_shift) * 2, &buf->hr_buf, - page_shift); + ret = hns_roce_buf_alloc(hr_dev, buf->size, (1 << buf->page_shift) * 2, + buf, buf->page_shift); if (ret) goto out; if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) - buf->hr_mtt.mtt_type = MTT_TYPE_CQE; + mtt->mtt_type = MTT_TYPE_CQE; else - buf->hr_mtt.mtt_type = MTT_TYPE_WQE; + mtt->mtt_type = MTT_TYPE_WQE; - ret = hns_roce_mtt_init(hr_dev, buf->hr_buf.npages, - buf->hr_buf.page_shift, &buf->hr_mtt); + ret = hns_roce_mtt_init(hr_dev, buf->npages, buf->page_shift, mtt); if (ret) goto err_buf; - ret = hns_roce_buf_write_mtt(hr_dev, &buf->hr_mtt, &buf->hr_buf); + ret = hns_roce_buf_write_mtt(hr_dev, mtt, buf); if (ret) goto err_mtt; return 0; err_mtt: - hns_roce_mtt_cleanup(hr_dev, &buf->hr_mtt); + hns_roce_mtt_cleanup(hr_dev, mtt); err_buf: - hns_roce_buf_free(hr_dev, nent * hr_dev->caps.cq_entry_sz, - &buf->hr_buf); + hns_roce_buf_free(hr_dev, buf->size, buf); + out: return ret; } @@ -295,8 +283,7 @@ out: static void hns_roce_ib_free_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { - hns_roce_buf_free(hr_dev, hr_cq->cq_depth * hr_dev->caps.cq_entry_sz, - &hr_cq->hr_buf.hr_buf); + hns_roce_buf_free(hr_dev, hr_cq->buf.size, &hr_cq->buf); } static int create_user_cq(struct hns_roce_dev *hr_dev, @@ -337,7 +324,7 @@ static int create_user_cq(struct hns_roce_dev *hr_dev, return 0; err_mtt: - hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); + hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); ib_umem_release(hr_cq->umem); return ret; @@ -390,14 +377,14 @@ static void destroy_user_cq(struct hns_roce_dev *hr_dev, (udata->outlen >= sizeof(*resp))) hns_roce_db_unmap_user(context, &hr_cq->db); - hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); + hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); ib_umem_release(hr_cq->umem); } static void destroy_kernel_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { - hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); + hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); hns_roce_ib_free_cq_buf(hr_dev, hr_cq); if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) @@ -433,6 +420,8 @@ int hns_roce_ib_create_cq(struct ib_cq *ib_cq, hr_cq->ib_cq.cqe = cq_entries - 1; /* used as cqe index */ hr_cq->cq_depth = cq_entries; hr_cq->vector = vector; + hr_cq->buf.size = hr_cq->cq_depth * hr_dev->caps.cq_entry_sz; + hr_cq->buf.page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz; spin_lock_init(&hr_cq->lock); if (udata) { @@ -502,7 +491,7 @@ void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) } hns_roce_free_cq(hr_dev, hr_cq); - hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); + hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); ib_umem_release(hr_cq->umem); if (udata) { diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 8f628a772644..608ec5936d71 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -448,6 +448,7 @@ struct hns_roce_buf { struct hns_roce_buf_list *page_list; int nbufs; u32 npages; + u32 size; int page_shift; }; @@ -479,14 +480,10 @@ struct hns_roce_db { int order; }; -struct hns_roce_cq_buf { - struct hns_roce_buf hr_buf; - struct hns_roce_mtt hr_mtt; -}; - struct hns_roce_cq { struct ib_cq ib_cq; - struct hns_roce_cq_buf hr_buf; + struct hns_roce_buf buf; + struct hns_roce_mtt mtt; struct hns_roce_db db; u8 db_en; spinlock_t lock; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 600d9f91889a..9a00361e96aa 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -1980,8 +1980,7 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, static void *get_cqe(struct hns_roce_cq *hr_cq, int n) { - return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf, - n * HNS_ROCE_V1_CQE_ENTRY_SIZE); + return hns_roce_buf_offset(&hr_cq->buf, n * HNS_ROCE_V1_CQE_ENTRY_SIZE); } static void *get_sw_cqe(struct hns_roce_cq *hr_cq, int n) @@ -3655,7 +3654,6 @@ static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) struct device *dev = &hr_dev->pdev->dev; u32 cqe_cnt_ori; u32 cqe_cnt_cur; - u32 cq_buf_size; int wait_time = 0; hns_roce_free_cq(hr_dev, hr_cq); @@ -3683,13 +3681,12 @@ static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) wait_time++; } - hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); + hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); ib_umem_release(hr_cq->umem); if (!udata) { /* Free the buff of stored cq */ - cq_buf_size = (ibcq->cqe + 1) * hr_dev->caps.cq_entry_sz; - hns_roce_buf_free(hr_dev, cq_buf_size, &hr_cq->hr_buf.hr_buf); + hns_roce_buf_free(hr_dev, hr_cq->buf.size, &hr_cq->buf); } } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 77c9d7fdd9bc..1026ac6e9440 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2447,8 +2447,7 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw) static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n) { - return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf, - n * HNS_ROCE_V2_CQE_ENTRY_SIZE); + return hns_roce_buf_offset(&hr_cq->buf, n * HNS_ROCE_V2_CQE_ENTRY_SIZE); } static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n) -- cgit From 707783ab5f48f054f8da3114ddcdf1685a313a63 Mon Sep 17 00:00:00 2001 From: Yixian Liu Date: Mon, 18 Nov 2019 10:34:52 +0800 Subject: RDMA/hns: Rename the functions used inside creating cq Current names of functions are not proper, such as hns_roce_free_cq, actually it means free cqc, thus we rename them. Furthermore, functions used inside one file can be named without the prefix hns_roce_ which will make the functions for verbs symbols more eye-catching. Link: https://lore.kernel.org/r/1574044493-46984-4-git-send-email-liweihang@hisilicon.com Signed-off-by: Yixian Liu Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_cmd.h | 4 +- drivers/infiniband/hw/hns/hns_roce_cq.c | 66 ++++++++++------------------- drivers/infiniband/hw/hns/hns_roce_device.h | 9 ++-- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 8 ++-- drivers/infiniband/hw/hns/hns_roce_main.c | 4 +- 5 files changed, 35 insertions(+), 56 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h index cd3ed2b462bd..1915bacaded0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.h +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h @@ -119,8 +119,8 @@ enum { HNS_ROCE_CMD_DESTROY_MPT = 0xf, /* CQ commands */ - HNS_ROCE_CMD_CREATE_CQ = 0x16, - HNS_ROCE_CMD_DESTROY_CQ = 0x17, + HNS_ROCE_CMD_CREATE_CQC = 0x16, + HNS_ROCE_CMD_DESTROY_CQC = 0x17, /* QP/EE commands */ HNS_ROCE_CMD_RST2INIT_QP = 0x19, diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index e9c94536c59a..9174d33bdfee 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -73,17 +73,8 @@ static void hns_roce_ib_cq_event(struct hns_roce_cq *hr_cq, } } -static int hns_roce_hw_create_cq(struct hns_roce_dev *dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long cq_num) -{ - return hns_roce_cmd_mbox(dev, mailbox->dma, 0, cq_num, 0, - HNS_ROCE_CMD_CREATE_CQ, - HNS_ROCE_CMD_TIMEOUT_MSECS); -} - -static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, - struct hns_roce_cq *hr_cq) +static int hns_roce_alloc_cqc(struct hns_roce_dev *hr_dev, + struct hns_roce_cq *hr_cq) { struct hns_roce_cmd_mailbox *mailbox; struct hns_roce_hem_table *mtt_table; @@ -140,7 +131,8 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, hr_dev->hw->write_cqc(hr_dev, hr_cq, mailbox->buf, mtts, dma_handle); /* Send mailbox to hw */ - ret = hns_roce_hw_create_cq(hr_dev, mailbox, hr_cq->cqn); + ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_cq->cqn, 0, + HNS_ROCE_CMD_CREATE_CQC, HNS_ROCE_CMD_TIMEOUT_MSECS); hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) { dev_err(dev, @@ -168,22 +160,15 @@ err_out: return ret; } -static int hns_roce_hw_destroy_cq(struct hns_roce_dev *dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long cq_num) -{ - return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, cq_num, - mailbox ? 0 : 1, HNS_ROCE_CMD_DESTROY_CQ, - HNS_ROCE_CMD_TIMEOUT_MSECS); -} - -void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) +void hns_roce_free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; struct device *dev = hr_dev->dev; int ret; - ret = hns_roce_hw_destroy_cq(hr_dev, NULL, hr_cq->cqn); + ret = hns_roce_cmd_mbox(hr_dev, 0, 0, hr_cq->cqn, 1, + HNS_ROCE_CMD_DESTROY_CQC, + HNS_ROCE_CMD_TIMEOUT_MSECS); if (ret) dev_err(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n", ret, hr_cq->cqn); @@ -202,10 +187,9 @@ void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR); } -static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev, - struct hns_roce_cq *hr_cq, - struct hns_roce_ib_create_cq ucmd, - struct ib_udata *udata) +static int get_cq_umem(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, + struct hns_roce_ib_create_cq ucmd, + struct ib_udata *udata) { struct hns_roce_buf *buf = &hr_cq->buf; struct hns_roce_mtt *mtt = &hr_cq->mtt; @@ -243,8 +227,7 @@ err_buf: return ret; } -static int hns_roce_ib_alloc_cq_buf(struct hns_roce_dev *hr_dev, - struct hns_roce_cq *hr_cq) +static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { struct hns_roce_buf *buf = &hr_cq->buf; struct hns_roce_mtt *mtt = &hr_cq->mtt; @@ -280,8 +263,7 @@ out: return ret; } -static void hns_roce_ib_free_cq_buf(struct hns_roce_dev *hr_dev, - struct hns_roce_cq *hr_cq) +static void free_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { hns_roce_buf_free(hr_dev, hr_cq->buf.size, &hr_cq->buf); } @@ -303,7 +285,7 @@ static int create_user_cq(struct hns_roce_dev *hr_dev, } /* Get user space address, write it into mtt table */ - ret = hns_roce_ib_get_cq_umem(hr_dev, hr_cq, ucmd, udata); + ret = get_cq_umem(hr_dev, hr_cq, ucmd, udata); if (ret) { dev_err(dev, "Failed to get_cq_umem.\n"); return ret; @@ -347,7 +329,7 @@ static int create_kernel_cq(struct hns_roce_dev *hr_dev, } /* Init mtt table and write buff address to mtt table */ - ret = hns_roce_ib_alloc_cq_buf(hr_dev, hr_cq); + ret = alloc_cq_buf(hr_dev, hr_cq); if (ret) { dev_err(dev, "Failed to alloc_cq_buf.\n"); goto err_db; @@ -385,15 +367,14 @@ static void destroy_kernel_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); - hns_roce_ib_free_cq_buf(hr_dev, hr_cq); + free_cq_buf(hr_dev, hr_cq); if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) hns_roce_free_db(hr_dev, &hr_cq->db); } -int hns_roce_ib_create_cq(struct ib_cq *ib_cq, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata) +int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); struct hns_roce_ib_create_cq_resp resp = {}; @@ -438,8 +419,7 @@ int hns_roce_ib_create_cq(struct ib_cq *ib_cq, } } - /* Allocate cq index, fill cq_context */ - ret = hns_roce_cq_alloc(hr_dev, hr_cq); + ret = hns_roce_alloc_cqc(hr_dev, hr_cq); if (ret) { dev_err(dev, "Alloc CQ failed(%d).\n", ret); goto err_dbmap; @@ -468,7 +448,7 @@ int hns_roce_ib_create_cq(struct ib_cq *ib_cq, return 0; err_cqc: - hns_roce_free_cq(hr_dev, hr_cq); + hns_roce_free_cqc(hr_dev, hr_cq); err_dbmap: if (udata) @@ -480,7 +460,7 @@ err_cq: return ret; } -void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) +void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); @@ -490,7 +470,7 @@ void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) return; } - hns_roce_free_cq(hr_dev, hr_cq); + hns_roce_free_cqc(hr_dev, hr_cq); hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); ib_umem_release(hr_cq->umem); @@ -503,7 +483,7 @@ void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) &hr_cq->db); } else { /* Free the buff of stored cq */ - hns_roce_ib_free_cq_buf(hr_dev, hr_cq); + free_cq_buf(hr_dev, hr_cq); if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) hns_roce_free_db(hr_dev, &hr_cq->db); } diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 608ec5936d71..152701e4a07f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1240,12 +1240,11 @@ void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn, __be32 send_ieth(const struct ib_send_wr *wr); int to_hr_qp_type(int qp_type); -int hns_roce_ib_create_cq(struct ib_cq *ib_cq, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata); +int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata); -void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); -void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq); +void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); +void hns_roce_free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq); int hns_roce_db_map_user(struct hns_roce_ucontext *context, struct ib_udata *udata, unsigned long virt, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 9a00361e96aa..2a2b2112f886 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -732,7 +732,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) if (!cq) return -ENOMEM; - ret = hns_roce_ib_create_cq(cq, &cq_init_attr, NULL); + ret = hns_roce_create_cq(cq, &cq_init_attr, NULL); if (ret) { dev_err(dev, "Create cq for reserved loop qp failed!"); goto alloc_cq_failed; @@ -868,7 +868,7 @@ alloc_pd_failed: kfree(pd); alloc_mem_failed: - hns_roce_ib_destroy_cq(cq, NULL); + hns_roce_destroy_cq(cq, NULL); alloc_cq_failed: kfree(cq); return ret; @@ -897,7 +897,7 @@ static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev) i, ret); } - hns_roce_ib_destroy_cq(&free_mr->mr_free_cq->ib_cq, NULL); + hns_roce_destroy_cq(&free_mr->mr_free_cq->ib_cq, NULL); kfree(&free_mr->mr_free_cq->ib_cq); hns_roce_dealloc_pd(&free_mr->mr_free_pd->ibpd, NULL); kfree(&free_mr->mr_free_pd->ibpd); @@ -3656,7 +3656,7 @@ static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) u32 cqe_cnt_cur; int wait_time = 0; - hns_roce_free_cq(hr_dev, hr_cq); + hns_roce_free_cqc(hr_dev, hr_cq); /* * Before freeing cq buffer, we need to ensure that the outstanding CQE diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 066f01c45b5a..854ef6e74788 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -419,14 +419,14 @@ static const struct ib_device_ops hns_roce_dev_ops = { .alloc_pd = hns_roce_alloc_pd, .alloc_ucontext = hns_roce_alloc_ucontext, .create_ah = hns_roce_create_ah, - .create_cq = hns_roce_ib_create_cq, + .create_cq = hns_roce_create_cq, .create_qp = hns_roce_create_qp, .dealloc_pd = hns_roce_dealloc_pd, .dealloc_ucontext = hns_roce_dealloc_ucontext, .del_gid = hns_roce_del_gid, .dereg_mr = hns_roce_dereg_mr, .destroy_ah = hns_roce_destroy_ah, - .destroy_cq = hns_roce_ib_destroy_cq, + .destroy_cq = hns_roce_destroy_cq, .disassociate_ucontext = hns_roce_disassociate_ucontext, .fill_res_entry = hns_roce_fill_res_entry, .get_dma_mr = hns_roce_get_dma_mr, -- cgit From f295e4cece5cb4c60715fed539abcd62468f9ef1 Mon Sep 17 00:00:00 2001 From: Yixian Liu Date: Mon, 18 Nov 2019 10:34:53 +0800 Subject: RDMA/hns: Delete unnecessary callback functions for cq Currently, when cq event occurred, we first call our own callback functions in the event process function, then call ib callback functions. Actually, we can directly call ib callback functions. Link: https://lore.kernel.org/r/1574044493-46984-5-git-send-email-liweihang@hisilicon.com Signed-off-by: Yixian Liu Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_cq.c | 91 ++++++++++++----------------- drivers/infiniband/hw/hns/hns_roce_device.h | 3 - 2 files changed, 36 insertions(+), 58 deletions(-) (limited to 'drivers/infiniband/hw') diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 9174d33bdfee..af1d8823b3f0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -39,40 +39,6 @@ #include #include "hns_roce_common.h" -static void hns_roce_ib_cq_comp(struct hns_roce_cq *hr_cq) -{ - struct ib_cq *ibcq = &hr_cq->ib_cq; - - ibcq->comp_handler(ibcq, ibcq->cq_context); -} - -static void hns_roce_ib_cq_event(struct hns_roce_cq *hr_cq, - enum hns_roce_event event_type) -{ - struct hns_roce_dev *hr_dev; - struct ib_event event; - struct ib_cq *ibcq; - - ibcq = &hr_cq->ib_cq; - hr_dev = to_hr_dev(ibcq->device); - - if (event_type != HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID && - event_type != HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR && - event_type != HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW) { - dev_err(hr_dev->dev, - "hns_roce_ib: Unexpected event type 0x%x on CQ %06lx\n", - event_type, hr_cq->cqn); - return; - } - - if (ibcq->event_handler) { - event.device = ibcq->device; - event.event = IB_EVENT_CQ_ERR; - event.element.cq = ibcq; - ibcq->event_handler(&event, ibcq->cq_context); - } -} - static int hns_roce_alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { @@ -434,10 +400,6 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, if (!udata && hr_cq->tptr_addr) *hr_cq->tptr_addr = 0; - /* Get created cq handler and carry out event */ - hr_cq->comp = hns_roce_ib_cq_comp; - hr_cq->event = hns_roce_ib_cq_event; - if (udata) { resp.cqn = hr_cq->cqn; ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); @@ -491,38 +453,57 @@ void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn) { - struct device *dev = hr_dev->dev; - struct hns_roce_cq *cq; + struct hns_roce_cq *hr_cq; + struct ib_cq *ibcq; - cq = xa_load(&hr_dev->cq_table.array, cqn & (hr_dev->caps.num_cqs - 1)); - if (!cq) { - dev_warn(dev, "Completion event for bogus CQ 0x%08x\n", cqn); + hr_cq = xa_load(&hr_dev->cq_table.array, + cqn & (hr_dev->caps.num_cqs - 1)); + if (!hr_cq) { + dev_warn(hr_dev->dev, "Completion event for bogus CQ 0x%06x\n", + cqn); return; } - ++cq->arm_sn; - cq->comp(cq); + ++hr_cq->arm_sn; + ibcq = &hr_cq->ib_cq; + if (ibcq->comp_handler) + ibcq->comp_handler(ibcq, ibcq->cq_context); } void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type) { - struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; struct device *dev = hr_dev->dev; - struct hns_roce_cq *cq; + struct hns_roce_cq *hr_cq; + struct ib_event event; + struct ib_cq *ibcq; - cq = xa_load(&cq_table->array, cqn & (hr_dev->caps.num_cqs - 1)); - if (cq) - atomic_inc(&cq->refcount); + hr_cq = xa_load(&hr_dev->cq_table.array, + cqn & (hr_dev->caps.num_cqs - 1)); + if (!hr_cq) { + dev_warn(dev, "Async event for bogus CQ 0x%06x\n", cqn); + return; + } - if (!cq) { - dev_warn(dev, "Async event for bogus CQ %08x\n", cqn); + if (event_type != HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID && + event_type != HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR && + event_type != HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW) { + dev_err(dev, "Unexpected event type 0x%x on CQ 0x%06x\n", + event_type, cqn); return; } - cq->event(cq, (enum hns_roce_event)event_type); + atomic_inc(&hr_cq->refcount); - if (atomic_dec_and_test(&cq->refcount)) - complete(&cq->free); + ibcq = &hr_cq->ib_cq; + if (ibcq->event_handler) { + event.device = ibcq->device; + event.element.cq = ibcq; + event.event = IB_EVENT_CQ_ERR; + ibcq->event_handler(&event, ibcq->cq_context); + } + + if (atomic_dec_and_test(&hr_cq->refcount)) + complete(&hr_cq->free); } int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 152701e4a07f..5617434cbfb4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -488,9 +488,6 @@ struct hns_roce_cq { u8 db_en; spinlock_t lock; struct ib_umem *umem; - void (*comp)(struct hns_roce_cq *cq); - void (*event)(struct hns_roce_cq *cq, enum hns_roce_event event_type); - u32 cq_depth; u32 cons_index; u32 *set_ci_db; -- cgit