diff options
Diffstat (limited to 'drivers/infiniband/hw')
53 files changed, 4979 insertions, 6111 deletions
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 95f6d493d1b9..5a7c090204c5 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -631,11 +631,12 @@ static u8 bnxt_re_stack_to_dev_nw_type(enum rdma_network_type ntype) return nw_type; } -int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata) +int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) { struct ib_pd *ib_pd = ib_ah->pd; struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); + struct rdma_ah_attr *ah_attr = init_attr->ah_attr; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); struct bnxt_re_dev *rdev = pd->rdev; const struct ib_gid_attr *sgid_attr; @@ -673,7 +674,8 @@ int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr, memcpy(ah->qplib_ah.dmac, ah_attr->roce.dmac, ETH_ALEN); rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah, - !(flags & RDMA_CREATE_AH_SLEEPABLE)); + !(init_attr->flags & + RDMA_CREATE_AH_SLEEPABLE)); if (rc) { ibdev_err(&rdev->ibdev, "Failed to allocate HW AH"); return rc; @@ -856,7 +858,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) return -EFAULT; - bytes = (qplib_qp->sq.max_wqe * BNXT_QPLIB_MAX_SQE_ENTRY_SIZE); + bytes = (qplib_qp->sq.max_wqe * qplib_qp->sq.wqe_size); /* Consider mapping PSN search memory only for RC QPs. */ if (qplib_qp->type == CMDQ_CREATE_QP_TYPE_RC) { psn_sz = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? @@ -879,7 +881,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, qplib_qp->qp_handle = ureq.qp_handle; if (!qp->qplib_qp.srq) { - bytes = (qplib_qp->rq.max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE); + bytes = (qplib_qp->rq.max_wqe * qplib_qp->rq.wqe_size); bytes = PAGE_ALIGN(bytes); umem = ib_umem_get(&rdev->ibdev, ureq.qprva, bytes, IB_ACCESS_LOCAL_WRITE); @@ -976,6 +978,7 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp qp->qplib_qp.sig_type = true; /* Shadow QP SQ depth should be same as QP1 RQ depth */ + qp->qplib_qp.sq.wqe_size = bnxt_re_get_swqe_size(); qp->qplib_qp.sq.max_wqe = qp1_qp->rq.max_wqe; qp->qplib_qp.sq.max_sge = 2; /* Q full delta can be 1 since it is internal QP */ @@ -986,6 +989,7 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp qp->qplib_qp.scq = qp1_qp->scq; qp->qplib_qp.rcq = qp1_qp->rcq; + qp->qplib_qp.rq.wqe_size = bnxt_re_get_rwqe_size(); qp->qplib_qp.rq.max_wqe = qp1_qp->rq.max_wqe; qp->qplib_qp.rq.max_sge = qp1_qp->rq.max_sge; /* Q full delta can be 1 since it is internal QP */ @@ -1021,10 +1025,12 @@ static int bnxt_re_init_rq_attr(struct bnxt_re_qp *qp, struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_qplib_qp *qplqp; struct bnxt_re_dev *rdev; + struct bnxt_qplib_q *rq; int entries; rdev = qp->rdev; qplqp = &qp->qplib_qp; + rq = &qplqp->rq; dev_attr = &rdev->dev_attr; if (init_attr->srq) { @@ -1036,23 +1042,21 @@ static int bnxt_re_init_rq_attr(struct bnxt_re_qp *qp, return -EINVAL; } qplqp->srq = &srq->qplib_srq; - qplqp->rq.max_wqe = 0; + rq->max_wqe = 0; } else { + rq->wqe_size = bnxt_re_get_rwqe_size(); /* Allocate 1 more than what's provided so posting max doesn't * mean empty. */ entries = roundup_pow_of_two(init_attr->cap.max_recv_wr + 1); - qplqp->rq.max_wqe = min_t(u32, entries, - dev_attr->max_qp_wqes + 1); - - qplqp->rq.q_full_delta = qplqp->rq.max_wqe - - init_attr->cap.max_recv_wr; - qplqp->rq.max_sge = init_attr->cap.max_recv_sge; - if (qplqp->rq.max_sge > dev_attr->max_qp_sges) - qplqp->rq.max_sge = dev_attr->max_qp_sges; + rq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1); + rq->q_full_delta = rq->max_wqe - init_attr->cap.max_recv_wr; + rq->max_sge = init_attr->cap.max_recv_sge; + if (rq->max_sge > dev_attr->max_qp_sges) + rq->max_sge = dev_attr->max_qp_sges; } - qplqp->rq.sg_info.pgsize = PAGE_SIZE; - qplqp->rq.sg_info.pgshft = PAGE_SHIFT; + rq->sg_info.pgsize = PAGE_SIZE; + rq->sg_info.pgshft = PAGE_SHIFT; return 0; } @@ -1080,15 +1084,18 @@ static void bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_qplib_qp *qplqp; struct bnxt_re_dev *rdev; + struct bnxt_qplib_q *sq; int entries; rdev = qp->rdev; qplqp = &qp->qplib_qp; + sq = &qplqp->sq; dev_attr = &rdev->dev_attr; - qplqp->sq.max_sge = init_attr->cap.max_send_sge; - if (qplqp->sq.max_sge > dev_attr->max_qp_sges) - qplqp->sq.max_sge = dev_attr->max_qp_sges; + sq->wqe_size = bnxt_re_get_swqe_size(); + sq->max_sge = init_attr->cap.max_send_sge; + if (sq->max_sge > dev_attr->max_qp_sges) + sq->max_sge = dev_attr->max_qp_sges; /* * Change the SQ depth if user has requested minimum using * configfs. Only supported for kernel consumers @@ -1096,9 +1103,9 @@ static void bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, entries = init_attr->cap.max_send_wr; /* Allocate 128 + 1 more than what's provided */ entries = roundup_pow_of_two(entries + BNXT_QPLIB_RESERVED_QP_WRS + 1); - qplqp->sq.max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + - BNXT_QPLIB_RESERVED_QP_WRS + 1); - qplqp->sq.q_full_delta = BNXT_QPLIB_RESERVED_QP_WRS + 1; + sq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + + BNXT_QPLIB_RESERVED_QP_WRS + 1); + sq->q_full_delta = BNXT_QPLIB_RESERVED_QP_WRS + 1; /* * Reserving one slot for Phantom WQE. Application can * post one extra entry in this case. But allowing this to avoid @@ -1511,7 +1518,7 @@ static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev, if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) return -EFAULT; - bytes = (qplib_srq->max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE); + bytes = (qplib_srq->max_wqe * qplib_srq->wqe_size); bytes = PAGE_ALIGN(bytes); umem = ib_umem_get(&rdev->ibdev, ureq.srqva, bytes, IB_ACCESS_LOCAL_WRITE); @@ -1534,15 +1541,20 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata) { - struct ib_pd *ib_pd = ib_srq->pd; - struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); - struct bnxt_re_dev *rdev = pd->rdev; - struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; - struct bnxt_re_srq *srq = - container_of(ib_srq, struct bnxt_re_srq, ib_srq); + struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_qplib_nq *nq = NULL; + struct bnxt_re_dev *rdev; + struct bnxt_re_srq *srq; + struct bnxt_re_pd *pd; + struct ib_pd *ib_pd; int rc, entries; + ib_pd = ib_srq->pd; + pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); + rdev = pd->rdev; + dev_attr = &rdev->dev_attr; + srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq); + if (srq_init_attr->attr.max_wr >= dev_attr->max_srq_wqes) { ibdev_err(&rdev->ibdev, "Create CQ failed - max exceeded"); rc = -EINVAL; @@ -1563,8 +1575,9 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, entries = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1); if (entries > dev_attr->max_srq_wqes + 1) entries = dev_attr->max_srq_wqes + 1; - srq->qplib_srq.max_wqe = entries; + + srq->qplib_srq.wqe_size = bnxt_re_get_rwqe_size(); srq->qplib_srq.max_sge = srq_init_attr->attr.max_sge; srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit; srq->srq_limit = srq_init_attr->attr.srq_limit; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 23d972da5652..204c0849ba28 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -142,6 +142,16 @@ struct bnxt_re_ucontext { spinlock_t sh_lock; /* protect shpg */ }; +static inline u16 bnxt_re_get_swqe_size(void) +{ + return sizeof(struct sq_send); +} + +static inline u16 bnxt_re_get_rwqe_size(void) +{ + return sizeof(struct rq_wqe); +} + int bnxt_re_query_device(struct ib_device *ibdev, struct ib_device_attr *ib_attr, struct ib_udata *udata); @@ -160,7 +170,7 @@ enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev, u8 port_num); int bnxt_re_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); void bnxt_re_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); -int bnxt_re_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, +int bnxt_re_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); int bnxt_re_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 899a5d2c100e..c5e29577cd43 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -300,12 +300,12 @@ static void bnxt_qplib_service_nq(unsigned long data) { struct bnxt_qplib_nq *nq = (struct bnxt_qplib_nq *)data; struct bnxt_qplib_hwq *hwq = &nq->hwq; - struct nq_base *nqe, **nq_ptr; - struct bnxt_qplib_cq *cq; - int num_cqne_processed = 0; int num_srqne_processed = 0; + int num_cqne_processed = 0; + struct bnxt_qplib_cq *cq; int budget = nq->budget; u32 sw_cons, raw_cons; + struct nq_base *nqe; uintptr_t q_handle; u16 type; @@ -314,8 +314,7 @@ static void bnxt_qplib_service_nq(unsigned long data) raw_cons = hwq->cons; while (budget--) { sw_cons = HWQ_CMP(raw_cons, hwq); - nq_ptr = (struct nq_base **)hwq->pbl_ptr; - nqe = &nq_ptr[NQE_PG(sw_cons)][NQE_IDX(sw_cons)]; + nqe = bnxt_qplib_get_qe(hwq, sw_cons, NULL); if (!NQE_CMP_VALID(nqe, raw_cons, hwq->max_elements)) break; @@ -392,13 +391,11 @@ static irqreturn_t bnxt_qplib_nq_irq(int irq, void *dev_instance) { struct bnxt_qplib_nq *nq = dev_instance; struct bnxt_qplib_hwq *hwq = &nq->hwq; - struct nq_base **nq_ptr; u32 sw_cons; /* Prefetch the NQ element */ sw_cons = HWQ_CMP(hwq->cons, hwq); - nq_ptr = (struct nq_base **)nq->hwq.pbl_ptr; - prefetch(&nq_ptr[NQE_PG(sw_cons)][NQE_IDX(sw_cons)]); + prefetch(bnxt_qplib_get_qe(hwq, sw_cons, NULL)); /* Fan out to CPU affinitized kthreads? */ tasklet_schedule(&nq->nq_tasklet); @@ -612,12 +609,13 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, struct cmdq_create_srq req; struct bnxt_qplib_pbl *pbl; u16 cmd_flags = 0; + u16 pg_sz_lvl; int rc, idx; hwq_attr.res = res; hwq_attr.sginfo = &srq->sg_info; hwq_attr.depth = srq->max_wqe; - hwq_attr.stride = BNXT_QPLIB_MAX_RQE_ENTRY_SIZE; + hwq_attr.stride = srq->wqe_size; hwq_attr.type = HWQ_TYPE_QUEUE; rc = bnxt_qplib_alloc_init_hwq(&srq->hwq, &hwq_attr); if (rc) @@ -638,22 +636,11 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, req.srq_size = cpu_to_le16((u16)srq->hwq.max_elements); pbl = &srq->hwq.pbl[PBL_LVL_0]; - req.pg_size_lvl = cpu_to_le16((((u16)srq->hwq.level & - CMDQ_CREATE_SRQ_LVL_MASK) << - CMDQ_CREATE_SRQ_LVL_SFT) | - (pbl->pg_size == ROCE_PG_SIZE_4K ? - CMDQ_CREATE_SRQ_PG_SIZE_PG_4K : - pbl->pg_size == ROCE_PG_SIZE_8K ? - CMDQ_CREATE_SRQ_PG_SIZE_PG_8K : - pbl->pg_size == ROCE_PG_SIZE_64K ? - CMDQ_CREATE_SRQ_PG_SIZE_PG_64K : - pbl->pg_size == ROCE_PG_SIZE_2M ? - CMDQ_CREATE_SRQ_PG_SIZE_PG_2M : - pbl->pg_size == ROCE_PG_SIZE_8M ? - CMDQ_CREATE_SRQ_PG_SIZE_PG_8M : - pbl->pg_size == ROCE_PG_SIZE_1G ? - CMDQ_CREATE_SRQ_PG_SIZE_PG_1G : - CMDQ_CREATE_SRQ_PG_SIZE_PG_4K)); + pg_sz_lvl = ((u16)bnxt_qplib_base_pg_size(&srq->hwq) << + CMDQ_CREATE_SRQ_PG_SIZE_SFT); + pg_sz_lvl |= (srq->hwq.level & CMDQ_CREATE_SRQ_LVL_MASK) << + CMDQ_CREATE_SRQ_LVL_SFT; + req.pg_size_lvl = cpu_to_le16(pg_sz_lvl); req.pbl = cpu_to_le64(pbl->pg_map_arr[0]); req.pd_id = cpu_to_le32(srq->pd->id); req.eventq_id = cpu_to_le16(srq->eventq_hw_ring_id); @@ -740,7 +727,7 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq, struct bnxt_qplib_swqe *wqe) { struct bnxt_qplib_hwq *srq_hwq = &srq->hwq; - struct rq_wqe *srqe, **srqe_ptr; + struct rq_wqe *srqe; struct sq_sge *hw_sge; u32 sw_prod, sw_cons, count = 0; int i, rc = 0, next; @@ -758,9 +745,8 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq, spin_unlock(&srq_hwq->lock); sw_prod = HWQ_CMP(srq_hwq->prod, srq_hwq); - srqe_ptr = (struct rq_wqe **)srq_hwq->pbl_ptr; - srqe = &srqe_ptr[RQE_PG(sw_prod)][RQE_IDX(sw_prod)]; - memset(srqe, 0, BNXT_QPLIB_MAX_RQE_ENTRY_SIZE); + srqe = bnxt_qplib_get_qe(srq_hwq, sw_prod, NULL); + memset(srqe, 0, srq->wqe_size); /* Calculate wqe_size16 and data_len */ for (i = 0, hw_sge = (struct sq_sge *)srqe->data; i < wqe->num_sge; i++, hw_sge++) { @@ -809,6 +795,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) struct bnxt_qplib_pbl *pbl; u16 cmd_flags = 0; u32 qp_flags = 0; + u8 pg_sz_lvl; int rc; RCFW_CMD_PREP(req, CREATE_QP1, cmd_flags); @@ -822,7 +809,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) hwq_attr.res = res; hwq_attr.sginfo = &sq->sg_info; hwq_attr.depth = sq->max_wqe; - hwq_attr.stride = BNXT_QPLIB_MAX_SQE_ENTRY_SIZE; + hwq_attr.stride = sq->wqe_size; hwq_attr.type = HWQ_TYPE_QUEUE; rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr); if (rc) @@ -835,33 +822,18 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) } pbl = &sq->hwq.pbl[PBL_LVL_0]; req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); - req.sq_pg_size_sq_lvl = - ((sq->hwq.level & CMDQ_CREATE_QP1_SQ_LVL_MASK) - << CMDQ_CREATE_QP1_SQ_LVL_SFT) | - (pbl->pg_size == ROCE_PG_SIZE_4K ? - CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_4K : - pbl->pg_size == ROCE_PG_SIZE_8K ? - CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_8K : - pbl->pg_size == ROCE_PG_SIZE_64K ? - CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_64K : - pbl->pg_size == ROCE_PG_SIZE_2M ? - CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_2M : - pbl->pg_size == ROCE_PG_SIZE_8M ? - CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_8M : - pbl->pg_size == ROCE_PG_SIZE_1G ? - CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_1G : - CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_4K); + pg_sz_lvl = (bnxt_qplib_base_pg_size(&sq->hwq) << + CMDQ_CREATE_QP1_SQ_PG_SIZE_SFT); + pg_sz_lvl |= (sq->hwq.level & CMDQ_CREATE_QP1_SQ_LVL_MASK); + req.sq_pg_size_sq_lvl = pg_sz_lvl; if (qp->scq) req.scq_cid = cpu_to_le32(qp->scq->id); - - qp_flags |= CMDQ_CREATE_QP1_QP_FLAGS_RESERVED_LKEY_ENABLE; - /* RQ */ if (rq->max_wqe) { hwq_attr.res = res; hwq_attr.sginfo = &rq->sg_info; - hwq_attr.stride = BNXT_QPLIB_MAX_RQE_ENTRY_SIZE; + hwq_attr.stride = rq->wqe_size; hwq_attr.depth = qp->rq.max_wqe; hwq_attr.type = HWQ_TYPE_QUEUE; rc = bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr); @@ -876,32 +848,20 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) } pbl = &rq->hwq.pbl[PBL_LVL_0]; req.rq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); - req.rq_pg_size_rq_lvl = - ((rq->hwq.level & CMDQ_CREATE_QP1_RQ_LVL_MASK) << - CMDQ_CREATE_QP1_RQ_LVL_SFT) | - (pbl->pg_size == ROCE_PG_SIZE_4K ? - CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_4K : - pbl->pg_size == ROCE_PG_SIZE_8K ? - CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_8K : - pbl->pg_size == ROCE_PG_SIZE_64K ? - CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_64K : - pbl->pg_size == ROCE_PG_SIZE_2M ? - CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_2M : - pbl->pg_size == ROCE_PG_SIZE_8M ? - CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_8M : - pbl->pg_size == ROCE_PG_SIZE_1G ? - CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_1G : - CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_4K); + pg_sz_lvl = (bnxt_qplib_base_pg_size(&rq->hwq) << + CMDQ_CREATE_QP1_RQ_PG_SIZE_SFT); + pg_sz_lvl |= (rq->hwq.level & CMDQ_CREATE_QP1_RQ_LVL_MASK); + req.rq_pg_size_rq_lvl = pg_sz_lvl; if (qp->rcq) req.rcq_cid = cpu_to_le32(qp->rcq->id); } - /* Header buffer - allow hdr_buf pass in */ rc = bnxt_qplib_alloc_qp_hdr_buf(res, qp); if (rc) { rc = -ENOMEM; goto fail; } + qp_flags |= CMDQ_CREATE_QP1_QP_FLAGS_RESERVED_LKEY_ENABLE; req.qp_flags = cpu_to_le32(qp_flags); req.sq_size = cpu_to_le32(sq->hwq.max_elements); req.rq_size = cpu_to_le32(rq->hwq.max_elements); @@ -948,23 +908,47 @@ exit: return rc; } +static void bnxt_qplib_init_psn_ptr(struct bnxt_qplib_qp *qp, int size) +{ + struct bnxt_qplib_hwq *hwq; + struct bnxt_qplib_q *sq; + u64 fpsne, psne, psn_pg; + u16 indx_pad = 0, indx; + u16 pg_num, pg_indx; + u64 *page; + + sq = &qp->sq; + hwq = &sq->hwq; + + fpsne = (u64)bnxt_qplib_get_qe(hwq, hwq->max_elements, &psn_pg); + if (!IS_ALIGNED(fpsne, PAGE_SIZE)) + indx_pad = ALIGN(fpsne, PAGE_SIZE) / size; + + page = (u64 *)psn_pg; + for (indx = 0; indx < hwq->max_elements; indx++) { + pg_num = (indx + indx_pad) / (PAGE_SIZE / size); + pg_indx = (indx + indx_pad) % (PAGE_SIZE / size); + psne = page[pg_num] + pg_indx * size; + sq->swq[indx].psn_ext = (struct sq_psn_search_ext *)psne; + sq->swq[indx].psn_search = (struct sq_psn_search *)psne; + } +} + int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) { struct bnxt_qplib_rcfw *rcfw = res->rcfw; struct bnxt_qplib_hwq_attr hwq_attr = {}; - unsigned long int psn_search, poff = 0; struct bnxt_qplib_sg_info sginfo = {}; - struct sq_psn_search **psn_search_ptr; struct bnxt_qplib_q *sq = &qp->sq; struct bnxt_qplib_q *rq = &qp->rq; - int i, rc, req_size, psn_sz = 0; - struct sq_send **hw_sq_send_ptr; struct creq_create_qp_resp resp; + int rc, req_size, psn_sz = 0; struct bnxt_qplib_hwq *xrrq; u16 cmd_flags = 0, max_ssge; - struct cmdq_create_qp req; struct bnxt_qplib_pbl *pbl; + struct cmdq_create_qp req; u32 qp_flags = 0; + u8 pg_sz_lvl; u16 max_rsge; RCFW_CMD_PREP(req, CREATE_QP, cmd_flags); @@ -983,7 +967,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) hwq_attr.res = res; hwq_attr.sginfo = &sq->sg_info; - hwq_attr.stride = BNXT_QPLIB_MAX_SQE_ENTRY_SIZE; + hwq_attr.stride = sq->wqe_size; hwq_attr.depth = sq->max_wqe; hwq_attr.aux_stride = psn_sz; hwq_attr.aux_depth = hwq_attr.depth; @@ -997,64 +981,25 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) rc = -ENOMEM; goto fail_sq; } - hw_sq_send_ptr = (struct sq_send **)sq->hwq.pbl_ptr; - if (psn_sz) { - psn_search_ptr = (struct sq_psn_search **) - &hw_sq_send_ptr[get_sqe_pg - (sq->hwq.max_elements)]; - psn_search = (unsigned long int) - &hw_sq_send_ptr[get_sqe_pg(sq->hwq.max_elements)] - [get_sqe_idx(sq->hwq.max_elements)]; - if (psn_search & ~PAGE_MASK) { - /* If the psn_search does not start on a page boundary, - * then calculate the offset - */ - poff = (psn_search & ~PAGE_MASK) / - BNXT_QPLIB_MAX_PSNE_ENTRY_SIZE; - } - for (i = 0; i < sq->hwq.max_elements; i++) { - sq->swq[i].psn_search = - &psn_search_ptr[get_psne_pg(i + poff)] - [get_psne_idx(i + poff)]; - /*psns_ext will be used only for P5 chips. */ - sq->swq[i].psn_ext = - (struct sq_psn_search_ext *) - &psn_search_ptr[get_psne_pg(i + poff)] - [get_psne_idx(i + poff)]; - } - } + + if (psn_sz) + bnxt_qplib_init_psn_ptr(qp, psn_sz); + pbl = &sq->hwq.pbl[PBL_LVL_0]; req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); - req.sq_pg_size_sq_lvl = - ((sq->hwq.level & CMDQ_CREATE_QP_SQ_LVL_MASK) - << CMDQ_CREATE_QP_SQ_LVL_SFT) | - (pbl->pg_size == ROCE_PG_SIZE_4K ? - CMDQ_CREATE_QP_SQ_PG_SIZE_PG_4K : - pbl->pg_size == ROCE_PG_SIZE_8K ? - CMDQ_CREATE_QP_SQ_PG_SIZE_PG_8K : - pbl->pg_size == ROCE_PG_SIZE_64K ? - CMDQ_CREATE_QP_SQ_PG_SIZE_PG_64K : - pbl->pg_size == ROCE_PG_SIZE_2M ? - CMDQ_CREATE_QP_SQ_PG_SIZE_PG_2M : - pbl->pg_size == ROCE_PG_SIZE_8M ? - CMDQ_CREATE_QP_SQ_PG_SIZE_PG_8M : - pbl->pg_size == ROCE_PG_SIZE_1G ? - CMDQ_CREATE_QP_SQ_PG_SIZE_PG_1G : - CMDQ_CREATE_QP_SQ_PG_SIZE_PG_4K); + pg_sz_lvl = (bnxt_qplib_base_pg_size(&sq->hwq) << + CMDQ_CREATE_QP_SQ_PG_SIZE_SFT); + pg_sz_lvl |= (sq->hwq.level & CMDQ_CREATE_QP_SQ_LVL_MASK); + req.sq_pg_size_sq_lvl = pg_sz_lvl; if (qp->scq) req.scq_cid = cpu_to_le32(qp->scq->id); - qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE; - qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED; - if (qp->sig_type) - qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION; - /* RQ */ if (rq->max_wqe) { hwq_attr.res = res; hwq_attr.sginfo = &rq->sg_info; - hwq_attr.stride = BNXT_QPLIB_MAX_RQE_ENTRY_SIZE; + hwq_attr.stride = rq->wqe_size; hwq_attr.depth = rq->max_wqe; hwq_attr.aux_stride = 0; hwq_attr.aux_depth = 0; @@ -1071,22 +1016,10 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) } pbl = &rq->hwq.pbl[PBL_LVL_0]; req.rq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); - req.rq_pg_size_rq_lvl = - ((rq->hwq.level & CMDQ_CREATE_QP_RQ_LVL_MASK) << - CMDQ_CREATE_QP_RQ_LVL_SFT) | - (pbl->pg_size == ROCE_PG_SIZE_4K ? - CMDQ_CREATE_QP_RQ_PG_SIZE_PG_4K : - pbl->pg_size == ROCE_PG_SIZE_8K ? - CMDQ_CREATE_QP_RQ_PG_SIZE_PG_8K : - pbl->pg_size == ROCE_PG_SIZE_64K ? - CMDQ_CREATE_QP_RQ_PG_SIZE_PG_64K : - pbl->pg_size == ROCE_PG_SIZE_2M ? - CMDQ_CREATE_QP_RQ_PG_SIZE_PG_2M : - pbl->pg_size == ROCE_PG_SIZE_8M ? - CMDQ_CREATE_QP_RQ_PG_SIZE_PG_8M : - pbl->pg_size == ROCE_PG_SIZE_1G ? - CMDQ_CREATE_QP_RQ_PG_SIZE_PG_1G : - CMDQ_CREATE_QP_RQ_PG_SIZE_PG_4K); + pg_sz_lvl = (bnxt_qplib_base_pg_size(&rq->hwq) << + CMDQ_CREATE_QP_RQ_PG_SIZE_SFT); + pg_sz_lvl |= (rq->hwq.level & CMDQ_CREATE_QP_RQ_LVL_MASK); + req.rq_pg_size_rq_lvl = pg_sz_lvl; } else { /* SRQ */ if (qp->srq) { @@ -1097,7 +1030,13 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) if (qp->rcq) req.rcq_cid = cpu_to_le32(qp->rcq->id); + + qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE; + qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED; + if (qp->sig_type) + qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION; req.qp_flags = cpu_to_le32(qp_flags); + req.sq_size = cpu_to_le32(sq->hwq.max_elements); req.rq_size = cpu_to_le32(rq->hwq.max_elements); qp->sq_hdr_buf = NULL; @@ -1483,12 +1422,11 @@ bail: static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp) { struct bnxt_qplib_hwq *cq_hwq = &cq->hwq; - struct cq_base *hw_cqe, **hw_cqe_ptr; + struct cq_base *hw_cqe; int i; for (i = 0; i < cq_hwq->max_elements; i++) { - hw_cqe_ptr = (struct cq_base **)cq_hwq->pbl_ptr; - hw_cqe = &hw_cqe_ptr[CQE_PG(i)][CQE_IDX(i)]; + hw_cqe = bnxt_qplib_get_qe(cq_hwq, i, NULL); if (!CQE_CMP_VALID(hw_cqe, i, cq_hwq->max_elements)) continue; /* @@ -1615,6 +1553,34 @@ void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp, return NULL; } +static void bnxt_qplib_fill_psn_search(struct bnxt_qplib_qp *qp, + struct bnxt_qplib_swqe *wqe, + struct bnxt_qplib_swq *swq) +{ + struct sq_psn_search_ext *psns_ext; + struct sq_psn_search *psns; + u32 flg_npsn; + u32 op_spsn; + + psns = swq->psn_search; + psns_ext = swq->psn_ext; + + op_spsn = ((swq->start_psn << SQ_PSN_SEARCH_START_PSN_SFT) & + SQ_PSN_SEARCH_START_PSN_MASK); + op_spsn |= ((wqe->type << SQ_PSN_SEARCH_OPCODE_SFT) & + SQ_PSN_SEARCH_OPCODE_MASK); + flg_npsn = ((swq->next_psn << SQ_PSN_SEARCH_NEXT_PSN_SFT) & + SQ_PSN_SEARCH_NEXT_PSN_MASK); + + if (bnxt_qplib_is_chip_gen_p5(qp->cctx)) { + psns_ext->opcode_start_psn = cpu_to_le32(op_spsn); + psns_ext->flags_next_psn = cpu_to_le32(flg_npsn); + } else { + psns->opcode_start_psn = cpu_to_le32(op_spsn); + psns->flags_next_psn = cpu_to_le32(flg_npsn); + } +} + void bnxt_qplib_post_send_db(struct bnxt_qplib_qp *qp) { struct bnxt_qplib_q *sq = &qp->sq; @@ -1625,16 +1591,16 @@ void bnxt_qplib_post_send_db(struct bnxt_qplib_qp *qp) int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, struct bnxt_qplib_swqe *wqe) { + struct bnxt_qplib_nq_work *nq_work = NULL; + int i, rc = 0, data_len = 0, pkt_num = 0; struct bnxt_qplib_q *sq = &qp->sq; + struct sq_send *hw_sq_send_hdr; struct bnxt_qplib_swq *swq; - struct sq_send *hw_sq_send_hdr, **hw_sq_send_ptr; - struct sq_sge *hw_sge; - struct bnxt_qplib_nq_work *nq_work = NULL; bool sch_handler = false; - u32 sw_prod; + struct sq_sge *hw_sge; u8 wqe_size16; - int i, rc = 0, data_len = 0, pkt_num = 0; __le32 temp32; + u32 sw_prod; if (qp->state != CMDQ_MODIFY_QP_NEW_STATE_RTS) { if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { @@ -1663,11 +1629,8 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, swq->flags |= SQ_SEND_FLAGS_SIGNAL_COMP; swq->start_psn = sq->psn & BTH_PSN_MASK; - hw_sq_send_ptr = (struct sq_send **)sq->hwq.pbl_ptr; - hw_sq_send_hdr = &hw_sq_send_ptr[get_sqe_pg(sw_prod)] - [get_sqe_idx(sw_prod)]; - - memset(hw_sq_send_hdr, 0, BNXT_QPLIB_MAX_SQE_ENTRY_SIZE); + hw_sq_send_hdr = bnxt_qplib_get_qe(&sq->hwq, sw_prod, NULL); + memset(hw_sq_send_hdr, 0, sq->wqe_size); if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) { /* Copy the inline data */ @@ -1854,28 +1817,8 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, goto done; } swq->next_psn = sq->psn & BTH_PSN_MASK; - if (swq->psn_search) { - u32 opcd_spsn; - u32 flg_npsn; - - opcd_spsn = ((swq->start_psn << SQ_PSN_SEARCH_START_PSN_SFT) & - SQ_PSN_SEARCH_START_PSN_MASK); - opcd_spsn |= ((wqe->type << SQ_PSN_SEARCH_OPCODE_SFT) & - SQ_PSN_SEARCH_OPCODE_MASK); - flg_npsn = ((swq->next_psn << SQ_PSN_SEARCH_NEXT_PSN_SFT) & - SQ_PSN_SEARCH_NEXT_PSN_MASK); - if (bnxt_qplib_is_chip_gen_p5(qp->cctx)) { - swq->psn_ext->opcode_start_psn = - cpu_to_le32(opcd_spsn); - swq->psn_ext->flags_next_psn = - cpu_to_le32(flg_npsn); - } else { - swq->psn_search->opcode_start_psn = - cpu_to_le32(opcd_spsn); - swq->psn_search->flags_next_psn = - cpu_to_le32(flg_npsn); - } - } + if (qp->type == CMDQ_CREATE_QP_TYPE_RC) + bnxt_qplib_fill_psn_search(qp, wqe, swq); queue_err: if (sch_handler) { /* Store the ULP info in the software structures */ @@ -1918,13 +1861,13 @@ void bnxt_qplib_post_recv_db(struct bnxt_qplib_qp *qp) int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp, struct bnxt_qplib_swqe *wqe) { - struct bnxt_qplib_q *rq = &qp->rq; - struct rq_wqe *rqe, **rqe_ptr; - struct sq_sge *hw_sge; struct bnxt_qplib_nq_work *nq_work = NULL; + struct bnxt_qplib_q *rq = &qp->rq; bool sch_handler = false; - u32 sw_prod; + struct sq_sge *hw_sge; + struct rq_wqe *rqe; int i, rc = 0; + u32 sw_prod; if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { sch_handler = true; @@ -1941,10 +1884,8 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp, sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq); rq->swq[sw_prod].wr_id = wqe->wr_id; - rqe_ptr = (struct rq_wqe **)rq->hwq.pbl_ptr; - rqe = &rqe_ptr[RQE_PG(sw_prod)][RQE_IDX(sw_prod)]; - - memset(rqe, 0, BNXT_QPLIB_MAX_RQE_ENTRY_SIZE); + rqe = bnxt_qplib_get_qe(&rq->hwq, sw_prod, NULL); + memset(rqe, 0, rq->wqe_size); /* Calculate wqe_size16 and data_len */ for (i = 0, hw_sge = (struct sq_sge *)rqe->data; @@ -1997,9 +1938,10 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) struct bnxt_qplib_rcfw *rcfw = res->rcfw; struct bnxt_qplib_hwq_attr hwq_attr = {}; struct creq_create_cq_resp resp; - struct cmdq_create_cq req; struct bnxt_qplib_pbl *pbl; + struct cmdq_create_cq req; u16 cmd_flags = 0; + u32 pg_sz_lvl; int rc; hwq_attr.res = res; @@ -2020,22 +1962,13 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) } req.dpi = cpu_to_le32(cq->dpi->dpi); req.cq_handle = cpu_to_le64(cq->cq_handle); - req.cq_size = cpu_to_le32(cq->hwq.max_elements); pbl = &cq->hwq.pbl[PBL_LVL_0]; - req.pg_size_lvl = cpu_to_le32( - ((cq->hwq.level & CMDQ_CREATE_CQ_LVL_MASK) << - CMDQ_CREATE_CQ_LVL_SFT) | - (pbl->pg_size == ROCE_PG_SIZE_4K ? CMDQ_CREATE_CQ_PG_SIZE_PG_4K : - pbl->pg_size == ROCE_PG_SIZE_8K ? CMDQ_CREATE_CQ_PG_SIZE_PG_8K : - pbl->pg_size == ROCE_PG_SIZE_64K ? CMDQ_CREATE_CQ_PG_SIZE_PG_64K : - pbl->pg_size == ROCE_PG_SIZE_2M ? CMDQ_CREATE_CQ_PG_SIZE_PG_2M : - pbl->pg_size == ROCE_PG_SIZE_8M ? CMDQ_CREATE_CQ_PG_SIZE_PG_8M : - pbl->pg_size == ROCE_PG_SIZE_1G ? CMDQ_CREATE_CQ_PG_SIZE_PG_1G : - CMDQ_CREATE_CQ_PG_SIZE_PG_4K)); - + pg_sz_lvl = (bnxt_qplib_base_pg_size(&cq->hwq) << + CMDQ_CREATE_CQ_PG_SIZE_SFT); + pg_sz_lvl |= (cq->hwq.level & CMDQ_CREATE_CQ_LVL_MASK); + req.pg_size_lvl = cpu_to_le32(pg_sz_lvl); req.pbl = cpu_to_le64(pbl->pg_map_arr[0]); - req.cq_fco_cnq_id = cpu_to_le32( (cq->cnq_hw_ring_id & CMDQ_CREATE_CQ_CNQ_ID_MASK) << CMDQ_CREATE_CQ_CNQ_ID_SFT); @@ -2194,13 +2127,13 @@ void bnxt_qplib_mark_qp_error(void *qp_handle) static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, u32 cq_cons, u32 sw_sq_cons, u32 cqe_sq_cons) { - struct bnxt_qplib_q *sq = &qp->sq; - struct bnxt_qplib_swq *swq; u32 peek_sw_cq_cons, peek_raw_cq_cons, peek_sq_cons_idx; - struct cq_base *peek_hwcqe, **peek_hw_cqe_ptr; + struct bnxt_qplib_q *sq = &qp->sq; struct cq_req *peek_req_hwcqe; struct bnxt_qplib_qp *peek_qp; struct bnxt_qplib_q *peek_sq; + struct bnxt_qplib_swq *swq; + struct cq_base *peek_hwcqe; int i, rc = 0; /* Normal mode */ @@ -2230,9 +2163,8 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, i = cq->hwq.max_elements; while (i--) { peek_sw_cq_cons = HWQ_CMP((peek_sw_cq_cons), &cq->hwq); - peek_hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr; - peek_hwcqe = &peek_hw_cqe_ptr[CQE_PG(peek_sw_cq_cons)] - [CQE_IDX(peek_sw_cq_cons)]; + peek_hwcqe = bnxt_qplib_get_qe(&cq->hwq, + peek_sw_cq_cons, NULL); /* If the next hwcqe is VALID */ if (CQE_CMP_VALID(peek_hwcqe, peek_raw_cq_cons, cq->hwq.max_elements)) { @@ -2294,11 +2226,11 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe **pcqe, int *budget, u32 cq_cons, struct bnxt_qplib_qp **lib_qp) { - struct bnxt_qplib_qp *qp; - struct bnxt_qplib_q *sq; - struct bnxt_qplib_cqe *cqe; u32 sw_sq_cons, cqe_sq_cons; struct bnxt_qplib_swq *swq; + struct bnxt_qplib_cqe *cqe; + struct bnxt_qplib_qp *qp; + struct bnxt_qplib_q *sq; int rc = 0; qp = (struct bnxt_qplib_qp *)((unsigned long) @@ -2408,10 +2340,10 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe **pcqe, int *budget) { - struct bnxt_qplib_qp *qp; - struct bnxt_qplib_q *rq; struct bnxt_qplib_srq *srq; struct bnxt_qplib_cqe *cqe; + struct bnxt_qplib_qp *qp; + struct bnxt_qplib_q *rq; u32 wr_id_idx; int rc = 0; @@ -2483,10 +2415,10 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe **pcqe, int *budget) { - struct bnxt_qplib_qp *qp; - struct bnxt_qplib_q *rq; struct bnxt_qplib_srq *srq; struct bnxt_qplib_cqe *cqe; + struct bnxt_qplib_qp *qp; + struct bnxt_qplib_q *rq; u32 wr_id_idx; int rc = 0; @@ -2561,15 +2493,13 @@ done: bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq) { - struct cq_base *hw_cqe, **hw_cqe_ptr; + struct cq_base *hw_cqe; u32 sw_cons, raw_cons; bool rc = true; raw_cons = cq->hwq.cons; sw_cons = HWQ_CMP(raw_cons, &cq->hwq); - hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr; - hw_cqe = &hw_cqe_ptr[CQE_PG(sw_cons)][CQE_IDX(sw_cons)]; - + hw_cqe = bnxt_qplib_get_qe(&cq->hwq, sw_cons, NULL); /* Check for Valid bit. If the CQE is valid, return false */ rc = !CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements); return rc; @@ -2813,7 +2743,7 @@ int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq, int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, int num_cqes, struct bnxt_qplib_qp **lib_qp) { - struct cq_base *hw_cqe, **hw_cqe_ptr; + struct cq_base *hw_cqe; u32 sw_cons, raw_cons; int budget, rc = 0; @@ -2822,8 +2752,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, while (budget) { sw_cons = HWQ_CMP(raw_cons, &cq->hwq); - hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr; - hw_cqe = &hw_cqe_ptr[CQE_PG(sw_cons)][CQE_IDX(sw_cons)]; + hw_cqe = bnxt_qplib_get_qe(&cq->hwq, sw_cons, NULL); /* Check for Valid bit */ if (!CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements)) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index 7edb70b6bb16..568ca390322c 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -45,6 +45,7 @@ struct bnxt_qplib_srq { struct bnxt_qplib_db_info dbinfo; u64 srq_handle; u32 id; + u16 wqe_size; u32 max_wqe; u32 max_sge; u32 threshold; @@ -65,38 +66,7 @@ struct bnxt_qplib_sge { u32 size; }; -#define BNXT_QPLIB_MAX_SQE_ENTRY_SIZE sizeof(struct sq_send) - -#define SQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_MAX_SQE_ENTRY_SIZE) -#define SQE_MAX_IDX_PER_PG (SQE_CNT_PER_PG - 1) - -static inline u32 get_sqe_pg(u32 val) -{ - return ((val & ~SQE_MAX_IDX_PER_PG) / SQE_CNT_PER_PG); -} - -static inline u32 get_sqe_idx(u32 val) -{ - return (val & SQE_MAX_IDX_PER_PG); -} - -#define BNXT_QPLIB_MAX_PSNE_ENTRY_SIZE sizeof(struct sq_psn_search) - -#define PSNE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_MAX_PSNE_ENTRY_SIZE) -#define PSNE_MAX_IDX_PER_PG (PSNE_CNT_PER_PG - 1) - -static inline u32 get_psne_pg(u32 val) -{ - return ((val & ~PSNE_MAX_IDX_PER_PG) / PSNE_CNT_PER_PG); -} - -static inline u32 get_psne_idx(u32 val) -{ - return (val & PSNE_MAX_IDX_PER_PG); -} - #define BNXT_QPLIB_QP_MAX_SGL 6 - struct bnxt_qplib_swq { u64 wr_id; int next_idx; @@ -226,19 +196,13 @@ struct bnxt_qplib_swqe { }; }; -#define BNXT_QPLIB_MAX_RQE_ENTRY_SIZE sizeof(struct rq_wqe) - -#define RQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_MAX_RQE_ENTRY_SIZE) -#define RQE_MAX_IDX_PER_PG (RQE_CNT_PER_PG - 1) -#define RQE_PG(x) (((x) & ~RQE_MAX_IDX_PER_PG) / RQE_CNT_PER_PG) -#define RQE_IDX(x) ((x) & RQE_MAX_IDX_PER_PG) - struct bnxt_qplib_q { struct bnxt_qplib_hwq hwq; struct bnxt_qplib_swq *swq; struct bnxt_qplib_db_info dbinfo; struct bnxt_qplib_sg_info sg_info; u32 max_wqe; + u16 wqe_size; u16 q_full_delta; u16 max_sge; u32 psn; @@ -256,7 +220,7 @@ struct bnxt_qplib_qp { struct bnxt_qplib_dpi *dpi; struct bnxt_qplib_chip_ctx *cctx; u64 qp_handle; -#define BNXT_QPLIB_QP_ID_INVALID 0xFFFFFFFF +#define BNXT_QPLIB_QP_ID_INVALID 0xFFFFFFFF u32 id; u8 type; u8 sig_type; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index f01e864bb611..4e211162acee 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -89,10 +89,9 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, struct creq_base *resp, void *sb, u8 is_block) { struct bnxt_qplib_cmdq_ctx *cmdq = &rcfw->cmdq; - struct bnxt_qplib_cmdqe *cmdqe, **hwq_ptr; struct bnxt_qplib_hwq *hwq = &cmdq->hwq; struct bnxt_qplib_crsqe *crsqe; - u32 cmdq_depth = rcfw->cmdq_depth; + struct bnxt_qplib_cmdqe *cmdqe; u32 sw_prod, cmdq_prod; struct pci_dev *pdev; unsigned long flags; @@ -163,13 +162,11 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, BNXT_QPLIB_CMDQE_UNITS; } - hwq_ptr = (struct bnxt_qplib_cmdqe **)hwq->pbl_ptr; preq = (u8 *)req; do { /* Locate the next cmdq slot */ sw_prod = HWQ_CMP(hwq->prod, hwq); - cmdqe = &hwq_ptr[get_cmdq_pg(sw_prod, cmdq_depth)] - [get_cmdq_idx(sw_prod, cmdq_depth)]; + cmdqe = bnxt_qplib_get_qe(hwq, sw_prod, NULL); if (!cmdqe) { dev_err(&pdev->dev, "RCFW request failed with no cmdqe!\n"); @@ -378,7 +375,7 @@ static void bnxt_qplib_service_creq(unsigned long data) struct bnxt_qplib_creq_ctx *creq = &rcfw->creq; u32 type, budget = CREQ_ENTRY_POLL_BUDGET; struct bnxt_qplib_hwq *hwq = &creq->hwq; - struct creq_base *creqe, **hwq_ptr; + struct creq_base *creqe; u32 sw_cons, raw_cons; unsigned long flags; @@ -387,8 +384,7 @@ static void bnxt_qplib_service_creq(unsigned long data) raw_cons = hwq->cons; while (budget > 0) { sw_cons = HWQ_CMP(raw_cons, hwq); - hwq_ptr = (struct creq_base **)hwq->pbl_ptr; - creqe = &hwq_ptr[get_creq_pg(sw_cons)][get_creq_idx(sw_cons)]; + creqe = bnxt_qplib_get_qe(hwq, sw_cons, NULL); if (!CREQ_CMP_VALID(creqe, raw_cons, hwq->max_elements)) break; /* The valid test of the entry must be done first before @@ -434,7 +430,6 @@ static irqreturn_t bnxt_qplib_creq_irq(int irq, void *dev_instance) { struct bnxt_qplib_rcfw *rcfw = dev_instance; struct bnxt_qplib_creq_ctx *creq; - struct creq_base **creq_ptr; struct bnxt_qplib_hwq *hwq; u32 sw_cons; @@ -442,8 +437,7 @@ static irqreturn_t bnxt_qplib_creq_irq(int irq, void *dev_instance) hwq = &creq->hwq; /* Prefetch the CREQ element */ sw_cons = HWQ_CMP(hwq->cons, hwq); - creq_ptr = (struct creq_base **)creq->hwq.pbl_ptr; - prefetch(&creq_ptr[get_creq_pg(sw_cons)][get_creq_idx(sw_cons)]); + prefetch(bnxt_qplib_get_qe(hwq, sw_cons, NULL)); tasklet_schedule(&creq->creq_tasklet); @@ -468,29 +462,13 @@ int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw) return 0; } -static int __get_pbl_pg_idx(struct bnxt_qplib_pbl *pbl) -{ - return (pbl->pg_size == ROCE_PG_SIZE_4K ? - CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_4K : - pbl->pg_size == ROCE_PG_SIZE_8K ? - CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_8K : - pbl->pg_size == ROCE_PG_SIZE_64K ? - CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_64K : - pbl->pg_size == ROCE_PG_SIZE_2M ? - CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_2M : - pbl->pg_size == ROCE_PG_SIZE_8M ? - CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_8M : - pbl->pg_size == ROCE_PG_SIZE_1G ? - CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_1G : - CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_4K); -} - int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw, struct bnxt_qplib_ctx *ctx, int is_virtfn) { - struct cmdq_initialize_fw req; struct creq_initialize_fw_resp resp; - u16 cmd_flags = 0, level; + struct cmdq_initialize_fw req; + u16 cmd_flags = 0; + u8 pgsz, lvl; int rc; RCFW_CMD_PREP(req, INITIALIZE_FW, cmd_flags); @@ -511,32 +489,30 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw, if (bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx)) goto config_vf_res; - level = ctx->qpc_tbl.level; - req.qpc_pg_size_qpc_lvl = (level << CMDQ_INITIALIZE_FW_QPC_LVL_SFT) | - __get_pbl_pg_idx(&ctx->qpc_tbl.pbl[level]); - level = ctx->mrw_tbl.level; - req.mrw_pg_size_mrw_lvl = (level << CMDQ_INITIALIZE_FW_MRW_LVL_SFT) | - __get_pbl_pg_idx(&ctx->mrw_tbl.pbl[level]); - level = ctx->srqc_tbl.level; - req.srq_pg_size_srq_lvl = (level << CMDQ_INITIALIZE_FW_SRQ_LVL_SFT) | - __get_pbl_pg_idx(&ctx->srqc_tbl.pbl[level]); - level = ctx->cq_tbl.level; - req.cq_pg_size_cq_lvl = (level << CMDQ_INITIALIZE_FW_CQ_LVL_SFT) | - __get_pbl_pg_idx(&ctx->cq_tbl.pbl[level]); - level = ctx->srqc_tbl.level; - req.srq_pg_size_srq_lvl = (level << CMDQ_INITIALIZE_FW_SRQ_LVL_SFT) | - __get_pbl_pg_idx(&ctx->srqc_tbl.pbl[level]); - level = ctx->cq_tbl.level; - req.cq_pg_size_cq_lvl = (level << CMDQ_INITIALIZE_FW_CQ_LVL_SFT) | - __get_pbl_pg_idx(&ctx->cq_tbl.pbl[level]); - level = ctx->tim_tbl.level; - req.tim_pg_size_tim_lvl = (level << CMDQ_INITIALIZE_FW_TIM_LVL_SFT) | - __get_pbl_pg_idx(&ctx->tim_tbl.pbl[level]); - level = ctx->tqm_ctx.pde.level; - req.tqm_pg_size_tqm_lvl = - (level << CMDQ_INITIALIZE_FW_TQM_LVL_SFT) | - __get_pbl_pg_idx(&ctx->tqm_ctx.pde.pbl[level]); - + lvl = ctx->qpc_tbl.level; + pgsz = bnxt_qplib_base_pg_size(&ctx->qpc_tbl); + req.qpc_pg_size_qpc_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) | + lvl; + lvl = ctx->mrw_tbl.level; + pgsz = bnxt_qplib_base_pg_size(&ctx->mrw_tbl); + req.mrw_pg_size_mrw_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) | + lvl; + lvl = ctx->srqc_tbl.level; + pgsz = bnxt_qplib_base_pg_size(&ctx->srqc_tbl); + req.srq_pg_size_srq_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) | + lvl; + lvl = ctx->cq_tbl.level; + pgsz = bnxt_qplib_base_pg_size(&ctx->cq_tbl); + req.cq_pg_size_cq_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) | + lvl; + lvl = ctx->tim_tbl.level; + pgsz = bnxt_qplib_base_pg_size(&ctx->tim_tbl); + req.tim_pg_size_tim_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) | + lvl; + lvl = ctx->tqm_ctx.pde.level; + pgsz = bnxt_qplib_base_pg_size(&ctx->tqm_ctx.pde); + req.tqm_pg_size_tqm_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) | + lvl; req.qpc_page_dir = cpu_to_le64(ctx->qpc_tbl.pbl[PBL_LVL_0].pg_map_arr[0]); req.mrw_page_dir = diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index 411fce3493b6..157387636d00 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -87,12 +87,6 @@ static inline u32 bnxt_qplib_cmdqe_page_size(u32 depth) return (bnxt_qplib_cmdqe_npages(depth) * PAGE_SIZE); } -static inline u32 bnxt_qplib_cmdqe_cnt_per_pg(u32 depth) -{ - return (bnxt_qplib_cmdqe_page_size(depth) / - BNXT_QPLIB_CMDQE_UNITS); -} - /* Set the cmd_size to a factor of CMDQE unit */ static inline void bnxt_qplib_set_cmd_slots(struct cmdq_base *req) { @@ -100,30 +94,12 @@ static inline void bnxt_qplib_set_cmd_slots(struct cmdq_base *req) BNXT_QPLIB_CMDQE_UNITS; } -#define MAX_CMDQ_IDX(depth) ((depth) - 1) - -static inline u32 bnxt_qplib_max_cmdq_idx_per_pg(u32 depth) -{ - return (bnxt_qplib_cmdqe_cnt_per_pg(depth) - 1); -} - #define RCFW_MAX_COOKIE_VALUE 0x7FFF #define RCFW_CMD_IS_BLOCKING 0x8000 #define RCFW_BLOCKED_CMD_WAIT_COUNT 0x4E20 #define HWRM_VERSION_RCFW_CMDQ_DEPTH_CHECK 0x1000900020011ULL -static inline u32 get_cmdq_pg(u32 val, u32 depth) -{ - return (val & ~(bnxt_qplib_max_cmdq_idx_per_pg(depth))) / - (bnxt_qplib_cmdqe_cnt_per_pg(depth)); -} - -static inline u32 get_cmdq_idx(u32 val, u32 depth) -{ - return val & (bnxt_qplib_max_cmdq_idx_per_pg(depth)); -} - /* Crsq buf is 1024-Byte */ struct bnxt_qplib_crsbe { u8 data[1024]; @@ -133,76 +109,9 @@ struct bnxt_qplib_crsbe { /* Allocate 1 per QP for async error notification for now */ #define BNXT_QPLIB_CREQE_MAX_CNT (64 * 1024) #define BNXT_QPLIB_CREQE_UNITS 16 /* 16-Bytes per prod unit */ -#define BNXT_QPLIB_CREQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_CREQE_UNITS) - -#define MAX_CREQ_IDX (BNXT_QPLIB_CREQE_MAX_CNT - 1) -#define MAX_CREQ_IDX_PER_PG (BNXT_QPLIB_CREQE_CNT_PER_PG - 1) - -static inline u32 get_creq_pg(u32 val) -{ - return (val & ~MAX_CREQ_IDX_PER_PG) / BNXT_QPLIB_CREQE_CNT_PER_PG; -} - -static inline u32 get_creq_idx(u32 val) -{ - return val & MAX_CREQ_IDX_PER_PG; -} - -#define BNXT_QPLIB_CREQE_PER_PG (PAGE_SIZE / sizeof(struct creq_base)) - #define CREQ_CMP_VALID(hdr, raw_cons, cp_bit) \ (!!((hdr)->v & CREQ_BASE_V) == \ !((raw_cons) & (cp_bit))) - -#define CREQ_DB_KEY_CP (0x2 << CMPL_DOORBELL_KEY_SFT) -#define CREQ_DB_IDX_VALID CMPL_DOORBELL_IDX_VALID -#define CREQ_DB_IRQ_DIS CMPL_DOORBELL_MASK -#define CREQ_DB_CP_FLAGS_REARM (CREQ_DB_KEY_CP | \ - CREQ_DB_IDX_VALID) -#define CREQ_DB_CP_FLAGS (CREQ_DB_KEY_CP | \ - CREQ_DB_IDX_VALID | \ - CREQ_DB_IRQ_DIS) - -static inline void bnxt_qplib_ring_creq_db64(void __iomem *db, u32 index, - u32 xid, bool arm) -{ - u64 val = 0; - - val = xid & DBC_DBC_XID_MASK; - val |= DBC_DBC_PATH_ROCE; - val |= arm ? DBC_DBC_TYPE_NQ_ARM : DBC_DBC_TYPE_NQ; - val <<= 32; - val |= index & DBC_DBC_INDEX_MASK; - - writeq(val, db); -} - -static inline void bnxt_qplib_ring_creq_db_rearm(void __iomem *db, u32 raw_cons, - u32 max_elements, u32 xid, - bool gen_p5) -{ - u32 index = raw_cons & (max_elements - 1); - - if (gen_p5) - bnxt_qplib_ring_creq_db64(db, index, xid, true); - else - writel(CREQ_DB_CP_FLAGS_REARM | (index & DBC_DBC32_XID_MASK), - db); -} - -static inline void bnxt_qplib_ring_creq_db(void __iomem *db, u32 raw_cons, - u32 max_elements, u32 xid, - bool gen_p5) -{ - u32 index = raw_cons & (max_elements - 1); - - if (gen_p5) - bnxt_qplib_ring_creq_db64(db, index, xid, true); - else - writel(CREQ_DB_CP_FLAGS | (index & DBC_DBC32_XID_MASK), - db); -} - #define CREQ_ENTRY_POLL_BUDGET 0x100 /* HWQ */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index cab1adf1fed9..7efa6e5dce62 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -347,6 +347,7 @@ done: hwq->depth = hwq_attr->depth; hwq->max_elements = depth; hwq->element_size = stride; + hwq->qe_ppg = pg_size / stride; /* For direct access to the elements */ lvl = hwq->level; if (hwq_attr->sginfo->nopte && hwq->level) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 95b645dbbc2d..c29cbd3a2d7b 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -80,6 +80,15 @@ enum bnxt_qplib_pbl_lvl { #define ROCE_PG_SIZE_8M (8 * 1024 * 1024) #define ROCE_PG_SIZE_1G (1024 * 1024 * 1024) +enum bnxt_qplib_hwrm_pg_size { + BNXT_QPLIB_HWRM_PG_SIZE_4K = 0, + BNXT_QPLIB_HWRM_PG_SIZE_8K = 1, + BNXT_QPLIB_HWRM_PG_SIZE_64K = 2, + BNXT_QPLIB_HWRM_PG_SIZE_2M = 3, + BNXT_QPLIB_HWRM_PG_SIZE_8M = 4, + BNXT_QPLIB_HWRM_PG_SIZE_1G = 5, +}; + struct bnxt_qplib_reg_desc { u8 bar_id; resource_size_t bar_base; @@ -126,6 +135,7 @@ struct bnxt_qplib_hwq { u32 max_elements; u32 depth; u16 element_size; /* Size of each entry */ + u16 qe_ppg; /* queue entry per page */ u32 prod; /* raw */ u32 cons; /* raw */ @@ -263,6 +273,49 @@ static inline u8 bnxt_qplib_get_ring_type(struct bnxt_qplib_chip_ctx *cctx) RING_ALLOC_REQ_RING_TYPE_ROCE_CMPL; } +static inline u8 bnxt_qplib_base_pg_size(struct bnxt_qplib_hwq *hwq) +{ + u8 pg_size = BNXT_QPLIB_HWRM_PG_SIZE_4K; + struct bnxt_qplib_pbl *pbl; + + pbl = &hwq->pbl[PBL_LVL_0]; + switch (pbl->pg_size) { + case ROCE_PG_SIZE_4K: + pg_size = BNXT_QPLIB_HWRM_PG_SIZE_4K; + break; + case ROCE_PG_SIZE_8K: + pg_size = BNXT_QPLIB_HWRM_PG_SIZE_8K; + break; + case ROCE_PG_SIZE_64K: + pg_size = BNXT_QPLIB_HWRM_PG_SIZE_64K; + break; + case ROCE_PG_SIZE_2M: + pg_size = BNXT_QPLIB_HWRM_PG_SIZE_2M; + break; + case ROCE_PG_SIZE_8M: + pg_size = BNXT_QPLIB_HWRM_PG_SIZE_8M; + break; + case ROCE_PG_SIZE_1G: + pg_size = BNXT_QPLIB_HWRM_PG_SIZE_1G; + break; + default: + break; + } + + return pg_size; +} + +static inline void *bnxt_qplib_get_qe(struct bnxt_qplib_hwq *hwq, + u32 indx, u64 *pg) +{ + u32 pg_num, pg_idx; + + pg_num = (indx / hwq->qe_ppg); + pg_idx = (indx % hwq->qe_ppg); + if (pg) + *pg = (u64)&hwq->pbl_ptr[pg_num]; + return (void *)(hwq->pbl_ptr[pg_num] + hwq->element_size * pg_idx); +} #define to_bnxt_qplib(ptr, type, member) \ container_of(ptr, type, member) diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h index e4b09e7c2175..6f00f07420b7 100644 --- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h +++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h @@ -210,6 +210,20 @@ struct sq_send { __le32 data[24]; }; +/* sq_send_hdr (size:256b/32B) */ +struct sq_send_hdr { + u8 wqe_type; + u8 flags; + u8 wqe_size; + u8 reserved8_1; + __le32 inv_key_or_imm_data; + __le32 length; + __le32 q_key; + __le32 dst_qp; + __le32 avid; + __le64 reserved64; +}; + /* Send Raw Ethernet and QP1 SQ WQE (40 bytes) */ struct sq_send_raweth_qp1 { u8 wqe_type; @@ -265,6 +279,21 @@ struct sq_send_raweth_qp1 { __le32 data[24]; }; +/* sq_send_raweth_qp1_hdr (size:256b/32B) */ +struct sq_send_raweth_qp1_hdr { + u8 wqe_type; + u8 flags; + u8 wqe_size; + u8 reserved8; + __le16 lflags; + __le16 cfa_action; + __le32 length; + __le32 reserved32_1; + __le32 cfa_meta; + __le32 reserved32_2; + __le64 reserved64; +}; + /* RDMA SQ WQE (40 bytes) */ struct sq_rdma { u8 wqe_type; @@ -288,6 +317,20 @@ struct sq_rdma { __le32 data[24]; }; +/* sq_rdma_hdr (size:256b/32B) */ +struct sq_rdma_hdr { + u8 wqe_type; + u8 flags; + u8 wqe_size; + u8 reserved8; + __le32 imm_data; + __le32 length; + __le32 reserved32_1; + __le64 remote_va; + __le32 remote_key; + __le32 reserved32_2; +}; + /* Atomic SQ WQE (40 bytes) */ struct sq_atomic { u8 wqe_type; @@ -307,6 +350,17 @@ struct sq_atomic { __le32 data[24]; }; +/* sq_atomic_hdr (size:256b/32B) */ +struct sq_atomic_hdr { + u8 wqe_type; + u8 flags; + __le16 reserved16; + __le32 remote_key; + __le64 remote_va; + __le64 swap_data; + __le64 cmp_data; +}; + /* Local Invalidate SQ WQE (40 bytes) */ struct sq_localinvalidate { u8 wqe_type; @@ -324,6 +378,16 @@ struct sq_localinvalidate { __le32 data[24]; }; +/* sq_localinvalidate_hdr (size:256b/32B) */ +struct sq_localinvalidate_hdr { + u8 wqe_type; + u8 flags; + __le16 reserved16; + __le32 inv_l_key; + __le64 reserved64; + u8 reserved128[16]; +}; + /* FR-PMR SQ WQE (40 bytes) */ struct sq_fr_pmr { u8 wqe_type; @@ -380,6 +444,21 @@ struct sq_fr_pmr { __le32 data[24]; }; +/* sq_fr_pmr_hdr (size:256b/32B) */ +struct sq_fr_pmr_hdr { + u8 wqe_type; + u8 flags; + u8 access_cntl; + u8 zero_based_page_size_log; + __le32 l_key; + u8 length[5]; + u8 reserved8_1; + u8 reserved8_2; + u8 numlevels_pbl_page_size_log; + __le64 pblptr; + __le64 va; +}; + /* Bind SQ WQE (40 bytes) */ struct sq_bind { u8 wqe_type; @@ -417,6 +496,22 @@ struct sq_bind { #define SQ_BIND_DATA_SFT 0 }; +/* sq_bind_hdr (size:256b/32B) */ +struct sq_bind_hdr { + u8 wqe_type; + u8 flags; + u8 access_cntl; + u8 reserved8_1; + u8 mw_type_zero_based; + u8 reserved8_2; + __le16 reserved16; + __le32 parent_l_key; + __le32 l_key; + __le64 va; + u8 length[5]; + u8 reserved24[3]; +}; + /* RQ/SRQ WQE Structures */ /* RQ/SRQ WQE (40 bytes) */ struct rq_wqe { @@ -435,6 +530,17 @@ struct rq_wqe { __le32 data[24]; }; +/* rq_wqe_hdr (size:256b/32B) */ +struct rq_wqe_hdr { + u8 wqe_type; + u8 flags; + u8 wqe_size; + u8 reserved8; + __le32 reserved32; + __le32 wr_id[2]; + u8 reserved128[16]; +}; + /* CQ CQE Structures */ /* Base CQE (32 bytes) */ struct cq_base { diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h index aa7396a1588a..1889dd172a25 100644 --- a/drivers/infiniband/hw/efa/efa.h +++ b/drivers/infiniband/hw/efa/efa.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_H_ @@ -40,6 +40,7 @@ struct efa_sw_stats { atomic64_t reg_mr_err; atomic64_t alloc_ucontext_err; atomic64_t create_ah_err; + atomic64_t mmap_err; }; /* Don't use anything other than atomic64 */ @@ -153,8 +154,7 @@ int efa_mmap(struct ib_ucontext *ibucontext, struct vm_area_struct *vma); void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry); int efa_create_ah(struct ib_ah *ibah, - struct rdma_ah_attr *ah_attr, - u32 flags, + struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); void efa_destroy_ah(struct ib_ah *ibah, u32 flags); int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c index 7fce69f5568f..336bc2c57bb1 100644 --- a/drivers/infiniband/hw/efa/efa_com.c +++ b/drivers/infiniband/hw/efa/efa_com.c @@ -631,17 +631,20 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq, cmd->aq_common_descriptor.opcode, PTR_ERR(comp_ctx)); up(&aq->avail_cmds); + atomic64_inc(&aq->stats.cmd_err); return PTR_ERR(comp_ctx); } err = efa_com_wait_and_process_admin_cq(comp_ctx, aq); - if (err) + if (err) { ibdev_err_ratelimited( aq->efa_dev, "Failed to process command %s (opcode %u) comp_status %d err %d\n", efa_com_cmd_str(cmd->aq_common_descriptor.opcode), cmd->aq_common_descriptor.opcode, comp_ctx->comp_status, err); + atomic64_inc(&aq->stats.cmd_err); + } up(&aq->avail_cmds); diff --git a/drivers/infiniband/hw/efa/efa_com.h b/drivers/infiniband/hw/efa/efa_com.h index c67dd8109d1c..5e4c88877ddb 100644 --- a/drivers/infiniband/hw/efa/efa_com.h +++ b/drivers/infiniband/hw/efa/efa_com.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_COM_H_ @@ -47,6 +47,7 @@ struct efa_com_admin_sq { struct efa_com_stats_admin { atomic64_t submitted_cmd; atomic64_t completed_cmd; + atomic64_t cmd_err; atomic64_t no_completion; }; diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 5c57098a4aee..08313f7c73bc 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -37,13 +37,16 @@ struct efa_user_mmap_entry { op(EFA_RX_DROPS, "rx_drops") \ op(EFA_SUBMITTED_CMDS, "submitted_cmds") \ op(EFA_COMPLETED_CMDS, "completed_cmds") \ + op(EFA_CMDS_ERR, "cmds_err") \ op(EFA_NO_COMPLETION_CMDS, "no_completion_cmds") \ op(EFA_KEEP_ALIVE_RCVD, "keep_alive_rcvd") \ op(EFA_ALLOC_PD_ERR, "alloc_pd_err") \ op(EFA_CREATE_QP_ERR, "create_qp_err") \ + op(EFA_CREATE_CQ_ERR, "create_cq_err") \ op(EFA_REG_MR_ERR, "reg_mr_err") \ op(EFA_ALLOC_UCONTEXT_ERR, "alloc_ucontext_err") \ - op(EFA_CREATE_AH_ERR, "create_ah_err") + op(EFA_CREATE_AH_ERR, "create_ah_err") \ + op(EFA_MMAP_ERR, "mmap_err") #define EFA_STATS_ENUM(ename, name) ename, #define EFA_STATS_STR(ename, name) [ename] = name, @@ -1568,6 +1571,7 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext, ibdev_dbg(&dev->ibdev, "pgoff[%#lx] does not have valid entry\n", vma->vm_pgoff); + atomic64_inc(&dev->stats.sw_stats.mmap_err); return -EINVAL; } entry = to_emmap(rdma_entry); @@ -1603,12 +1607,14 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext, err = -EINVAL; } - if (err) + if (err) { ibdev_dbg( &dev->ibdev, "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n", entry->address, rdma_entry->npages * PAGE_SIZE, entry->mmap_flag, err); + atomic64_inc(&dev->stats.sw_stats.mmap_err); + } rdma_user_mmap_entry_put(rdma_entry); return err; @@ -1639,10 +1645,10 @@ static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah) } int efa_create_ah(struct ib_ah *ibah, - struct rdma_ah_attr *ah_attr, - u32 flags, + struct rdma_ah_init_attr *init_attr, struct ib_udata *udata) { + struct rdma_ah_attr *ah_attr = init_attr->ah_attr; struct efa_dev *dev = to_edev(ibah->device); struct efa_com_create_ah_params params = {}; struct efa_ibv_create_ah_resp resp = {}; @@ -1650,7 +1656,7 @@ int efa_create_ah(struct ib_ah *ibah, struct efa_ah *ah = to_eah(ibah); int err; - if (!(flags & RDMA_CREATE_AH_SLEEPABLE)) { + if (!(init_attr->flags & RDMA_CREATE_AH_SLEEPABLE)) { ibdev_dbg(&dev->ibdev, "Create address handle is not supported in atomic context\n"); err = -EOPNOTSUPP; @@ -1747,15 +1753,18 @@ int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, as = &dev->edev.aq.stats; stats->value[EFA_SUBMITTED_CMDS] = atomic64_read(&as->submitted_cmd); stats->value[EFA_COMPLETED_CMDS] = atomic64_read(&as->completed_cmd); + stats->value[EFA_CMDS_ERR] = atomic64_read(&as->cmd_err); stats->value[EFA_NO_COMPLETION_CMDS] = atomic64_read(&as->no_completion); s = &dev->stats; stats->value[EFA_KEEP_ALIVE_RCVD] = atomic64_read(&s->keep_alive_rcvd); stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->sw_stats.alloc_pd_err); stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->sw_stats.create_qp_err); + stats->value[EFA_CREATE_CQ_ERR] = atomic64_read(&s->sw_stats.create_cq_err); stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->sw_stats.reg_mr_err); stats->value[EFA_ALLOC_UCONTEXT_ERR] = atomic64_read(&s->sw_stats.alloc_ucontext_err); stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->sw_stats.create_ah_err); + stats->value[EFA_MMAP_ERR] = atomic64_read(&s->sw_stats.mmap_err); return ARRAY_SIZE(efa_stats_names); } diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 8a522e14ef62..5b2f9314edd3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -39,13 +39,14 @@ #define HNS_ROCE_VLAN_SL_BIT_MASK 7 #define HNS_ROCE_VLAN_SL_SHIFT 13 -int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata) +int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device); const struct ib_gid_attr *gid_attr; struct device *dev = hr_dev->dev; struct hns_roce_ah *ah = to_hr_ah(ibah); + struct rdma_ah_attr *ah_attr = init_attr->ah_attr; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); u16 vlan_id = 0xffff; bool vlan_en = false; diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index da574c26e063..365e7db6c498 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -157,84 +157,78 @@ void hns_roce_bitmap_cleanup(struct hns_roce_bitmap *bitmap) kfree(bitmap->table); } -void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size, - struct hns_roce_buf *buf) +void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf) { - int i; struct device *dev = hr_dev->dev; + u32 size = buf->size; + int i; + + if (size == 0) + return; - if (buf->nbufs == 1) { + buf->size = 0; + + if (hns_roce_buf_is_direct(buf)) { dma_free_coherent(dev, size, buf->direct.buf, buf->direct.map); } else { - for (i = 0; i < buf->nbufs; ++i) + for (i = 0; i < buf->npages; ++i) if (buf->page_list[i].buf) dma_free_coherent(dev, 1 << buf->page_shift, buf->page_list[i].buf, buf->page_list[i].map); kfree(buf->page_list); + buf->page_list = NULL; } } int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, struct hns_roce_buf *buf, u32 page_shift) { - int i = 0; - dma_addr_t t; + struct hns_roce_buf_list *buf_list; struct device *dev = hr_dev->dev; - u32 page_size = 1 << page_shift; - u32 order; + u32 page_size; + int i; - /* SQ/RQ buf lease than one page, SQ + RQ = 8K */ + /* The minimum shift of the page accessed by hw is PAGE_ADDR_SHIFT */ + buf->page_shift = max_t(int, PAGE_ADDR_SHIFT, page_shift); + + page_size = 1 << buf->page_shift; + buf->npages = DIV_ROUND_UP(size, page_size); + + /* required size is not bigger than one trunk size */ if (size <= max_direct) { - buf->nbufs = 1; - /* Npages calculated by page_size */ - order = get_order(size); - if (order <= page_shift - PAGE_SHIFT) - order = 0; - else - order -= page_shift - PAGE_SHIFT; - buf->npages = 1 << order; - buf->page_shift = page_shift; - /* MTT PA must be recorded in 4k alignment, t is 4k aligned */ - buf->direct.buf = dma_alloc_coherent(dev, size, &t, + buf->page_list = NULL; + buf->direct.buf = dma_alloc_coherent(dev, size, + &buf->direct.map, GFP_KERNEL); if (!buf->direct.buf) return -ENOMEM; - - buf->direct.map = t; - - while (t & ((1 << buf->page_shift) - 1)) { - --buf->page_shift; - buf->npages *= 2; - } } else { - buf->nbufs = (size + page_size - 1) / page_size; - buf->npages = buf->nbufs; - buf->page_shift = page_shift; - buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list), - GFP_KERNEL); - - if (!buf->page_list) + buf_list = kcalloc(buf->npages, sizeof(*buf_list), GFP_KERNEL); + if (!buf_list) return -ENOMEM; - for (i = 0; i < buf->nbufs; ++i) { - buf->page_list[i].buf = dma_alloc_coherent(dev, - page_size, - &t, - GFP_KERNEL); - - if (!buf->page_list[i].buf) - goto err_free; + for (i = 0; i < buf->npages; i++) { + buf_list[i].buf = dma_alloc_coherent(dev, page_size, + &buf_list[i].map, + GFP_KERNEL); + if (!buf_list[i].buf) + break; + } - buf->page_list[i].map = t; + if (i != buf->npages && i > 0) { + while (i-- > 0) + dma_free_coherent(dev, page_size, + buf_list[i].buf, + buf_list[i].map); + kfree(buf_list); + return -ENOMEM; } + buf->page_list = buf_list; } + buf->size = size; return 0; - -err_free: - hns_roce_buf_free(hr_dev, size, buf); - return -ENOMEM; } int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, @@ -246,18 +240,14 @@ int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, end = start + buf_cnt; if (end > buf->npages) { dev_err(hr_dev->dev, - "invalid kmem region,offset %d,buf_cnt %d,total %d!\n", + "Failed to check kmem bufs, end %d + %d total %d!\n", start, buf_cnt, buf->npages); return -EINVAL; } total = 0; for (i = start; i < end; i++) - if (buf->nbufs == 1) - bufs[total++] = buf->direct.map + - ((dma_addr_t)i << buf->page_shift); - else - bufs[total++] = buf->page_list[i].map; + bufs[total++] = hns_roce_buf_page(buf, i); return total; } @@ -271,8 +261,9 @@ int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, int idx = 0; u64 addr; - if (page_shift < PAGE_SHIFT) { - dev_err(hr_dev->dev, "invalid page shift %d!\n", page_shift); + if (page_shift < PAGE_ADDR_SHIFT) { + dev_err(hr_dev->dev, "Failed to check umem page shift %d!\n", + page_shift); return -EINVAL; } @@ -292,49 +283,6 @@ done: return total; } -void hns_roce_init_buf_region(struct hns_roce_buf_region *region, int hopnum, - int offset, int buf_cnt) -{ - if (hopnum == HNS_ROCE_HOP_NUM_0) - region->hopnum = 0; - else - region->hopnum = hopnum; - - region->offset = offset; - region->count = buf_cnt; -} - -void hns_roce_free_buf_list(dma_addr_t **bufs, int region_cnt) -{ - int i; - - for (i = 0; i < region_cnt; i++) { - kfree(bufs[i]); - bufs[i] = NULL; - } -} - -int hns_roce_alloc_buf_list(struct hns_roce_buf_region *regions, - dma_addr_t **bufs, int region_cnt) -{ - struct hns_roce_buf_region *r; - int i; - - for (i = 0; i < region_cnt; i++) { - r = ®ions[i]; - bufs[i] = kcalloc(r->count, sizeof(dma_addr_t), GFP_KERNEL); - if (!bufs[i]) - goto err_alloc; - } - - return 0; - -err_alloc: - hns_roce_free_buf_list(bufs, i); - - return -ENOMEM; -} - void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev) { if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 5bfb52ffd590..d2d7074bbe69 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -39,51 +39,40 @@ #include <rdma/hns-abi.h> #include "hns_roce_common.h" -static int hns_roce_alloc_cqc(struct hns_roce_dev *hr_dev, - struct hns_roce_cq *hr_cq) +static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { struct hns_roce_cmd_mailbox *mailbox; - struct hns_roce_hem_table *mtt_table; struct hns_roce_cq_table *cq_table; - struct device *dev = hr_dev->dev; + struct ib_device *ibdev = &hr_dev->ib_dev; + u64 mtts[MTT_MIN_COUNT] = { 0 }; dma_addr_t dma_handle; - u64 *mtts; int ret; - cq_table = &hr_dev->cq_table; - - /* Get the physical address of cq buf */ - if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) - mtt_table = &hr_dev->mr_table.mtt_cqe_table; - else - mtt_table = &hr_dev->mr_table.mtt_table; - - mtts = hns_roce_table_find(hr_dev, mtt_table, hr_cq->mtt.first_seg, - &dma_handle); - - if (!mtts) { - dev_err(dev, "Failed to find mtt for CQ buf.\n"); + ret = hns_roce_mtr_find(hr_dev, &hr_cq->mtr, 0, mtts, ARRAY_SIZE(mtts), + &dma_handle); + if (ret < 1) { + ibdev_err(ibdev, "Failed to find CQ mtr\n"); return -EINVAL; } + cq_table = &hr_dev->cq_table; ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn); if (ret) { - dev_err(dev, "Num of CQ out of range.\n"); + ibdev_err(ibdev, "Failed to alloc CQ bitmap, err %d\n", ret); return ret; } /* Get CQC memory HEM(Hardware Entry Memory) table */ ret = hns_roce_table_get(hr_dev, &cq_table->table, hr_cq->cqn); if (ret) { - dev_err(dev, - "Get context mem failed(%d) when CQ(0x%lx) alloc.\n", - ret, hr_cq->cqn); + ibdev_err(ibdev, "Failed to get CQ(0x%lx) context, err %d\n", + hr_cq->cqn, ret); goto err_out; } ret = xa_err(xa_store(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL)); if (ret) { - dev_err(dev, "Failed to xa_store CQ.\n"); + ibdev_err(ibdev, "Failed to xa_store CQ\n"); goto err_put; } @@ -101,9 +90,9 @@ static int hns_roce_alloc_cqc(struct hns_roce_dev *hr_dev, HNS_ROCE_CMD_CREATE_CQC, HNS_ROCE_CMD_TIMEOUT_MSECS); hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) { - dev_err(dev, - "Send cmd mailbox failed(%d) when CQ(0x%lx) alloc.\n", - ret, hr_cq->cqn); + ibdev_err(ibdev, + "Failed to send create cmd for CQ(0x%lx), err %d\n", + hr_cq->cqn, ret); goto err_xa; } @@ -126,7 +115,7 @@ err_out: return ret; } -void hns_roce_free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) +static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; struct device *dev = hr_dev->dev; @@ -153,190 +142,86 @@ void hns_roce_free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR); } -static int get_cq_umem(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, - struct hns_roce_ib_create_cq ucmd, - struct ib_udata *udata) -{ - struct hns_roce_buf *buf = &hr_cq->buf; - struct hns_roce_mtt *mtt = &hr_cq->mtt; - struct ib_umem **umem = &hr_cq->umem; - u32 npages; - int ret; - - *umem = ib_umem_get(&hr_dev->ib_dev, ucmd.buf_addr, buf->size, - IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(*umem)) - return PTR_ERR(*umem); - - if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) - mtt->mtt_type = MTT_TYPE_CQE; - else - mtt->mtt_type = MTT_TYPE_WQE; - - npages = DIV_ROUND_UP(ib_umem_page_count(*umem), - 1 << hr_dev->caps.cqe_buf_pg_sz); - ret = hns_roce_mtt_init(hr_dev, npages, buf->page_shift, mtt); - if (ret) - goto err_buf; - - ret = hns_roce_ib_umem_write_mtt(hr_dev, mtt, *umem); - if (ret) - goto err_mtt; - - return 0; - -err_mtt: - hns_roce_mtt_cleanup(hr_dev, mtt); - -err_buf: - ib_umem_release(*umem); - return ret; -} - -static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) +static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, + struct ib_udata *udata, unsigned long addr) { - struct hns_roce_buf *buf = &hr_cq->buf; - struct hns_roce_mtt *mtt = &hr_cq->mtt; - int ret; - - ret = hns_roce_buf_alloc(hr_dev, buf->size, (1 << buf->page_shift) * 2, - buf, buf->page_shift); - if (ret) - goto out; - - if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) - mtt->mtt_type = MTT_TYPE_CQE; - else - mtt->mtt_type = MTT_TYPE_WQE; - - ret = hns_roce_mtt_init(hr_dev, buf->npages, buf->page_shift, mtt); - if (ret) - goto err_buf; - - ret = hns_roce_buf_write_mtt(hr_dev, mtt, buf); - if (ret) - goto err_mtt; - - return 0; - -err_mtt: - hns_roce_mtt_cleanup(hr_dev, mtt); - -err_buf: - hns_roce_buf_free(hr_dev, buf->size, buf); - -out: - return ret; + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_buf_attr buf_attr = {}; + int err; + + buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + PAGE_ADDR_SHIFT; + buf_attr.region[0].size = hr_cq->cq_depth * hr_dev->caps.cq_entry_sz; + buf_attr.region[0].hopnum = hr_dev->caps.cqe_hop_num; + buf_attr.region_count = 1; + buf_attr.fixed_page = true; + + err = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr, + hr_dev->caps.cqe_ba_pg_sz + PAGE_ADDR_SHIFT, + udata, addr); + if (err) + ibdev_err(ibdev, "Failed to alloc CQ mtr, err %d\n", err); + + return err; } static void free_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { - hns_roce_buf_free(hr_dev, hr_cq->buf.size, &hr_cq->buf); + hns_roce_mtr_destroy(hr_dev, &hr_cq->mtr); } -static int create_user_cq(struct hns_roce_dev *hr_dev, - struct hns_roce_cq *hr_cq, - struct ib_udata *udata, - struct hns_roce_ib_create_cq_resp *resp) +static int alloc_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, + struct ib_udata *udata, unsigned long addr, + struct hns_roce_ib_create_cq_resp *resp) { - struct hns_roce_ib_create_cq ucmd; - struct device *dev = hr_dev->dev; - int ret; - struct hns_roce_ucontext *context = rdma_udata_to_drv_context( - udata, struct hns_roce_ucontext, ibucontext); - - if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { - dev_err(dev, "Failed to copy_from_udata.\n"); - return -EFAULT; - } + bool has_db = hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB; + struct hns_roce_ucontext *uctx; + int err; - /* Get user space address, write it into mtt table */ - ret = get_cq_umem(hr_dev, hr_cq, ucmd, udata); - if (ret) { - dev_err(dev, "Failed to get_cq_umem.\n"); - return ret; - } - - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB && - udata->outlen >= offsetofend(typeof(*resp), cap_flags)) { - ret = hns_roce_db_map_user(context, udata, ucmd.db_addr, - &hr_cq->db); - if (ret) { - dev_err(dev, "cq record doorbell map failed!\n"); - goto err_mtt; + if (udata) { + if (has_db && + udata->outlen >= offsetofend(typeof(*resp), cap_flags)) { + uctx = rdma_udata_to_drv_context(udata, + struct hns_roce_ucontext, ibucontext); + err = hns_roce_db_map_user(uctx, udata, addr, + &hr_cq->db); + if (err) + return err; + hr_cq->db_en = 1; + resp->cap_flags |= HNS_ROCE_SUPPORT_CQ_RECORD_DB; } - hr_cq->db_en = 1; - resp->cap_flags |= HNS_ROCE_SUPPORT_CQ_RECORD_DB; - } - - return 0; - -err_mtt: - hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); - ib_umem_release(hr_cq->umem); - - return ret; -} - -static int create_kernel_cq(struct hns_roce_dev *hr_dev, - struct hns_roce_cq *hr_cq) -{ - struct device *dev = hr_dev->dev; - int ret; - - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) { - ret = hns_roce_alloc_db(hr_dev, &hr_cq->db, 1); - if (ret) - return ret; - - hr_cq->set_ci_db = hr_cq->db.db_record; - *hr_cq->set_ci_db = 0; - hr_cq->db_en = 1; - } - - /* Init mtt table and write buff address to mtt table */ - ret = alloc_cq_buf(hr_dev, hr_cq); - if (ret) { - dev_err(dev, "Failed to alloc_cq_buf.\n"); - goto err_db; + } else { + if (has_db) { + err = hns_roce_alloc_db(hr_dev, &hr_cq->db, 1); + if (err) + return err; + hr_cq->set_ci_db = hr_cq->db.db_record; + *hr_cq->set_ci_db = 0; + hr_cq->db_en = 1; + } + hr_cq->cq_db_l = hr_dev->reg_base + hr_dev->odb_offset + + DB_REG_OFFSET * hr_dev->priv_uar.index; } - hr_cq->cq_db_l = hr_dev->reg_base + hr_dev->odb_offset + - DB_REG_OFFSET * hr_dev->priv_uar.index; - return 0; - -err_db: - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) - hns_roce_free_db(hr_dev, &hr_cq->db); - - return ret; } -static void destroy_user_cq(struct hns_roce_dev *hr_dev, - struct hns_roce_cq *hr_cq, - struct ib_udata *udata, - struct hns_roce_ib_create_cq_resp *resp) +static void free_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, + struct ib_udata *udata) { - struct hns_roce_ucontext *context = rdma_udata_to_drv_context( - udata, struct hns_roce_ucontext, ibucontext); + struct hns_roce_ucontext *uctx; - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB && - udata->outlen >= offsetofend(typeof(*resp), cap_flags)) - hns_roce_db_unmap_user(context, &hr_cq->db); - - hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); - ib_umem_release(hr_cq->umem); -} - -static void destroy_kernel_cq(struct hns_roce_dev *hr_dev, - struct hns_roce_cq *hr_cq) -{ - hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); - free_cq_buf(hr_dev, hr_cq); + if (!hr_cq->db_en) + return; - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) + hr_cq->db_en = 0; + if (udata) { + uctx = rdma_udata_to_drv_context(udata, + struct hns_roce_ucontext, + ibucontext); + hns_roce_db_unmap_user(uctx, &hr_cq->db); + } else { hns_roce_free_db(hr_dev, &hr_cq->db); + } } int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, @@ -345,20 +230,21 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); struct hns_roce_ib_create_cq_resp resp = {}; struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); - struct device *dev = hr_dev->dev; + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_ib_create_cq ucmd = {}; int vector = attr->comp_vector; u32 cq_entries = attr->cqe; int ret; if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) { - dev_err(dev, "Create CQ failed. entries=%d, max=%d\n", - cq_entries, hr_dev->caps.max_cqes); + ibdev_err(ibdev, "Failed to check CQ count %d max=%d\n", + cq_entries, hr_dev->caps.max_cqes); return -EINVAL; } if (vector >= hr_dev->caps.num_comp_vectors) { - dev_err(dev, "Create CQ failed, vector=%d, max=%d\n", - vector, hr_dev->caps.num_comp_vectors); + ibdev_err(ibdev, "Failed to check CQ vector=%d max=%d\n", + vector, hr_dev->caps.num_comp_vectors); return -EINVAL; } @@ -367,30 +253,35 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, hr_cq->ib_cq.cqe = cq_entries - 1; /* used as cqe index */ hr_cq->cq_depth = cq_entries; hr_cq->vector = vector; - hr_cq->buf.size = hr_cq->cq_depth * hr_dev->caps.cq_entry_sz; - hr_cq->buf.page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz; spin_lock_init(&hr_cq->lock); INIT_LIST_HEAD(&hr_cq->sq_list); INIT_LIST_HEAD(&hr_cq->rq_list); if (udata) { - ret = create_user_cq(hr_dev, hr_cq, udata, &resp); - if (ret) { - dev_err(dev, "Create cq failed in user mode!\n"); - goto err_cq; - } - } else { - ret = create_kernel_cq(hr_dev, hr_cq); + ret = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); if (ret) { - dev_err(dev, "Create cq failed in kernel mode!\n"); - goto err_cq; + ibdev_err(ibdev, "Failed to copy CQ udata, err %d\n", + ret); + return ret; } } - ret = hns_roce_alloc_cqc(hr_dev, hr_cq); + ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr); + if (ret) { + ibdev_err(ibdev, "Failed to alloc CQ buf, err %d\n", ret); + return ret; + } + + ret = alloc_cq_db(hr_dev, hr_cq, udata, ucmd.db_addr, &resp); if (ret) { - dev_err(dev, "Alloc CQ failed(%d).\n", ret); - goto err_dbmap; + ibdev_err(ibdev, "Failed to alloc CQ db, err %d\n", ret); + goto err_cq_buf; + } + + ret = alloc_cqc(hr_dev, hr_cq); + if (ret) { + ibdev_err(ibdev, "Failed to alloc CQ context, err %d\n", ret); + goto err_cq_db; } /* @@ -412,15 +303,11 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, return 0; err_cqc: - hns_roce_free_cqc(hr_dev, hr_cq); - -err_dbmap: - if (udata) - destroy_user_cq(hr_dev, hr_cq, udata, &resp); - else - destroy_kernel_cq(hr_dev, hr_cq); - -err_cq: + free_cqc(hr_dev, hr_cq); +err_cq_db: + free_cq_db(hr_dev, hr_cq, udata); +err_cq_buf: + free_cq_buf(hr_dev, hr_cq); return ret; } @@ -429,28 +316,12 @@ void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); - if (hr_dev->hw->destroy_cq) { + if (hr_dev->hw->destroy_cq) hr_dev->hw->destroy_cq(ib_cq, udata); - return; - } - - hns_roce_free_cqc(hr_dev, hr_cq); - hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); - ib_umem_release(hr_cq->umem); - if (udata) { - if (hr_cq->db_en == 1) - hns_roce_db_unmap_user(rdma_udata_to_drv_context( - udata, - struct hns_roce_ucontext, - ibucontext), - &hr_cq->db); - } else { - /* Free the buff of stored cq */ - free_cq_buf(hr_dev, hr_cq); - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) - hns_roce_free_db(hr_dev, &hr_cq->db); - } + free_cq_buf(hr_dev, hr_cq); + free_cq_db(hr_dev, hr_cq, udata); + free_cqc(hr_dev, hr_cq); } void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index f6b3cf6b95d6..4fcd608ee55f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -131,8 +131,8 @@ enum { }; enum { - HNS_ROCE_SUPPORT_RQ_RECORD_DB = 1 << 0, - HNS_ROCE_SUPPORT_SQ_RECORD_DB = 1 << 1, + HNS_ROCE_QP_CAP_RQ_RECORD_DB = BIT(0), + HNS_ROCE_QP_CAP_SQ_RECORD_DB = BIT(1), }; enum { @@ -209,6 +209,8 @@ enum { HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE = 0x07, }; +#define HNS_ROCE_CAP_FLAGS_EX_SHIFT 12 + enum { HNS_ROCE_CAP_FLAG_REREG_MR = BIT(0), HNS_ROCE_CAP_FLAG_ROCE_V1_V2 = BIT(1), @@ -222,13 +224,6 @@ enum { HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10), }; -enum hns_roce_mtt_type { - MTT_TYPE_WQE, - MTT_TYPE_CQE, - MTT_TYPE_SRQWQE, - MTT_TYPE_IDX -}; - #define HNS_ROCE_DB_TYPE_COUNT 2 #define HNS_ROCE_DB_UNIT_SIZE 4 @@ -267,10 +262,11 @@ enum { #define HNS_ROCE_PORT_DOWN 0 #define HNS_ROCE_PORT_UP 1 -#define HNS_ROCE_MTT_ENTRY_PER_SEG 8 - #define PAGE_ADDR_SHIFT 12 +/* The minimum page count for hardware access page directly. */ +#define HNS_HW_DIRECT_PAGE_COUNT 2 + struct hns_roce_uar { u64 pfn; unsigned long index; @@ -300,22 +296,6 @@ struct hns_roce_bitmap { unsigned long *table; }; -/* Order bitmap length -- bit num compute formula: 1 << (max_order - order) */ -/* Order = 0: bitmap is biggest, order = max bitmap is least (only a bit) */ -/* Every bit repesent to a partner free/used status in bitmap */ -/* - * Initial, bits of other bitmap are all 0 except that a bit of max_order is 1 - * Bit = 1 represent to idle and available; bit = 0: not available - */ -struct hns_roce_buddy { - /* Members point to every order level bitmap */ - unsigned long **bits; - /* Represent to avail bits of the order level bitmap */ - u32 *num_free; - int max_order; - spinlock_t lock; -}; - /* For Hardware Entry Memory */ struct hns_roce_hem_table { /* HEM type: 0 = qpc, 1 = mtt, 2 = cqc, 3 = srq, 4 = other */ @@ -336,13 +316,6 @@ struct hns_roce_hem_table { dma_addr_t *bt_l0_dma_addr; }; -struct hns_roce_mtt { - unsigned long first_seg; - int order; - int page_shift; - enum hns_roce_mtt_type mtt_type; -}; - struct hns_roce_buf_region { int offset; /* page offset */ u32 count; /* page count */ @@ -357,13 +330,32 @@ struct hns_roce_hem_list { struct list_head mid_bt[HNS_ROCE_MAX_BT_REGION][HNS_ROCE_MAX_BT_LEVEL]; struct list_head btm_bt; /* link all bottom bt in @mid_bt */ dma_addr_t root_ba; /* pointer to the root ba table */ - int bt_pg_shift; +}; + +struct hns_roce_buf_attr { + struct { + size_t size; /* region size */ + int hopnum; /* multi-hop addressing hop num */ + } region[HNS_ROCE_MAX_BT_REGION]; + int region_count; /* valid region count */ + int page_shift; /* buffer page shift */ + bool fixed_page; /* decide page shift is fixed-size or maximum size */ + int user_access; /* umem access flag */ + bool mtt_only; /* only alloc buffer-required MTT memory */ }; /* memory translate region */ struct hns_roce_mtr { - struct hns_roce_hem_list hem_list; - int buf_pg_shift; + struct hns_roce_hem_list hem_list; /* multi-hop addressing resource */ + struct ib_umem *umem; /* user space buffer */ + struct hns_roce_buf *kmem; /* kernel space buffer */ + struct { + dma_addr_t root_ba; /* root BA table's address */ + bool is_direct; /* addressing without BA table */ + int ba_pg_shift; /* BA table page shift */ + int buf_pg_shift; /* buffer page shift */ + int buf_pg_count; /* buffer page count */ + } hem_cfg; /* config for hardware addressing */ }; struct hns_roce_mw { @@ -381,43 +373,22 @@ struct hns_roce_mw { struct hns_roce_mr { struct ib_mr ibmr; - struct ib_umem *umem; u64 iova; /* MR's virtual orignal addr */ u64 size; /* Address range of MR */ u32 key; /* Key of MR */ u32 pd; /* PD num of MR */ u32 access; /* Access permission of MR */ - u32 npages; int enabled; /* MR's active status */ int type; /* MR's register type */ - u64 *pbl_buf; /* MR's PBL space */ - dma_addr_t pbl_dma_addr; /* MR's PBL space PA */ - u32 pbl_size; /* PA number in the PBL */ - u64 pbl_ba; /* page table address */ - u32 l0_chunk_last_num; /* L0 last number */ - u32 l1_chunk_last_num; /* L1 last number */ - u64 **pbl_bt_l2; /* PBL BT L2 */ - u64 **pbl_bt_l1; /* PBL BT L1 */ - u64 *pbl_bt_l0; /* PBL BT L0 */ - dma_addr_t *pbl_l2_dma_addr; /* PBL BT L2 dma addr */ - dma_addr_t *pbl_l1_dma_addr; /* PBL BT L1 dma addr */ - dma_addr_t pbl_l0_dma_addr; /* PBL BT L0 dma addr */ - u32 pbl_ba_pg_sz; /* BT chunk page size */ - u32 pbl_buf_pg_sz; /* buf chunk page size */ u32 pbl_hop_num; /* multi-hop number */ + struct hns_roce_mtr pbl_mtr; + u32 npages; + dma_addr_t *page_list; }; struct hns_roce_mr_table { struct hns_roce_bitmap mtpt_bitmap; - struct hns_roce_buddy mtt_buddy; - struct hns_roce_hem_table mtt_table; struct hns_roce_hem_table mtpt_table; - struct hns_roce_buddy mtt_cqe_buddy; - struct hns_roce_hem_table mtt_cqe_table; - struct hns_roce_buddy mtt_srqwqe_buddy; - struct hns_roce_hem_table mtt_srqwqe_table; - struct hns_roce_buddy mtt_idx_buddy; - struct hns_roce_hem_table mtt_idx_table; }; struct hns_roce_wq { @@ -446,7 +417,6 @@ struct hns_roce_buf_list { struct hns_roce_buf { struct hns_roce_buf_list direct; struct hns_roce_buf_list *page_list; - int nbufs; u32 npages; u32 size; int page_shift; @@ -482,12 +452,10 @@ struct hns_roce_db { struct hns_roce_cq { struct ib_cq ib_cq; - struct hns_roce_buf buf; - struct hns_roce_mtt mtt; + struct hns_roce_mtr mtr; struct hns_roce_db db; u8 db_en; spinlock_t lock; - struct ib_umem *umem; u32 cq_depth; u32 cons_index; u32 *set_ci_db; @@ -505,11 +473,8 @@ struct hns_roce_cq { }; struct hns_roce_idx_que { - struct hns_roce_buf idx_buf; - int entry_sz; - u32 buf_size; - struct ib_umem *umem; - struct hns_roce_mtt mtt; + struct hns_roce_mtr mtr; + int entry_shift; unsigned long *bitmap; }; @@ -524,10 +489,9 @@ struct hns_roce_srq { atomic_t refcount; struct completion free; - struct hns_roce_buf buf; + struct hns_roce_mtr buf_mtr; + u64 *wrid; - struct ib_umem *umem; - struct hns_roce_mtt mtt; struct hns_roce_idx_que idx_que; spinlock_t lock; int head; @@ -656,20 +620,15 @@ struct hns_roce_work { struct hns_roce_qp { struct ib_qp ibqp; - struct hns_roce_buf hr_buf; struct hns_roce_wq rq; struct hns_roce_db rdb; struct hns_roce_db sdb; - u8 rdb_en; - u8 sdb_en; + unsigned long en_flags; u32 doorbell_qpn; u32 sq_signal_bits; struct hns_roce_wq sq; - struct ib_umem *umem; - struct hns_roce_mtt mtt; struct hns_roce_mtr mtr; - int wqe_bt_pg_shift; u32 buff_size; struct mutex mutex; @@ -769,17 +728,11 @@ struct hns_roce_eq { int over_ignore; int coalesce; int arm_st; - u64 eqe_ba; - int eqe_ba_pg_sz; - int eqe_buf_pg_sz; int hop_num; struct hns_roce_mtr mtr; - struct hns_roce_buf buf; int eq_max_cnt; int eq_period; int shift; - dma_addr_t cur_eqe_ba; - dma_addr_t nxt_eqe_ba; int event_type; int sub_type; }; @@ -1102,15 +1055,64 @@ static inline struct hns_roce_qp return xa_load(&hr_dev->qp_table_xa, qpn & (hr_dev->caps.num_qps - 1)); } +static inline bool hns_roce_buf_is_direct(struct hns_roce_buf *buf) +{ + if (buf->page_list) + return false; + + return true; +} + static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, int offset) { - u32 page_size = 1 << buf->page_shift; + if (hns_roce_buf_is_direct(buf)) + return (char *)(buf->direct.buf) + (offset & (buf->size - 1)); - if (buf->nbufs == 1) - return (char *)(buf->direct.buf) + offset; + return (char *)(buf->page_list[offset >> buf->page_shift].buf) + + (offset & ((1 << buf->page_shift) - 1)); +} + +static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, int idx) +{ + if (hns_roce_buf_is_direct(buf)) + return buf->direct.map + ((dma_addr_t)idx << buf->page_shift); else - return (char *)(buf->page_list[offset >> buf->page_shift].buf) + - (offset & (page_size - 1)); + return buf->page_list[idx].map; +} + +#define hr_hw_page_align(x) ALIGN(x, 1 << PAGE_ADDR_SHIFT) + +static inline u64 to_hr_hw_page_addr(u64 addr) +{ + return addr >> PAGE_ADDR_SHIFT; +} + +static inline u32 to_hr_hw_page_shift(u32 page_shift) +{ + return page_shift - PAGE_ADDR_SHIFT; +} + +static inline u32 to_hr_hem_hopnum(u32 hopnum, u32 count) +{ + if (count > 0) + return hopnum == HNS_ROCE_HOP_NUM_0 ? 0 : hopnum; + + return 0; +} + +static inline u32 to_hr_hem_entries_size(u32 count, u32 buf_shift) +{ + return hr_hw_page_align(count << buf_shift); +} + +static inline u32 to_hr_hem_entries_count(u32 count, u32 buf_shift) +{ + return hr_hw_page_align(count << buf_shift) >> buf_shift; +} + +static inline u32 to_hr_hem_entries_shift(u32 count, u32 buf_shift) +{ + return ilog2(to_hr_hem_entries_count(count, buf_shift)); } int hns_roce_init_uar_table(struct hns_roce_dev *dev); @@ -1125,25 +1127,18 @@ void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status, int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev); void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev); -int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift, - struct hns_roce_mtt *mtt); -void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, - struct hns_roce_mtt *mtt); -int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev, - struct hns_roce_mtt *mtt, struct hns_roce_buf *buf); - -void hns_roce_mtr_init(struct hns_roce_mtr *mtr, int bt_pg_shift, - int buf_pg_shift); -int hns_roce_mtr_attach(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - dma_addr_t **bufs, struct hns_roce_buf_region *regions, - int region_cnt); -void hns_roce_mtr_cleanup(struct hns_roce_dev *hr_dev, - struct hns_roce_mtr *mtr); - /* hns roce hw need current block and next block addr from mtt */ #define MTT_MIN_COUNT 2 int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr); +int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + struct hns_roce_buf_attr *buf_attr, int page_shift, + struct ib_udata *udata, unsigned long user_addr); +void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, + struct hns_roce_mtr *mtr); +int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + struct hns_roce_buf_region *regions, int region_cnt, + dma_addr_t *pages, int page_cnt); int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev); int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev); @@ -1171,8 +1166,8 @@ void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap, unsigned long obj, int cnt, int rr); -int hns_roce_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata); +int hns_roce_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata); int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); void hns_roce_destroy_ah(struct ib_ah *ah, u32 flags); @@ -1200,20 +1195,10 @@ struct ib_mw *hns_roce_alloc_mw(struct ib_pd *pd, enum ib_mw_type, struct ib_udata *udata); int hns_roce_dealloc_mw(struct ib_mw *ibmw); -void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size, - struct hns_roce_buf *buf); +void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf); int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, struct hns_roce_buf *buf, u32 page_shift); -int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, - struct hns_roce_mtt *mtt, struct ib_umem *umem); - -void hns_roce_init_buf_region(struct hns_roce_buf_region *region, int hopnum, - int offset, int buf_cnt); -int hns_roce_alloc_buf_list(struct hns_roce_buf_region *regions, - dma_addr_t **bufs, int count); -void hns_roce_free_buf_list(dma_addr_t **bufs, int count); - int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, int buf_cnt, int start, struct hns_roce_buf *buf); int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, @@ -1254,8 +1239,6 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); -void hns_roce_free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq); - int hns_roce_db_map_user(struct hns_roce_ucontext *context, struct ib_udata *udata, unsigned long virt, struct hns_roce_db *db); diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 263338b90d7a..37d101eec181 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -75,18 +75,6 @@ bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type) case HEM_TYPE_CQC_TIMER: hop_num = hr_dev->caps.cqc_timer_hop_num; break; - case HEM_TYPE_CQE: - hop_num = hr_dev->caps.cqe_hop_num; - break; - case HEM_TYPE_MTT: - hop_num = hr_dev->caps.mtt_hop_num; - break; - case HEM_TYPE_SRQWQE: - hop_num = hr_dev->caps.srqwqe_hop_num; - break; - case HEM_TYPE_IDX: - hop_num = hr_dev->caps.idx_hop_num; - break; default: return false; } @@ -195,38 +183,6 @@ static int get_hem_table_config(struct hns_roce_dev *hr_dev, mhop->ba_l0_num = hr_dev->caps.srqc_bt_num; mhop->hop_num = hr_dev->caps.srqc_hop_num; break; - case HEM_TYPE_MTT: - mhop->buf_chunk_size = 1 << (hr_dev->caps.mtt_buf_pg_sz - + PAGE_SHIFT); - mhop->bt_chunk_size = 1 << (hr_dev->caps.mtt_ba_pg_sz - + PAGE_SHIFT); - mhop->ba_l0_num = mhop->bt_chunk_size / BA_BYTE_LEN; - mhop->hop_num = hr_dev->caps.mtt_hop_num; - break; - case HEM_TYPE_CQE: - mhop->buf_chunk_size = 1 << (hr_dev->caps.cqe_buf_pg_sz - + PAGE_SHIFT); - mhop->bt_chunk_size = 1 << (hr_dev->caps.cqe_ba_pg_sz - + PAGE_SHIFT); - mhop->ba_l0_num = mhop->bt_chunk_size / BA_BYTE_LEN; - mhop->hop_num = hr_dev->caps.cqe_hop_num; - break; - case HEM_TYPE_SRQWQE: - mhop->buf_chunk_size = 1 << (hr_dev->caps.srqwqe_buf_pg_sz - + PAGE_SHIFT); - mhop->bt_chunk_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz - + PAGE_SHIFT); - mhop->ba_l0_num = mhop->bt_chunk_size / BA_BYTE_LEN; - mhop->hop_num = hr_dev->caps.srqwqe_hop_num; - break; - case HEM_TYPE_IDX: - mhop->buf_chunk_size = 1 << (hr_dev->caps.idx_buf_pg_sz - + PAGE_SHIFT); - mhop->bt_chunk_size = 1 << (hr_dev->caps.idx_ba_pg_sz - + PAGE_SHIFT); - mhop->ba_l0_num = mhop->bt_chunk_size / BA_BYTE_LEN; - mhop->hop_num = hr_dev->caps.idx_hop_num; - break; default: dev_err(dev, "Table %d not support multi-hop addressing!\n", type); @@ -899,57 +855,6 @@ out: return addr; } -int hns_roce_table_get_range(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_table *table, - unsigned long start, unsigned long end) -{ - struct hns_roce_hem_mhop mhop; - unsigned long inc = table->table_chunk_size / table->obj_size; - unsigned long i = 0; - int ret; - - if (hns_roce_check_whether_mhop(hr_dev, table->type)) { - ret = hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop); - if (ret) - goto fail; - inc = mhop.bt_chunk_size / table->obj_size; - } - - /* Allocate MTT entry memory according to chunk(128K) */ - for (i = start; i <= end; i += inc) { - ret = hns_roce_table_get(hr_dev, table, i); - if (ret) - goto fail; - } - - return 0; - -fail: - while (i > start) { - i -= inc; - hns_roce_table_put(hr_dev, table, i); - } - return ret; -} - -void hns_roce_table_put_range(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_table *table, - unsigned long start, unsigned long end) -{ - struct hns_roce_hem_mhop mhop; - unsigned long inc = table->table_chunk_size / table->obj_size; - unsigned long i; - - if (hns_roce_check_whether_mhop(hr_dev, table->type)) { - if (hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop)) - return; - inc = mhop.bt_chunk_size / table->obj_size; - } - - for (i = start; i <= end; i += inc) - hns_roce_table_put(hr_dev, table, i); -} - int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, u32 type, unsigned long obj_size, unsigned long nobj, @@ -1112,12 +1017,6 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) { - if ((hr_dev->caps.num_idx_segs)) - hns_roce_cleanup_hem_table(hr_dev, - &hr_dev->mr_table.mtt_idx_table); - if (hr_dev->caps.num_srqwqe_segs) - hns_roce_cleanup_hem_table(hr_dev, - &hr_dev->mr_table.mtt_srqwqe_table); if (hr_dev->caps.srqc_entry_sz) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->srq_table.table); @@ -1137,10 +1036,6 @@ void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.qp_table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table); - if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) - hns_roce_cleanup_hem_table(hr_dev, - &hr_dev->mr_table.mtt_cqe_table); - hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_table); } struct roce_hem_item { @@ -1505,7 +1400,7 @@ err_exit: int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list, const struct hns_roce_buf_region *regions, - int region_cnt) + int region_cnt, int bt_pg_shift) { const struct hns_roce_buf_region *r; int ofs, end; @@ -1519,7 +1414,7 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, return -EINVAL; } - unit = (1 << hem_list->bt_pg_shift) / BA_BYTE_LEN; + unit = (1 << bt_pg_shift) / BA_BYTE_LEN; for (i = 0; i < region_cnt; i++) { r = ®ions[i]; if (!r->count) @@ -1566,8 +1461,7 @@ void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev, hem_list->root_ba = 0; } -void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list, - int bt_page_order) +void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list) { int i, j; @@ -1576,8 +1470,6 @@ void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list, for (i = 0; i < HNS_ROCE_MAX_BT_REGION; i++) for (j = 0; j < HNS_ROCE_MAX_BT_LEVEL; j++) INIT_LIST_HEAD(&hem_list->mid_bt[i][j]); - - hem_list->bt_pg_shift = bt_page_order; } void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index 3bb8f78fb7b0..1fa0bdcb1989 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -115,12 +115,6 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev, void *hns_roce_table_find(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, unsigned long obj, dma_addr_t *dma_handle); -int hns_roce_table_get_range(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_table *table, - unsigned long start, unsigned long end); -void hns_roce_table_put_range(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_table *table, - unsigned long start, unsigned long end); int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, u32 type, unsigned long obj_size, unsigned long nobj, @@ -133,14 +127,13 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, struct hns_roce_hem_mhop *mhop); bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type); -void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list, - int bt_page_order); +void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list); int hns_roce_hem_list_calc_root_ba(const struct hns_roce_buf_region *regions, int region_cnt, int unit); int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list, const struct hns_roce_buf_region *regions, - int region_cnt); + int region_cnt, int bt_pg_shift); void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list); void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 5ff028d77be3..b4b98e818328 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -1099,7 +1099,6 @@ static int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev, struct completion comp; long end = HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS; unsigned long start = jiffies; - int npages; int ret = 0; priv = (struct hns_roce_v1_priv *)hr_dev->priv; @@ -1146,17 +1145,9 @@ free_mr: dev_dbg(dev, "Free mr 0x%x use 0x%x us.\n", mr->key, jiffies_to_usecs(jiffies) - jiffies_to_usecs(start)); - if (mr->size != ~0ULL) { - npages = ib_umem_page_count(mr->umem); - dma_free_coherent(dev, npages * 8, mr->pbl_buf, - mr->pbl_dma_addr); - } - hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, key_to_hw_index(mr->key), 0); - - ib_umem_release(mr->umem); - + hns_roce_mtr_destroy(hr_dev, &mr->pbl_mtr); kfree(mr); return ret; @@ -1826,9 +1817,12 @@ static void hns_roce_v1_set_mtu(struct hns_roce_dev *hr_dev, u8 phy_port, static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, unsigned long mtpt_idx) { + struct hns_roce_dev *hr_dev = to_hr_dev(mr->ibmr.device); + u64 pages[HNS_ROCE_MAX_INNER_MTPT_NUM] = { 0 }; + struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_v1_mpt_entry *mpt_entry; - struct sg_dma_page_iter sg_iter; - u64 *pages; + dma_addr_t pbl_ba; + int count; int i; /* MPT filled into mailbox buf */ @@ -1878,22 +1872,15 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, if (mr->type == MR_TYPE_DMA) return 0; - pages = (u64 *) __get_free_page(GFP_KERNEL); - if (!pages) - return -ENOMEM; - - i = 0; - for_each_sg_dma_page(mr->umem->sg_head.sgl, &sg_iter, mr->umem->nmap, 0) { - pages[i] = ((u64)sg_page_iter_dma_address(&sg_iter)) >> 12; - - /* Directly record to MTPT table firstly 7 entry */ - if (i >= HNS_ROCE_MAX_INNER_MTPT_NUM) - break; - i++; + count = hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, pages, + ARRAY_SIZE(pages), &pbl_ba); + if (count < 1) { + ibdev_err(ibdev, "failed to find PBL mtr, count = %d.", count); + return -ENOBUFS; } /* Register user mr */ - for (i = 0; i < HNS_ROCE_MAX_INNER_MTPT_NUM; i++) { + for (i = 0; i < count; i++) { switch (i) { case 0: mpt_entry->pa0_l = cpu_to_le32((u32)(pages[i])); @@ -1959,20 +1946,17 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, } } - free_page((unsigned long) pages); - - mpt_entry->pbl_addr_l = cpu_to_le32((u32)(mr->pbl_dma_addr)); - + mpt_entry->pbl_addr_l = cpu_to_le32(pbl_ba); roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_PBL_ADDR_H_M, - MPT_BYTE_12_PBL_ADDR_H_S, - ((u32)(mr->pbl_dma_addr >> 32))); + MPT_BYTE_12_PBL_ADDR_H_S, upper_32_bits(pbl_ba)); return 0; } static void *get_cqe(struct hns_roce_cq *hr_cq, int n) { - return hns_roce_buf_offset(&hr_cq->buf, n * HNS_ROCE_V1_CQE_ENTRY_SIZE); + return hns_roce_buf_offset(hr_cq->mtr.kmem, + n * HNS_ROCE_V1_CQE_ENTRY_SIZE); } static void *get_sw_cqe(struct hns_roce_cq *hr_cq, int n) @@ -2479,7 +2463,6 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev, } static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev, - struct hns_roce_mtt *mtt, enum hns_roce_qp_state cur_state, enum hns_roce_qp_state new_state, struct hns_roce_qp_context *context, @@ -2560,6 +2543,29 @@ static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev, return ret; } +static int find_wqe_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + u64 *sq_ba, u64 *rq_ba, dma_addr_t *bt_ba) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + int rq_pa_start; + int count; + + count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 0, sq_ba, 1, bt_ba); + if (count < 1) { + ibdev_err(ibdev, "Failed to find SQ ba\n"); + return -ENOBUFS; + } + rq_pa_start = hr_qp->rq.offset >> hr_qp->mtr.hem_cfg.buf_pg_shift; + count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, rq_pa_start, rq_ba, 1, + NULL); + if (!count) { + ibdev_err(ibdev, "Failed to find RQ ba\n"); + return -ENOBUFS; + } + + return 0; +} + static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) @@ -2567,25 +2573,20 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct hns_roce_sqp_context *context; - struct device *dev = &hr_dev->pdev->dev; dma_addr_t dma_handle = 0; u32 __iomem *addr; - int rq_pa_start; + u64 sq_ba = 0; + u64 rq_ba = 0; __le32 tmp; u32 reg_val; - u64 *mtts; context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) return -ENOMEM; /* Search QP buf's MTTs */ - mtts = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_table, - hr_qp->mtt.first_seg, &dma_handle); - if (!mtts) { - dev_err(dev, "qp buf pa find failed\n"); + if (find_wqe_mtt(hr_dev, hr_qp, &sq_ba, &rq_ba, &dma_handle)) goto out; - } if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { roce_set_field(context->qp1c_bytes_4, @@ -2599,11 +2600,11 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, roce_set_field(context->qp1c_bytes_4, QP1C_BYTES_4_PD_M, QP1C_BYTES_4_PD_S, to_hr_pd(ibqp->pd)->pdn); - context->sq_rq_bt_l = cpu_to_le32((u32)(dma_handle)); + context->sq_rq_bt_l = cpu_to_le32(dma_handle); roce_set_field(context->qp1c_bytes_12, QP1C_BYTES_12_SQ_RQ_BT_H_M, QP1C_BYTES_12_SQ_RQ_BT_H_S, - ((u32)(dma_handle >> 32))); + upper_32_bits(dma_handle)); roce_set_field(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_HEAD_M, QP1C_BYTES_16_RQ_HEAD_S, hr_qp->rq.head); @@ -2624,14 +2625,12 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, roce_set_field(context->qp1c_bytes_20, QP1C_BYTES_20_PKEY_IDX_M, QP1C_BYTES_20_PKEY_IDX_S, attr->pkey_index); - rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE; - context->cur_rq_wqe_ba_l = - cpu_to_le32((u32)(mtts[rq_pa_start])); + context->cur_rq_wqe_ba_l = cpu_to_le32(rq_ba); roce_set_field(context->qp1c_bytes_28, QP1C_BYTES_28_CUR_RQ_WQE_BA_H_M, QP1C_BYTES_28_CUR_RQ_WQE_BA_H_S, - (mtts[rq_pa_start]) >> 32); + upper_32_bits(rq_ba)); roce_set_field(context->qp1c_bytes_28, QP1C_BYTES_28_RQ_CUR_IDX_M, QP1C_BYTES_28_RQ_CUR_IDX_S, 0); @@ -2645,12 +2644,12 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP1C_BYTES_32_TX_CQ_NUM_S, to_hr_cq(ibqp->send_cq)->cqn); - context->cur_sq_wqe_ba_l = cpu_to_le32((u32)mtts[0]); + context->cur_sq_wqe_ba_l = cpu_to_le32(sq_ba); roce_set_field(context->qp1c_bytes_40, QP1C_BYTES_40_CUR_SQ_WQE_BA_H_M, QP1C_BYTES_40_CUR_SQ_WQE_BA_H_S, - (mtts[0]) >> 32); + upper_32_bits(sq_ba)); roce_set_field(context->qp1c_bytes_40, QP1C_BYTES_40_SQ_CUR_IDX_M, QP1C_BYTES_40_SQ_CUR_IDX_S, 0); @@ -2704,6 +2703,28 @@ out: return -EINVAL; } +static bool check_qp_state(enum ib_qp_state cur_state, + enum ib_qp_state new_state) +{ + static const bool sm[][IB_QPS_ERR + 1] = { + [IB_QPS_RESET] = { [IB_QPS_RESET] = true, + [IB_QPS_INIT] = true }, + [IB_QPS_INIT] = { [IB_QPS_RESET] = true, + [IB_QPS_INIT] = true, + [IB_QPS_RTR] = true, + [IB_QPS_ERR] = true }, + [IB_QPS_RTR] = { [IB_QPS_RESET] = true, + [IB_QPS_RTS] = true, + [IB_QPS_ERR] = true }, + [IB_QPS_RTS] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true }, + [IB_QPS_SQD] = {}, + [IB_QPS_SQE] = {}, + [IB_QPS_ERR] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true } + }; + + return sm[cur_state][new_state]; +} + static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) @@ -2716,26 +2737,29 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, dma_addr_t dma_handle_2 = 0; dma_addr_t dma_handle = 0; __le32 doorbell[2] = {0}; - int rq_pa_start = 0; u64 *mtts_2 = NULL; int ret = -EINVAL; - u64 *mtts = NULL; + u64 sq_ba = 0; + u64 rq_ba = 0; int port; u8 port_num; u8 *dmac; u8 *smac; + if (!check_qp_state(cur_state, new_state)) { + ibdev_err(ibqp->device, + "not support QP(%u) status from %d to %d\n", + ibqp->qp_num, cur_state, new_state); + return -EINVAL; + } + context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) return -ENOMEM; /* Search qp buf's mtts */ - mtts = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_table, - hr_qp->mtt.first_seg, &dma_handle); - if (mtts == NULL) { - dev_err(dev, "qp buf pa find failed\n"); + if (find_wqe_mtt(hr_dev, hr_qp, &sq_ba, &rq_ba, &dma_handle)) goto out; - } /* Search IRRL's mtts */ mtts_2 = hns_roce_table_find(hr_dev, &hr_dev->qp_table.irrl_table, @@ -2890,11 +2914,11 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, dmac = (u8 *)attr->ah_attr.roce.dmac; - context->sq_rq_bt_l = cpu_to_le32((u32)(dma_handle)); + context->sq_rq_bt_l = cpu_to_le32(dma_handle); roce_set_field(context->qpc_bytes_24, QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_M, QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S, - ((u32)(dma_handle >> 32))); + upper_32_bits(dma_handle)); roce_set_bit(context->qpc_bytes_24, QP_CONTEXT_QPC_BYTE_24_REMOTE_ENABLE_E2E_CREDITS_S, 1); @@ -2993,14 +3017,12 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_M, QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S, 0); - rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE; - context->cur_rq_wqe_ba_l = - cpu_to_le32((u32)(mtts[rq_pa_start])); + context->cur_rq_wqe_ba_l = cpu_to_le32(rq_ba); roce_set_field(context->qpc_bytes_76, QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_M, QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_S, - mtts[rq_pa_start] >> 32); + upper_32_bits(rq_ba)); roce_set_field(context->qpc_bytes_76, QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_M, QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_S, 0); @@ -3062,8 +3084,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_156_SL_S, rdma_ah_get_sl(&attr->ah_attr)); hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); - } else if (cur_state == IB_QPS_RTR && - new_state == IB_QPS_RTS) { + } else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) { /* If exist optional param, return error */ if ((attr_mask & IB_QP_ALT_PATH) || (attr_mask & IB_QP_ACCESS_FLAGS) || @@ -3075,12 +3096,12 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, goto out; } - context->rx_cur_sq_wqe_ba_l = cpu_to_le32((u32)(mtts[0])); + context->rx_cur_sq_wqe_ba_l = cpu_to_le32(sq_ba); roce_set_field(context->qpc_bytes_120, QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_M, QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_S, - (mtts[0]) >> 32); + upper_32_bits(sq_ba)); roce_set_field(context->qpc_bytes_124, QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_M, @@ -3223,28 +3244,18 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_M, QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S, 0); - context->tx_cur_sq_wqe_ba_l = cpu_to_le32((u32)(mtts[0])); + context->tx_cur_sq_wqe_ba_l = cpu_to_le32(sq_ba); roce_set_field(context->qpc_bytes_188, QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_M, QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_S, - (mtts[0]) >> 32); + upper_32_bits(sq_ba)); roce_set_bit(context->qpc_bytes_188, QP_CONTEXT_QPC_BYTES_188_PKT_RETRY_FLG_S, 0); roce_set_field(context->qpc_bytes_188, QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_M, QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_S, 0); - } else if (!((cur_state == IB_QPS_INIT && new_state == IB_QPS_RESET) || - (cur_state == IB_QPS_INIT && new_state == IB_QPS_ERR) || - (cur_state == IB_QPS_RTR && new_state == IB_QPS_RESET) || - (cur_state == IB_QPS_RTR && new_state == IB_QPS_ERR) || - (cur_state == IB_QPS_RTS && new_state == IB_QPS_RESET) || - (cur_state == IB_QPS_RTS && new_state == IB_QPS_ERR) || - (cur_state == IB_QPS_ERR && new_state == IB_QPS_RESET) || - (cur_state == IB_QPS_ERR && new_state == IB_QPS_ERR))) { - dev_err(dev, "not support this status migration\n"); - goto out; } /* Every status migrate must change state */ @@ -3253,8 +3264,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_144_QP_STATE_S, new_state); /* SW pass context to HW */ - ret = hns_roce_v1_qp_modify(hr_dev, &hr_qp->mtt, - to_hns_roce_state(cur_state), + ret = hns_roce_v1_qp_modify(hr_dev, to_hns_roce_state(cur_state), to_hns_roce_state(new_state), context, hr_qp); if (ret) { @@ -3636,8 +3646,6 @@ static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) u32 cqe_cnt_cur; int wait_time = 0; - hns_roce_free_cqc(hr_dev, hr_cq); - /* * Before freeing cq buffer, we need to ensure that the outstanding CQE * have been written by checking the CQE counter. @@ -3660,14 +3668,6 @@ static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) } wait_time++; } - - hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); - - ib_umem_release(hr_cq->umem); - if (!udata) { - /* Free the buff of stored cq */ - hns_roce_buf_free(hr_dev, hr_cq->buf.size, &hr_cq->buf); - } } static void set_eq_cons_index_v1(struct hns_roce_eq *eq, int req_not) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index c3316672b70e..ebe570aa2323 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -95,6 +95,7 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, { struct hns_roce_mr *mr = to_hr_mr(wr->mr); struct hns_roce_wqe_frmr_seg *fseg = wqe; + u64 pbl_ba; /* use ib_access_flags */ roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S, @@ -109,19 +110,20 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, wr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0); /* Data structure reuse may lead to confusion */ - rc_sq_wqe->msg_len = cpu_to_le32(mr->pbl_ba & 0xffffffff); - rc_sq_wqe->inv_key = cpu_to_le32(mr->pbl_ba >> 32); + pbl_ba = mr->pbl_mtr.hem_cfg.root_ba; + rc_sq_wqe->msg_len = cpu_to_le32(lower_32_bits(pbl_ba)); + rc_sq_wqe->inv_key = cpu_to_le32(upper_32_bits(pbl_ba)); rc_sq_wqe->byte_16 = cpu_to_le32(wr->mr->length & 0xffffffff); rc_sq_wqe->byte_20 = cpu_to_le32(wr->mr->length >> 32); rc_sq_wqe->rkey = cpu_to_le32(wr->key); rc_sq_wqe->va = cpu_to_le64(wr->mr->iova); - fseg->pbl_size = cpu_to_le32(mr->pbl_size); + fseg->pbl_size = cpu_to_le32(mr->npages); roce_set_field(fseg->mode_buf_pg_sz, V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M, V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S, - mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); roce_set_bit(fseg->mode_buf_pg_sz, V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0); } @@ -152,47 +154,24 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, unsigned int *sge_ind, int valid_num_sge) { struct hns_roce_v2_wqe_data_seg *dseg; - struct ib_sge *sg; - int num_in_wqe = 0; - int extend_sge_num; - int fi_sge_num; - int se_sge_num; - int shift; - int i; + struct ib_sge *sge = wr->sg_list; + unsigned int idx = *sge_ind; + int cnt = valid_num_sge; - if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) - num_in_wqe = HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; - extend_sge_num = valid_num_sge - num_in_wqe; - sg = wr->sg_list + num_in_wqe; - shift = qp->hr_buf.page_shift; + if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { + cnt -= HNS_ROCE_SGE_IN_WQE; + sge += HNS_ROCE_SGE_IN_WQE; + } - /* - * Check whether wr->num_sge sges are in the same page. If not, we - * should calculate how many sges in the first page and the second - * page. - */ - dseg = hns_roce_get_extend_sge(qp, (*sge_ind) & (qp->sge.sge_cnt - 1)); - fi_sge_num = (round_up((uintptr_t)dseg, 1 << shift) - - (uintptr_t)dseg) / - sizeof(struct hns_roce_v2_wqe_data_seg); - if (extend_sge_num > fi_sge_num) { - se_sge_num = extend_sge_num - fi_sge_num; - for (i = 0; i < fi_sge_num; i++) { - set_data_seg_v2(dseg++, sg + i); - (*sge_ind)++; - } - dseg = hns_roce_get_extend_sge(qp, - (*sge_ind) & (qp->sge.sge_cnt - 1)); - for (i = 0; i < se_sge_num; i++) { - set_data_seg_v2(dseg++, sg + fi_sge_num + i); - (*sge_ind)++; - } - } else { - for (i = 0; i < extend_sge_num; i++) { - set_data_seg_v2(dseg++, sg + i); - (*sge_ind)++; - } + while (cnt > 0) { + dseg = hns_roce_get_extend_sge(qp, idx & (qp->sge.sge_cnt - 1)); + set_data_seg_v2(dseg, sge); + idx++; + sge++; + cnt--; } + + *sge_ind = idx; } static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, @@ -230,7 +209,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S, 1); } else { - if (valid_num_sge <= HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) { + if (valid_num_sge <= HNS_ROCE_SGE_IN_WQE) { for (i = 0; i < wr->num_sge; i++) { if (likely(wr->sg_list[i].length)) { set_data_seg_v2(dseg, wr->sg_list + i); @@ -243,8 +222,8 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, (*sge_ind) & (qp->sge.sge_cnt - 1)); - for (i = 0; i < wr->num_sge && - j < HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; i++) { + for (i = 0; i < wr->num_sge && j < HNS_ROCE_SGE_IN_WQE; + i++) { if (likely(wr->sg_list[i].length)) { set_data_seg_v2(dseg, wr->sg_list + i); dseg++; @@ -673,7 +652,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, } /* rq support inline data */ - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) { + if (hr_qp->rq_inl_buf.wqe_cnt) { sge_list = hr_qp->rq_inl_buf.wqe_list[wqe_idx].sg_list; hr_qp->rq_inl_buf.wqe_list[wqe_idx].sge_cnt = (u32)wr->num_sge; @@ -715,6 +694,128 @@ out: return ret; } +static void *get_srq_wqe(struct hns_roce_srq *srq, int n) +{ + return hns_roce_buf_offset(srq->buf_mtr.kmem, n << srq->wqe_shift); +} + +static void *get_idx_buf(struct hns_roce_idx_que *idx_que, int n) +{ + return hns_roce_buf_offset(idx_que->mtr.kmem, + n << idx_que->entry_shift); +} + +static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index) +{ + /* always called with interrupts disabled. */ + spin_lock(&srq->lock); + + bitmap_clear(srq->idx_que.bitmap, wqe_index, 1); + srq->tail++; + + spin_unlock(&srq->lock); +} + +static int find_empty_entry(struct hns_roce_idx_que *idx_que, + unsigned long size) +{ + int wqe_idx; + + if (unlikely(bitmap_full(idx_que->bitmap, size))) + return -ENOSPC; + + wqe_idx = find_first_zero_bit(idx_que->bitmap, size); + + bitmap_set(idx_que->bitmap, wqe_idx, 1); + + return wqe_idx; +} + +static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, + const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); + struct hns_roce_srq *srq = to_hr_srq(ibsrq); + struct hns_roce_v2_wqe_data_seg *dseg; + struct hns_roce_v2_db srq_db; + unsigned long flags; + __le32 *srq_idx; + int ret = 0; + int wqe_idx; + void *wqe; + int nreq; + int ind; + int i; + + spin_lock_irqsave(&srq->lock, flags); + + ind = srq->head & (srq->wqe_cnt - 1); + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (unlikely(wr->num_sge >= srq->max_gs)) { + ret = -EINVAL; + *bad_wr = wr; + break; + } + + if (unlikely(srq->head == srq->tail)) { + ret = -ENOMEM; + *bad_wr = wr; + break; + } + + wqe_idx = find_empty_entry(&srq->idx_que, srq->wqe_cnt); + if (wqe_idx < 0) { + ret = -ENOMEM; + *bad_wr = wr; + break; + } + + wqe = get_srq_wqe(srq, wqe_idx); + dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; + + for (i = 0; i < wr->num_sge; ++i) { + dseg[i].len = cpu_to_le32(wr->sg_list[i].length); + dseg[i].lkey = cpu_to_le32(wr->sg_list[i].lkey); + dseg[i].addr = cpu_to_le64(wr->sg_list[i].addr); + } + + if (i < srq->max_gs) { + dseg[i].len = 0; + dseg[i].lkey = cpu_to_le32(0x100); + dseg[i].addr = 0; + } + + srq_idx = get_idx_buf(&srq->idx_que, ind); + *srq_idx = cpu_to_le32(wqe_idx); + + srq->wrid[wqe_idx] = wr->wr_id; + ind = (ind + 1) & (srq->wqe_cnt - 1); + } + + if (likely(nreq)) { + srq->head += nreq; + + /* + * Make sure that descriptors are written before + * doorbell record. + */ + wmb(); + + srq_db.byte_4 = + cpu_to_le32(HNS_ROCE_V2_SRQ_DB << V2_DB_BYTE_4_CMD_S | + (srq->srqn & V2_DB_BYTE_4_TAG_M)); + srq_db.parameter = cpu_to_le32(srq->head); + + hns_roce_write64(hr_dev, (__le32 *)&srq_db, srq->db_reg_l); + } + + spin_unlock_irqrestore(&srq->lock, flags); + + return ret; +} + static int hns_roce_v2_cmd_hw_reseted(struct hns_roce_dev *hr_dev, unsigned long instance_stage, unsigned long reset_stage) @@ -1786,6 +1887,9 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->flags = roce_get_field(resp_c->cap_flags_num_pds, V2_QUERY_PF_CAPS_C_CAP_FLAGS_M, V2_QUERY_PF_CAPS_C_CAP_FLAGS_S); + caps->flags |= le16_to_cpu(resp_d->cap_flags_ex) << + HNS_ROCE_CAP_FLAGS_EX_SHIFT; + caps->num_cqs = 1 << roce_get_field(resp_c->max_gid_num_cqs, V2_QUERY_PF_CAPS_C_NUM_CQS_M, V2_QUERY_PF_CAPS_C_NUM_CQS_S); @@ -1978,11 +2082,6 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) hr_dev->vendor_part_id = hr_dev->pci_dev->device; hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid); - caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS; - caps->num_cqe_segs = HNS_ROCE_V2_MAX_CQE_SEGS; - caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS; - caps->num_idx_segs = HNS_ROCE_V2_MAX_IDX_SEGS; - caps->pbl_ba_pg_sz = HNS_ROCE_BA_PG_SZ_SUPPORTED_16K; caps->pbl_buf_pg_sz = 0; caps->pbl_hop_num = HNS_ROCE_PBL_HOP_NUM; @@ -2040,8 +2139,6 @@ static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev, page_num = link_tbl->npages; entry = link_tbl->table.buf; - memset(req_a, 0, sizeof(*req_a)); - memset(req_b, 0, sizeof(*req_b)); for (i = 0; i < 2; i++) { hns_roce_cmq_setup_basic_desc(&desc[i], opcode, false); @@ -2050,39 +2147,30 @@ static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev, desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); else desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - - if (i == 0) { - req_a->base_addr_l = - cpu_to_le32(link_tbl->table.map & 0xffffffff); - req_a->base_addr_h = - cpu_to_le32(link_tbl->table.map >> 32); - roce_set_field(req_a->depth_pgsz_init_en, - CFG_LLM_QUE_DEPTH_M, CFG_LLM_QUE_DEPTH_S, - link_tbl->npages); - roce_set_field(req_a->depth_pgsz_init_en, - CFG_LLM_QUE_PGSZ_M, CFG_LLM_QUE_PGSZ_S, - link_tbl->pg_sz); - req_a->head_ba_l = cpu_to_le32(entry[0].blk_ba0); - req_a->head_ba_h_nxtptr = - cpu_to_le32(entry[0].blk_ba1_nxt_ptr); - roce_set_field(req_a->head_ptr, CFG_LLM_HEAD_PTR_M, - CFG_LLM_HEAD_PTR_S, 0); - } else { - req_b->tail_ba_l = - cpu_to_le32(entry[page_num - 1].blk_ba0); - roce_set_field(req_b->tail_ba_h, CFG_LLM_TAIL_BA_H_M, - CFG_LLM_TAIL_BA_H_S, - entry[page_num - 1].blk_ba1_nxt_ptr & - HNS_ROCE_LINK_TABLE_BA1_M); - roce_set_field(req_b->tail_ptr, CFG_LLM_TAIL_PTR_M, - CFG_LLM_TAIL_PTR_S, - (entry[page_num - 2].blk_ba1_nxt_ptr & - HNS_ROCE_LINK_TABLE_NXT_PTR_M) >> - HNS_ROCE_LINK_TABLE_NXT_PTR_S); - } } + + req_a->base_addr_l = cpu_to_le32(link_tbl->table.map & 0xffffffff); + req_a->base_addr_h = cpu_to_le32(link_tbl->table.map >> 32); + roce_set_field(req_a->depth_pgsz_init_en, CFG_LLM_QUE_DEPTH_M, + CFG_LLM_QUE_DEPTH_S, link_tbl->npages); + roce_set_field(req_a->depth_pgsz_init_en, CFG_LLM_QUE_PGSZ_M, + CFG_LLM_QUE_PGSZ_S, link_tbl->pg_sz); roce_set_field(req_a->depth_pgsz_init_en, CFG_LLM_INIT_EN_M, CFG_LLM_INIT_EN_S, 1); + req_a->head_ba_l = cpu_to_le32(entry[0].blk_ba0); + req_a->head_ba_h_nxtptr = cpu_to_le32(entry[0].blk_ba1_nxt_ptr); + roce_set_field(req_a->head_ptr, CFG_LLM_HEAD_PTR_M, CFG_LLM_HEAD_PTR_S, + 0); + + req_b->tail_ba_l = cpu_to_le32(entry[page_num - 1].blk_ba0); + roce_set_field(req_b->tail_ba_h, CFG_LLM_TAIL_BA_H_M, + CFG_LLM_TAIL_BA_H_S, + entry[page_num - 1].blk_ba1_nxt_ptr & + HNS_ROCE_LINK_TABLE_BA1_M); + roce_set_field(req_b->tail_ptr, CFG_LLM_TAIL_PTR_M, CFG_LLM_TAIL_PTR_S, + (entry[page_num - 2].blk_ba1_nxt_ptr & + HNS_ROCE_LINK_TABLE_NXT_PTR_M) >> + HNS_ROCE_LINK_TABLE_NXT_PTR_S); return hns_roce_cmq_send(hr_dev, desc, 2); } @@ -2438,12 +2526,9 @@ static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, reg_smac_l = *(u32 *)(&addr[0]); reg_smac_h = *(u16 *)(&addr[4]); - memset(smac_tb, 0, sizeof(*smac_tb)); - roce_set_field(smac_tb->tb_idx_rsv, - CFG_SMAC_TB_IDX_M, + roce_set_field(smac_tb->tb_idx_rsv, CFG_SMAC_TB_IDX_M, CFG_SMAC_TB_IDX_S, phy_port); - roce_set_field(smac_tb->vf_smac_h_rsv, - CFG_SMAC_TB_VF_SMAC_H_M, + roce_set_field(smac_tb->vf_smac_h_rsv, CFG_SMAC_TB_VF_SMAC_H_M, CFG_SMAC_TB_VF_SMAC_H_S, reg_smac_h); smac_tb->vf_smac_l = cpu_to_le32(reg_smac_l); @@ -2453,32 +2538,30 @@ static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, static int set_mtpt_pbl(struct hns_roce_v2_mpt_entry *mpt_entry, struct hns_roce_mr *mr) { - struct sg_dma_page_iter sg_iter; - u64 page_addr; - u64 *pages; - int i; + struct hns_roce_dev *hr_dev = to_hr_dev(mr->ibmr.device); + u64 pages[HNS_ROCE_V2_MAX_INNER_MTPT_NUM] = { 0 }; + struct ib_device *ibdev = &hr_dev->ib_dev; + dma_addr_t pbl_ba; + int i, count; - mpt_entry->pbl_size = cpu_to_le32(mr->pbl_size); - mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(mr->pbl_ba >> 3)); - roce_set_field(mpt_entry->byte_48_mode_ba, - V2_MPT_BYTE_48_PBL_BA_H_M, V2_MPT_BYTE_48_PBL_BA_H_S, - upper_32_bits(mr->pbl_ba >> 3)); + count = hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, pages, + ARRAY_SIZE(pages), &pbl_ba); + if (count < 1) { + ibdev_err(ibdev, "failed to find PBL mtr, count = %d.\n", + count); + return -ENOBUFS; + } - pages = (u64 *)__get_free_page(GFP_KERNEL); - if (!pages) - return -ENOMEM; + /* Aligned to the hardware address access unit */ + for (i = 0; i < count; i++) + pages[i] >>= 6; - i = 0; - for_each_sg_dma_page(mr->umem->sg_head.sgl, &sg_iter, mr->umem->nmap, 0) { - page_addr = sg_page_iter_dma_address(&sg_iter); - pages[i] = page_addr >> 6; + mpt_entry->pbl_size = cpu_to_le32(mr->npages); + mpt_entry->pbl_ba_l = cpu_to_le32(pbl_ba >> 3); + roce_set_field(mpt_entry->byte_48_mode_ba, + V2_MPT_BYTE_48_PBL_BA_H_M, V2_MPT_BYTE_48_PBL_BA_H_S, + upper_32_bits(pbl_ba >> 3)); - /* Record the first 2 entry directly to MTPT table */ - if (i >= HNS_ROCE_V2_MAX_INNER_MTPT_NUM - 1) - goto found; - i++; - } -found: mpt_entry->pa0_l = cpu_to_le32(lower_32_bits(pages[0])); roce_set_field(mpt_entry->byte_56_pa0_h, V2_MPT_BYTE_56_PA0_H_M, V2_MPT_BYTE_56_PA0_H_S, upper_32_bits(pages[0])); @@ -2489,9 +2572,7 @@ found: roce_set_field(mpt_entry->byte_64_buf_pa1, V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M, V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S, - mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET); - - free_page((unsigned long)pages); + to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); return 0; } @@ -2513,7 +2594,7 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_BA_PG_SZ_M, V2_MPT_BYTE_4_PBL_BA_PG_SZ_S, - mr->pbl_ba_pg_sz + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift)); roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, V2_MPT_BYTE_4_PD_S, mr->pd); @@ -2599,11 +2680,19 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev, static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr) { + struct hns_roce_dev *hr_dev = to_hr_dev(mr->ibmr.device); + struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_v2_mpt_entry *mpt_entry; + dma_addr_t pbl_ba = 0; mpt_entry = mb_buf; memset(mpt_entry, 0, sizeof(*mpt_entry)); + if (hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, NULL, 0, &pbl_ba) < 0) { + ibdev_err(ibdev, "failed to find frmr mtr.\n"); + return -ENOBUFS; + } + roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M, V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE); roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M, @@ -2611,7 +2700,7 @@ static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr) roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_BA_PG_SZ_M, V2_MPT_BYTE_4_PBL_BA_PG_SZ_S, - mr->pbl_ba_pg_sz + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift)); roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, V2_MPT_BYTE_4_PD_S, mr->pd); @@ -2624,17 +2713,17 @@ static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr) roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 0); roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1); - mpt_entry->pbl_size = cpu_to_le32(mr->pbl_size); + mpt_entry->pbl_size = cpu_to_le32(mr->npages); - mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(mr->pbl_ba >> 3)); + mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(pbl_ba >> 3)); roce_set_field(mpt_entry->byte_48_mode_ba, V2_MPT_BYTE_48_PBL_BA_H_M, V2_MPT_BYTE_48_PBL_BA_H_S, - upper_32_bits(mr->pbl_ba >> 3)); + upper_32_bits(pbl_ba >> 3)); roce_set_field(mpt_entry->byte_64_buf_pa1, V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M, V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S, - mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); return 0; } @@ -2680,7 +2769,8 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw) static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n) { - return hns_roce_buf_offset(&hr_cq->buf, n * HNS_ROCE_V2_CQE_ENTRY_SIZE); + return hns_roce_buf_offset(hr_cq->mtr.kmem, + n * HNS_ROCE_V2_CQE_ENTRY_SIZE); } static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n) @@ -2697,22 +2787,6 @@ static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *hr_cq) return get_sw_cqe_v2(hr_cq, hr_cq->cons_index); } -static void *get_srq_wqe(struct hns_roce_srq *srq, int n) -{ - return hns_roce_buf_offset(&srq->buf, n << srq->wqe_shift); -} - -static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index) -{ - /* always called with interrupts disabled. */ - spin_lock(&srq->lock); - - bitmap_clear(srq->idx_que.bitmap, wqe_index, 1); - srq->tail++; - - spin_unlock(&srq->lock); -} - static void hns_roce_v2_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index) { *hr_cq->set_ci_db = cons_index & V2_CQ_DB_PARAMETER_CONS_IDX_M; @@ -2801,30 +2875,30 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, roce_set_field(cq_context->byte_8_cqn, V2_CQC_BYTE_8_CQN_M, V2_CQC_BYTE_8_CQN_S, hr_cq->cqn); - cq_context->cqe_cur_blk_addr = cpu_to_le32(mtts[0] >> PAGE_ADDR_SHIFT); + cq_context->cqe_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0])); roce_set_field(cq_context->byte_16_hop_addr, V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_M, V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_S, - mtts[0] >> (32 + PAGE_ADDR_SHIFT)); + upper_32_bits(to_hr_hw_page_addr(mtts[0]))); roce_set_field(cq_context->byte_16_hop_addr, V2_CQC_BYTE_16_CQE_HOP_NUM_M, V2_CQC_BYTE_16_CQE_HOP_NUM_S, hr_dev->caps.cqe_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : hr_dev->caps.cqe_hop_num); - cq_context->cqe_nxt_blk_addr = cpu_to_le32(mtts[1] >> PAGE_ADDR_SHIFT); + cq_context->cqe_nxt_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[1])); roce_set_field(cq_context->byte_24_pgsz_addr, V2_CQC_BYTE_24_CQE_NXT_BLK_ADDR_M, V2_CQC_BYTE_24_CQE_NXT_BLK_ADDR_S, - mtts[1] >> (32 + PAGE_ADDR_SHIFT)); + upper_32_bits(to_hr_hw_page_addr(mtts[1]))); roce_set_field(cq_context->byte_24_pgsz_addr, V2_CQC_BYTE_24_CQE_BA_PG_SZ_M, V2_CQC_BYTE_24_CQE_BA_PG_SZ_S, - hr_dev->caps.cqe_ba_pg_sz + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(hr_cq->mtr.hem_cfg.ba_pg_shift)); roce_set_field(cq_context->byte_24_pgsz_addr, V2_CQC_BYTE_24_CQE_BUF_PG_SZ_M, V2_CQC_BYTE_24_CQE_BUF_PG_SZ_S, - hr_dev->caps.cqe_buf_pg_sz + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(hr_cq->mtr.hem_cfg.buf_pg_shift)); cq_context->cqe_ba = cpu_to_le32(dma_handle >> 3); @@ -2968,6 +3042,61 @@ out: return npolled; } +static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, + struct hns_roce_v2_cqe *cqe, struct ib_wc *wc) +{ + static const struct { + u32 cqe_status; + enum ib_wc_status wc_status; + } map[] = { + { HNS_ROCE_CQE_V2_SUCCESS, IB_WC_SUCCESS }, + { HNS_ROCE_CQE_V2_LOCAL_LENGTH_ERR, IB_WC_LOC_LEN_ERR }, + { HNS_ROCE_CQE_V2_LOCAL_QP_OP_ERR, IB_WC_LOC_QP_OP_ERR }, + { HNS_ROCE_CQE_V2_LOCAL_PROT_ERR, IB_WC_LOC_PROT_ERR }, + { HNS_ROCE_CQE_V2_WR_FLUSH_ERR, IB_WC_WR_FLUSH_ERR }, + { HNS_ROCE_CQE_V2_MW_BIND_ERR, IB_WC_MW_BIND_ERR }, + { HNS_ROCE_CQE_V2_BAD_RESP_ERR, IB_WC_BAD_RESP_ERR }, + { HNS_ROCE_CQE_V2_LOCAL_ACCESS_ERR, IB_WC_LOC_ACCESS_ERR }, + { HNS_ROCE_CQE_V2_REMOTE_INVAL_REQ_ERR, IB_WC_REM_INV_REQ_ERR }, + { HNS_ROCE_CQE_V2_REMOTE_ACCESS_ERR, IB_WC_REM_ACCESS_ERR }, + { HNS_ROCE_CQE_V2_REMOTE_OP_ERR, IB_WC_REM_OP_ERR }, + { HNS_ROCE_CQE_V2_TRANSPORT_RETRY_EXC_ERR, + IB_WC_RETRY_EXC_ERR }, + { HNS_ROCE_CQE_V2_RNR_RETRY_EXC_ERR, IB_WC_RNR_RETRY_EXC_ERR }, + { HNS_ROCE_CQE_V2_REMOTE_ABORT_ERR, IB_WC_REM_ABORT_ERR }, + }; + + u32 cqe_status = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_STATUS_M, + V2_CQE_BYTE_4_STATUS_S); + int i; + + wc->status = IB_WC_GENERAL_ERR; + for (i = 0; i < ARRAY_SIZE(map); i++) + if (cqe_status == map[i].cqe_status) { + wc->status = map[i].wc_status; + break; + } + + if (wc->status == IB_WC_SUCCESS || wc->status == IB_WC_WR_FLUSH_ERR) + return; + + ibdev_err(&hr_dev->ib_dev, "error cqe status 0x%x:\n", cqe_status); + print_hex_dump(KERN_ERR, "", DUMP_PREFIX_NONE, 16, 4, cqe, + sizeof(*cqe), false); + + /* + * Hip08 hardware cannot flush the WQEs in SQ/RQ if the QP state gets + * into errored mode. Hence, as a workaround to this hardware + * limitation, driver needs to assist in flushing. But the flushing + * operation uses mailbox to convey the QP state to the hardware and + * which can sleep due to the mutex protection around the mailbox calls. + * Hence, use the deferred flush for now. Once wc error detected, the + * flushing operation is needed. + */ + if (!test_and_set_bit(HNS_ROCE_FLUSH_FLAG, &qp->flush_flag)) + init_flush_work(hr_dev, qp); +} + static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, struct hns_roce_qp **cur_qp, struct ib_wc *wc) { @@ -2979,7 +3108,6 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, int is_send; u16 wqe_ctr; u32 opcode; - u32 status; int qpn; int ret; @@ -3009,7 +3137,6 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, *cur_qp = hr_qp; } - hr_qp = *cur_qp; wc->qp = &(*cur_qp)->ibqp; wc->vendor_err = 0; @@ -3044,77 +3171,8 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, ++wq->tail; } - status = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_STATUS_M, - V2_CQE_BYTE_4_STATUS_S); - switch (status & HNS_ROCE_V2_CQE_STATUS_MASK) { - case HNS_ROCE_CQE_V2_SUCCESS: - wc->status = IB_WC_SUCCESS; - break; - case HNS_ROCE_CQE_V2_LOCAL_LENGTH_ERR: - wc->status = IB_WC_LOC_LEN_ERR; - break; - case HNS_ROCE_CQE_V2_LOCAL_QP_OP_ERR: - wc->status = IB_WC_LOC_QP_OP_ERR; - break; - case HNS_ROCE_CQE_V2_LOCAL_PROT_ERR: - wc->status = IB_WC_LOC_PROT_ERR; - break; - case HNS_ROCE_CQE_V2_WR_FLUSH_ERR: - wc->status = IB_WC_WR_FLUSH_ERR; - break; - case HNS_ROCE_CQE_V2_MW_BIND_ERR: - wc->status = IB_WC_MW_BIND_ERR; - break; - case HNS_ROCE_CQE_V2_BAD_RESP_ERR: - wc->status = IB_WC_BAD_RESP_ERR; - break; - case HNS_ROCE_CQE_V2_LOCAL_ACCESS_ERR: - wc->status = IB_WC_LOC_ACCESS_ERR; - break; - case HNS_ROCE_CQE_V2_REMOTE_INVAL_REQ_ERR: - wc->status = IB_WC_REM_INV_REQ_ERR; - break; - case HNS_ROCE_CQE_V2_REMOTE_ACCESS_ERR: - wc->status = IB_WC_REM_ACCESS_ERR; - break; - case HNS_ROCE_CQE_V2_REMOTE_OP_ERR: - wc->status = IB_WC_REM_OP_ERR; - break; - case HNS_ROCE_CQE_V2_TRANSPORT_RETRY_EXC_ERR: - wc->status = IB_WC_RETRY_EXC_ERR; - break; - case HNS_ROCE_CQE_V2_RNR_RETRY_EXC_ERR: - wc->status = IB_WC_RNR_RETRY_EXC_ERR; - break; - case HNS_ROCE_CQE_V2_REMOTE_ABORT_ERR: - wc->status = IB_WC_REM_ABORT_ERR; - break; - default: - wc->status = IB_WC_GENERAL_ERR; - break; - } - - /* - * Hip08 hardware cannot flush the WQEs in SQ/RQ if the QP state gets - * into errored mode. Hence, as a workaround to this hardware - * limitation, driver needs to assist in flushing. But the flushing - * operation uses mailbox to convey the QP state to the hardware and - * which can sleep due to the mutex protection around the mailbox calls. - * Hence, use the deferred flush for now. Once wc error detected, the - * flushing operation is needed. - */ - if (wc->status != IB_WC_SUCCESS && - wc->status != IB_WC_WR_FLUSH_ERR) { - ibdev_err(&hr_dev->ib_dev, "error cqe status is: 0x%x\n", - status & HNS_ROCE_V2_CQE_STATUS_MASK); - - if (!test_and_set_bit(HNS_ROCE_FLUSH_FLAG, &hr_qp->flush_flag)) - init_flush_work(hr_dev, hr_qp); - - return 0; - } - - if (wc->status == IB_WC_WR_FLUSH_ERR) + get_cqe_status(hr_dev, *cur_qp, cqe, wc); + if (wc->status != IB_WC_SUCCESS) return 0; if (is_send) { @@ -3514,29 +3572,18 @@ static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp, struct hns_roce_v2_qp_context *context, struct hns_roce_v2_qp_context *qpc_mask) { - if (hr_qp->ibqp.qp_type == IB_QPT_GSI) - roce_set_field(context->byte_4_sqpn_tst, - V2_QPC_BYTE_4_SGE_SHIFT_M, - V2_QPC_BYTE_4_SGE_SHIFT_S, - ilog2((unsigned int)hr_qp->sge.sge_cnt)); - else - roce_set_field(context->byte_4_sqpn_tst, - V2_QPC_BYTE_4_SGE_SHIFT_M, - V2_QPC_BYTE_4_SGE_SHIFT_S, - hr_qp->sq.max_gs > - HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE ? - ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0); + roce_set_field(context->byte_4_sqpn_tst, + V2_QPC_BYTE_4_SGE_SHIFT_M, V2_QPC_BYTE_4_SGE_SHIFT_S, + to_hr_hem_entries_shift(hr_qp->sge.sge_cnt, + hr_qp->sge.sge_shift)); roce_set_field(context->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, - ilog2((unsigned int)hr_qp->sq.wqe_cnt)); + ilog2(hr_qp->sq.wqe_cnt)); roce_set_field(context->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, - (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI || - hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || - hr_qp->ibqp.srq) ? 0 : - ilog2((unsigned int)hr_qp->rq.wqe_cnt)); + ilog2(hr_qp->rq.wqe_cnt)); } static void modify_qp_reset_to_init(struct ib_qp *ibqp, @@ -3572,7 +3619,7 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M, V2_QPC_BYTE_24_VLAN_ID_S, 0xfff); - if (hr_qp->rdb_en) + if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB) roce_set_bit(context->byte_68_rq_db, V2_QPC_BYTE_68_RQ_RECORD_EN_S, 1); @@ -3757,7 +3804,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, int port; /* Search qp buf's mtts */ - page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); + page_size = 1 << hr_qp->mtr.hem_cfg.buf_pg_shift; count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset / page_size, mtts, MTT_MIN_COUNT, &wqe_sge_ba); @@ -3804,17 +3851,16 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(context->byte_12_sq_hop, V2_QPC_BYTE_12_SQ_HOP_NUM_M, V2_QPC_BYTE_12_SQ_HOP_NUM_S, - hr_dev->caps.wqe_sq_hop_num == HNS_ROCE_HOP_NUM_0 ? - 0 : hr_dev->caps.wqe_sq_hop_num); + to_hr_hem_hopnum(hr_dev->caps.wqe_sq_hop_num, + hr_qp->sq.wqe_cnt)); roce_set_field(qpc_mask->byte_12_sq_hop, V2_QPC_BYTE_12_SQ_HOP_NUM_M, V2_QPC_BYTE_12_SQ_HOP_NUM_S, 0); roce_set_field(context->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_SGE_HOP_NUM_M, V2_QPC_BYTE_20_SGE_HOP_NUM_S, - ((ibqp->qp_type == IB_QPT_GSI) || - hr_qp->sq.max_gs > HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ? - hr_dev->caps.wqe_sge_hop_num : 0); + to_hr_hem_hopnum(hr_dev->caps.wqe_sge_hop_num, + hr_qp->sge.sge_cnt)); roce_set_field(qpc_mask->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_SGE_HOP_NUM_M, V2_QPC_BYTE_20_SGE_HOP_NUM_S, 0); @@ -3822,8 +3868,9 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(context->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQ_HOP_NUM_M, V2_QPC_BYTE_20_RQ_HOP_NUM_S, - hr_dev->caps.wqe_rq_hop_num == HNS_ROCE_HOP_NUM_0 ? - 0 : hr_dev->caps.wqe_rq_hop_num); + to_hr_hem_hopnum(hr_dev->caps.wqe_rq_hop_num, + hr_qp->rq.wqe_cnt)); + roce_set_field(qpc_mask->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQ_HOP_NUM_M, V2_QPC_BYTE_20_RQ_HOP_NUM_S, 0); @@ -3831,7 +3878,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_S, - hr_qp->wqe_bt_pg_shift + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(hr_qp->mtr.hem_cfg.ba_pg_shift)); roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_S, 0); @@ -3839,29 +3886,29 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_S, - hr_dev->caps.mtt_buf_pg_sz + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(hr_qp->mtr.hem_cfg.buf_pg_shift)); roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_S, 0); - context->rq_cur_blk_addr = cpu_to_le32(mtts[0] >> PAGE_ADDR_SHIFT); + context->rq_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0])); qpc_mask->rq_cur_blk_addr = 0; roce_set_field(context->byte_92_srq_info, V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_M, V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_S, - mtts[0] >> (32 + PAGE_ADDR_SHIFT)); + upper_32_bits(to_hr_hw_page_addr(mtts[0]))); roce_set_field(qpc_mask->byte_92_srq_info, V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_M, V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_S, 0); - context->rq_nxt_blk_addr = cpu_to_le32(mtts[1] >> PAGE_ADDR_SHIFT); + context->rq_nxt_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[1])); qpc_mask->rq_nxt_blk_addr = 0; roce_set_field(context->byte_104_rq_sge, V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_M, V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_S, - mtts[1] >> (32 + PAGE_ADDR_SHIFT)); + upper_32_bits(to_hr_hw_page_addr(mtts[1]))); roce_set_field(qpc_mask->byte_104_rq_sge, V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_M, V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_S, 0); @@ -3928,7 +3975,8 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, /* mtu*(2^LP_PKTN_INI) should not bigger than 1 message length 64kb */ roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M, - V2_QPC_BYTE_56_LP_PKTN_INI_S, 0); + V2_QPC_BYTE_56_LP_PKTN_INI_S, + ilog2(hr_dev->caps.max_sq_inline / IB_MTU_4096)); roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M, V2_QPC_BYTE_56_LP_PKTN_INI_S, 0); @@ -3995,18 +4043,18 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, /* Search qp buf's mtts */ count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 0, &sq_cur_blk, 1, NULL); if (count < 1) { - ibdev_err(ibdev, "failed to find buf pa of QP(0x%lx)\n", + ibdev_err(ibdev, "failed to find QP(0x%lx) SQ buf\n", hr_qp->qpn); return -EINVAL; } - if (hr_qp->sge.offset) { - page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); + if (hr_qp->sge.sge_cnt > 0) { + page_size = 1 << hr_qp->mtr.hem_cfg.buf_pg_shift; count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->sge.offset / page_size, &sge_cur_blk, 1, NULL); if (count < 1) { - ibdev_err(ibdev, "failed to find sge pa of QP(0x%lx)\n", + ibdev_err(ibdev, "failed to find QP(0x%lx) SGE buf\n", hr_qp->qpn); return -EINVAL; } @@ -4024,38 +4072,33 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, * we should set all bits of the relevant fields in context mask to * 0 at the same time, else set them to 0x1. */ - context->sq_cur_blk_addr = cpu_to_le32(sq_cur_blk >> PAGE_ADDR_SHIFT); + context->sq_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(sq_cur_blk)); roce_set_field(context->byte_168_irrl_idx, V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M, V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_S, - sq_cur_blk >> (32 + PAGE_ADDR_SHIFT)); + upper_32_bits(to_hr_hw_page_addr(sq_cur_blk))); qpc_mask->sq_cur_blk_addr = 0; roce_set_field(qpc_mask->byte_168_irrl_idx, V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M, V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_S, 0); - context->sq_cur_sge_blk_addr = ((ibqp->qp_type == IB_QPT_GSI) || - hr_qp->sq.max_gs > HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ? - cpu_to_le32(sge_cur_blk >> - PAGE_ADDR_SHIFT) : 0; + context->sq_cur_sge_blk_addr = + cpu_to_le32(to_hr_hw_page_addr(sge_cur_blk)); roce_set_field(context->byte_184_irrl_idx, V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M, V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S, - ((ibqp->qp_type == IB_QPT_GSI) || hr_qp->sq.max_gs > - HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ? - (sge_cur_blk >> - (32 + PAGE_ADDR_SHIFT)) : 0); + upper_32_bits(to_hr_hw_page_addr(sge_cur_blk))); qpc_mask->sq_cur_sge_blk_addr = 0; roce_set_field(qpc_mask->byte_184_irrl_idx, V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M, V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S, 0); context->rx_sq_cur_blk_addr = - cpu_to_le32(sq_cur_blk >> PAGE_ADDR_SHIFT); + cpu_to_le32(to_hr_hw_page_addr(sq_cur_blk)); roce_set_field(context->byte_232_irrl_sge, V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_M, V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_S, - sq_cur_blk >> (32 + PAGE_ADDR_SHIFT)); + upper_32_bits(to_hr_hw_page_addr(sq_cur_blk))); qpc_mask->rx_sq_cur_blk_addr = 0; roce_set_field(qpc_mask->byte_232_irrl_sge, V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_M, @@ -4108,21 +4151,6 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, return 0; } -static inline bool hns_roce_v2_check_qp_stat(enum ib_qp_state cur_state, - enum ib_qp_state new_state) -{ - - if ((cur_state != IB_QPS_RESET && - (new_state == IB_QPS_ERR || new_state == IB_QPS_RESET)) || - ((cur_state == IB_QPS_RTS || cur_state == IB_QPS_SQD) && - (new_state == IB_QPS_RTS || new_state == IB_QPS_SQD)) || - (cur_state == IB_QPS_SQE && new_state == IB_QPS_RTS)) - return true; - - return false; - -} - static int hns_roce_v2_set_path(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, @@ -4226,6 +4254,28 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, return 0; } +static bool check_qp_state(enum ib_qp_state cur_state, + enum ib_qp_state new_state) +{ + static const bool sm[][IB_QPS_ERR + 1] = { + [IB_QPS_RESET] = { [IB_QPS_RESET] = true, + [IB_QPS_INIT] = true }, + [IB_QPS_INIT] = { [IB_QPS_RESET] = true, + [IB_QPS_INIT] = true, + [IB_QPS_RTR] = true, + [IB_QPS_ERR] = true }, + [IB_QPS_RTR] = { [IB_QPS_RESET] = true, + [IB_QPS_RTS] = true, + [IB_QPS_ERR] = true }, + [IB_QPS_RTS] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true }, + [IB_QPS_SQD] = {}, + [IB_QPS_SQE] = {}, + [IB_QPS_ERR] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true } + }; + + return sm[cur_state][new_state]; +} + static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, @@ -4237,6 +4287,11 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); int ret = 0; + if (!check_qp_state(cur_state, new_state)) { + ibdev_err(&hr_dev->ib_dev, "Illegal state for QP!\n"); + return -EINVAL; + } + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { memset(qpc_mask, 0, sizeof(*qpc_mask)); modify_qp_reset_to_init(ibqp, attr, attr_mask, context, @@ -4247,23 +4302,11 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp, } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { ret = modify_qp_init_to_rtr(ibqp, attr, attr_mask, context, qpc_mask); - if (ret) - goto out; } else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) { ret = modify_qp_rtr_to_rts(ibqp, attr, attr_mask, context, qpc_mask); - if (ret) - goto out; - } else if (hns_roce_v2_check_qp_stat(cur_state, new_state)) { - /* Nothing */ - ; - } else { - ibdev_err(&hr_dev->ib_dev, "Illegal state for QP!\n"); - ret = -EINVAL; - goto out; } -out: return ret; } @@ -4554,19 +4597,20 @@ out: return ret; } -static inline enum ib_qp_state to_ib_qp_st(enum hns_roce_v2_qp_state state) +static int to_ib_qp_st(enum hns_roce_v2_qp_state state) { - switch (state) { - case HNS_ROCE_QP_ST_RST: return IB_QPS_RESET; - case HNS_ROCE_QP_ST_INIT: return IB_QPS_INIT; - case HNS_ROCE_QP_ST_RTR: return IB_QPS_RTR; - case HNS_ROCE_QP_ST_RTS: return IB_QPS_RTS; - case HNS_ROCE_QP_ST_SQ_DRAINING: - case HNS_ROCE_QP_ST_SQD: return IB_QPS_SQD; - case HNS_ROCE_QP_ST_SQER: return IB_QPS_SQE; - case HNS_ROCE_QP_ST_ERR: return IB_QPS_ERR; - default: return -1; - } + static const enum ib_qp_state map[] = { + [HNS_ROCE_QP_ST_RST] = IB_QPS_RESET, + [HNS_ROCE_QP_ST_INIT] = IB_QPS_INIT, + [HNS_ROCE_QP_ST_RTR] = IB_QPS_RTR, + [HNS_ROCE_QP_ST_RTS] = IB_QPS_RTS, + [HNS_ROCE_QP_ST_SQD] = IB_QPS_SQD, + [HNS_ROCE_QP_ST_SQER] = IB_QPS_SQE, + [HNS_ROCE_QP_ST_ERR] = IB_QPS_ERR, + [HNS_ROCE_QP_ST_SQ_DRAINING] = IB_QPS_SQD + }; + + return (state < ARRAY_SIZE(map)) ? map[state] : -1; } static int hns_roce_v2_query_qpc(struct hns_roce_dev *hr_dev, @@ -4838,6 +4882,186 @@ out: return ret; } +static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, + struct hns_roce_srq *srq, u32 pdn, u16 xrcd, + u32 cqn, void *mb_buf, u64 *mtts_wqe, + u64 *mtts_idx, dma_addr_t dma_handle_wqe, + dma_addr_t dma_handle_idx) +{ + struct hns_roce_srq_context *srq_context; + + srq_context = mb_buf; + memset(srq_context, 0, sizeof(*srq_context)); + + roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQ_ST_M, + SRQC_BYTE_4_SRQ_ST_S, 1); + + roce_set_field(srq_context->byte_4_srqn_srqst, + SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M, + SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S, + to_hr_hem_hopnum(hr_dev->caps.srqwqe_hop_num, + srq->wqe_cnt)); + roce_set_field(srq_context->byte_4_srqn_srqst, + SRQC_BYTE_4_SRQ_SHIFT_M, SRQC_BYTE_4_SRQ_SHIFT_S, + ilog2(srq->wqe_cnt)); + + roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQN_M, + SRQC_BYTE_4_SRQN_S, srq->srqn); + + roce_set_field(srq_context->byte_8_limit_wl, SRQC_BYTE_8_SRQ_LIMIT_WL_M, + SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0); + + roce_set_field(srq_context->byte_12_xrcd, SRQC_BYTE_12_SRQ_XRCD_M, + SRQC_BYTE_12_SRQ_XRCD_S, xrcd); + + srq_context->wqe_bt_ba = cpu_to_le32((u32)(dma_handle_wqe >> 3)); + + roce_set_field(srq_context->byte_24_wqe_bt_ba, + SRQC_BYTE_24_SRQ_WQE_BT_BA_M, + SRQC_BYTE_24_SRQ_WQE_BT_BA_S, + dma_handle_wqe >> 35); + + roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_PD_M, + SRQC_BYTE_28_PD_S, pdn); + roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_RQWS_M, + SRQC_BYTE_28_RQWS_S, srq->max_gs <= 0 ? 0 : + fls(srq->max_gs - 1)); + + srq_context->idx_bt_ba = cpu_to_le32(dma_handle_idx >> 3); + roce_set_field(srq_context->rsv_idx_bt_ba, + SRQC_BYTE_36_SRQ_IDX_BT_BA_M, + SRQC_BYTE_36_SRQ_IDX_BT_BA_S, + dma_handle_idx >> 35); + + srq_context->idx_cur_blk_addr = + cpu_to_le32(to_hr_hw_page_addr(mtts_idx[0])); + roce_set_field(srq_context->byte_44_idxbufpgsz_addr, + SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M, + SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S, + upper_32_bits(to_hr_hw_page_addr(mtts_idx[0]))); + roce_set_field(srq_context->byte_44_idxbufpgsz_addr, + SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M, + SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S, + to_hr_hem_hopnum(hr_dev->caps.idx_hop_num, + srq->wqe_cnt)); + + roce_set_field(srq_context->byte_44_idxbufpgsz_addr, + SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M, + SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S, + to_hr_hw_page_shift(srq->idx_que.mtr.hem_cfg.ba_pg_shift)); + roce_set_field(srq_context->byte_44_idxbufpgsz_addr, + SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M, + SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S, + to_hr_hw_page_shift(srq->idx_que.mtr.hem_cfg.buf_pg_shift)); + + srq_context->idx_nxt_blk_addr = + cpu_to_le32(to_hr_hw_page_addr(mtts_idx[1])); + roce_set_field(srq_context->rsv_idxnxtblkaddr, + SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M, + SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S, + upper_32_bits(to_hr_hw_page_addr(mtts_idx[1]))); + roce_set_field(srq_context->byte_56_xrc_cqn, + SRQC_BYTE_56_SRQ_XRC_CQN_M, SRQC_BYTE_56_SRQ_XRC_CQN_S, + cqn); + roce_set_field(srq_context->byte_56_xrc_cqn, + SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M, + SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S, + to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.ba_pg_shift)); + roce_set_field(srq_context->byte_56_xrc_cqn, + SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M, + SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S, + to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.buf_pg_shift)); + + roce_set_bit(srq_context->db_record_addr_record_en, + SRQC_BYTE_60_SRQ_RECORD_EN_S, 0); +} + +static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, + struct ib_srq_attr *srq_attr, + enum ib_srq_attr_mask srq_attr_mask, + struct ib_udata *udata) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); + struct hns_roce_srq *srq = to_hr_srq(ibsrq); + struct hns_roce_srq_context *srq_context; + struct hns_roce_srq_context *srqc_mask; + struct hns_roce_cmd_mailbox *mailbox; + int ret; + + if (srq_attr_mask & IB_SRQ_LIMIT) { + if (srq_attr->srq_limit >= srq->wqe_cnt) + return -EINVAL; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + srq_context = mailbox->buf; + srqc_mask = (struct hns_roce_srq_context *)mailbox->buf + 1; + + memset(srqc_mask, 0xff, sizeof(*srqc_mask)); + + roce_set_field(srq_context->byte_8_limit_wl, + SRQC_BYTE_8_SRQ_LIMIT_WL_M, + SRQC_BYTE_8_SRQ_LIMIT_WL_S, srq_attr->srq_limit); + roce_set_field(srqc_mask->byte_8_limit_wl, + SRQC_BYTE_8_SRQ_LIMIT_WL_M, + SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0); + + ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, srq->srqn, 0, + HNS_ROCE_CMD_MODIFY_SRQC, + HNS_ROCE_CMD_TIMEOUT_MSECS); + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + if (ret) { + ibdev_err(&hr_dev->ib_dev, + "failed to handle cmd of modifying SRQ, ret = %d.\n", + ret); + return ret; + } + } + + return 0; +} + +static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); + struct hns_roce_srq *srq = to_hr_srq(ibsrq); + struct hns_roce_srq_context *srq_context; + struct hns_roce_cmd_mailbox *mailbox; + int limit_wl; + int ret; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + srq_context = mailbox->buf; + ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, srq->srqn, 0, + HNS_ROCE_CMD_QUERY_SRQC, + HNS_ROCE_CMD_TIMEOUT_MSECS); + if (ret) { + ibdev_err(&hr_dev->ib_dev, + "failed to process cmd of querying SRQ, ret = %d.\n", + ret); + goto out; + } + + limit_wl = roce_get_field(srq_context->byte_8_limit_wl, + SRQC_BYTE_8_SRQ_LIMIT_WL_M, + SRQC_BYTE_8_SRQ_LIMIT_WL_S); + + attr->srq_limit = limit_wl; + attr->max_wr = srq->wqe_cnt; + attr->max_sge = srq->max_gs; + + memcpy(srq_context, mailbox->buf, sizeof(*srq_context)); + +out: + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + return ret; +} + static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) { struct hns_roce_dev *hr_dev = to_hr_dev(cq->device); @@ -4989,24 +5213,14 @@ static void set_eq_cons_index_v2(struct hns_roce_eq *eq) hns_roce_write64(hr_dev, doorbell, eq->doorbell); } -static inline void *get_eqe_buf(struct hns_roce_eq *eq, unsigned long offset) -{ - u32 buf_chk_sz; - - buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT); - if (eq->buf.nbufs == 1) - return eq->buf.direct.buf + offset % buf_chk_sz; - else - return eq->buf.page_list[offset / buf_chk_sz].buf + - offset % buf_chk_sz; -} - static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq) { struct hns_roce_aeqe *aeqe; - aeqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) * - HNS_ROCE_AEQ_ENTRY_SIZE); + aeqe = hns_roce_buf_offset(eq->mtr.kmem, + (eq->cons_index & (eq->entries - 1)) * + HNS_ROCE_AEQ_ENTRY_SIZE); + return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^ !!(eq->cons_index & eq->entries)) ? aeqe : NULL; } @@ -5103,8 +5317,9 @@ static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq) { struct hns_roce_ceqe *ceqe; - ceqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) * - HNS_ROCE_CEQ_ENTRY_SIZE); + ceqe = hns_roce_buf_offset(eq->mtr.kmem, + (eq->cons_index & (eq->entries - 1)) * + HNS_ROCE_CEQ_ENTRY_SIZE); return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^ (!!(eq->cons_index & eq->entries)) ? ceqe : NULL; } @@ -5263,17 +5478,15 @@ static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, int eqn) static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) { - if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0) - hns_roce_mtr_cleanup(hr_dev, &eq->mtr); - hns_roce_buf_free(hr_dev, eq->buf.size, &eq->buf); + hns_roce_mtr_destroy(hr_dev, &eq->mtr); } -static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, - struct hns_roce_eq *eq, - void *mb_buf) +static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq, + void *mb_buf) { + u64 eqe_ba[MTT_MIN_COUNT] = { 0 }; struct hns_roce_eq_context *eqc; - u64 ba[MTT_MIN_COUNT] = { 0 }; + u64 bt_ba = 0; int count; eqc = mb_buf; @@ -5281,31 +5494,18 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, /* init eqc */ eq->doorbell = hr_dev->reg_base + ROCEE_VF_EQ_DB_CFG0_REG; - eq->hop_num = hr_dev->caps.eqe_hop_num; eq->cons_index = 0; eq->over_ignore = HNS_ROCE_V2_EQ_OVER_IGNORE_0; eq->coalesce = HNS_ROCE_V2_EQ_COALESCE_0; eq->arm_st = HNS_ROCE_V2_EQ_ALWAYS_ARMED; - eq->eqe_ba_pg_sz = hr_dev->caps.eqe_ba_pg_sz; - eq->eqe_buf_pg_sz = hr_dev->caps.eqe_buf_pg_sz; eq->shift = ilog2((unsigned int)eq->entries); - /* if not muti-hop, eqe buffer only use one trunk */ - if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0) { - eq->eqe_ba = eq->buf.direct.map; - eq->cur_eqe_ba = eq->eqe_ba; - if (eq->buf.npages > 1) - eq->nxt_eqe_ba = eq->eqe_ba + (1 << eq->eqe_buf_pg_sz); - else - eq->nxt_eqe_ba = eq->eqe_ba; - } else { - count = hns_roce_mtr_find(hr_dev, &eq->mtr, 0, ba, - MTT_MIN_COUNT, &eq->eqe_ba); - eq->cur_eqe_ba = ba[0]; - if (count > 1) - eq->nxt_eqe_ba = ba[1]; - else - eq->nxt_eqe_ba = ba[0]; + /* if not multi-hop, eqe buffer only use one trunk */ + count = hns_roce_mtr_find(hr_dev, &eq->mtr, 0, eqe_ba, MTT_MIN_COUNT, + &bt_ba); + if (count < 1) { + dev_err(hr_dev->dev, "failed to find EQE mtr\n"); + return -ENOBUFS; } /* set eqc state */ @@ -5339,12 +5539,12 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, /* set eqe_ba_pg_sz */ roce_set_field(eqc->byte_8, HNS_ROCE_EQC_BA_PG_SZ_M, HNS_ROCE_EQC_BA_PG_SZ_S, - eq->eqe_ba_pg_sz + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(eq->mtr.hem_cfg.ba_pg_shift)); /* set eqe_buf_pg_sz */ roce_set_field(eqc->byte_8, HNS_ROCE_EQC_BUF_PG_SZ_M, HNS_ROCE_EQC_BUF_PG_SZ_S, - eq->eqe_buf_pg_sz + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(eq->mtr.hem_cfg.buf_pg_shift)); /* set eq_producer_idx */ roce_set_field(eqc->byte_8, HNS_ROCE_EQC_PROD_INDX_M, @@ -5363,13 +5563,13 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, HNS_ROCE_EQC_REPORT_TIMER_S, HNS_ROCE_EQ_INIT_REPORT_TIMER); - /* set eqe_ba [34:3] */ + /* set bt_ba [34:3] */ roce_set_field(eqc->eqe_ba0, HNS_ROCE_EQC_EQE_BA_L_M, - HNS_ROCE_EQC_EQE_BA_L_S, eq->eqe_ba >> 3); + HNS_ROCE_EQC_EQE_BA_L_S, bt_ba >> 3); - /* set eqe_ba [64:35] */ + /* set bt_ba [64:35] */ roce_set_field(eqc->eqe_ba1, HNS_ROCE_EQC_EQE_BA_H_M, - HNS_ROCE_EQC_EQE_BA_H_S, eq->eqe_ba >> 35); + HNS_ROCE_EQC_EQE_BA_H_S, bt_ba >> 35); /* set eq shift */ roce_set_field(eqc->byte_28, HNS_ROCE_EQC_SHIFT_M, HNS_ROCE_EQC_SHIFT_S, @@ -5381,15 +5581,15 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, /* set cur_eqe_ba [27:12] */ roce_set_field(eqc->byte_28, HNS_ROCE_EQC_CUR_EQE_BA_L_M, - HNS_ROCE_EQC_CUR_EQE_BA_L_S, eq->cur_eqe_ba >> 12); + HNS_ROCE_EQC_CUR_EQE_BA_L_S, eqe_ba[0] >> 12); /* set cur_eqe_ba [59:28] */ roce_set_field(eqc->byte_32, HNS_ROCE_EQC_CUR_EQE_BA_M_M, - HNS_ROCE_EQC_CUR_EQE_BA_M_S, eq->cur_eqe_ba >> 28); + HNS_ROCE_EQC_CUR_EQE_BA_M_S, eqe_ba[0] >> 28); /* set cur_eqe_ba [63:60] */ roce_set_field(eqc->byte_36, HNS_ROCE_EQC_CUR_EQE_BA_H_M, - HNS_ROCE_EQC_CUR_EQE_BA_H_S, eq->cur_eqe_ba >> 60); + HNS_ROCE_EQC_CUR_EQE_BA_H_S, eqe_ba[0] >> 60); /* set eq consumer idx */ roce_set_field(eqc->byte_36, HNS_ROCE_EQC_CONS_INDX_M, @@ -5397,97 +5597,38 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, /* set nex_eqe_ba[43:12] */ roce_set_field(eqc->nxt_eqe_ba0, HNS_ROCE_EQC_NXT_EQE_BA_L_M, - HNS_ROCE_EQC_NXT_EQE_BA_L_S, eq->nxt_eqe_ba >> 12); + HNS_ROCE_EQC_NXT_EQE_BA_L_S, eqe_ba[1] >> 12); /* set nex_eqe_ba[63:44] */ roce_set_field(eqc->nxt_eqe_ba1, HNS_ROCE_EQC_NXT_EQE_BA_H_M, - HNS_ROCE_EQC_NXT_EQE_BA_H_S, eq->nxt_eqe_ba >> 44); -} - -static int map_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq, - u32 page_shift) -{ - struct hns_roce_buf_region region = {}; - dma_addr_t *buf_list = NULL; - int ba_num; - int ret; - - ba_num = DIV_ROUND_UP(PAGE_ALIGN(eq->entries * eq->eqe_size), - 1 << page_shift); - hns_roce_init_buf_region(®ion, hr_dev->caps.eqe_hop_num, 0, ba_num); - - /* alloc a tmp list for storing eq buf address */ - ret = hns_roce_alloc_buf_list(®ion, &buf_list, 1); - if (ret) { - dev_err(hr_dev->dev, "alloc eq buf_list error\n"); - return ret; - } + HNS_ROCE_EQC_NXT_EQE_BA_H_S, eqe_ba[1] >> 44); - ba_num = hns_roce_get_kmem_bufs(hr_dev, buf_list, region.count, - region.offset, &eq->buf); - if (ba_num != region.count) { - dev_err(hr_dev->dev, "get eqe buf err,expect %d,ret %d.\n", - region.count, ba_num); - ret = -ENOBUFS; - goto done; - } - - hns_roce_mtr_init(&eq->mtr, PAGE_SHIFT + hr_dev->caps.eqe_ba_pg_sz, - page_shift); - ret = hns_roce_mtr_attach(hr_dev, &eq->mtr, &buf_list, ®ion, 1); - if (ret) - dev_err(hr_dev->dev, "mtr attach error for eqe\n"); - - goto done; - - hns_roce_mtr_cleanup(hr_dev, &eq->mtr); -done: - hns_roce_free_buf_list(&buf_list, 1); - - return ret; + return 0; } static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) { - struct hns_roce_buf *buf = &eq->buf; - bool is_mhop = false; - u32 page_shift; - u32 mhop_num; - u32 max_size; - int ret; + struct hns_roce_buf_attr buf_attr = {}; + int err; - page_shift = PAGE_SHIFT + hr_dev->caps.eqe_buf_pg_sz; - mhop_num = hr_dev->caps.eqe_hop_num; - if (!mhop_num) { - max_size = 1 << page_shift; - buf->size = max_size; - } else if (mhop_num == HNS_ROCE_HOP_NUM_0) { - max_size = eq->entries * eq->eqe_size; - buf->size = max_size; - } else { - max_size = 1 << page_shift; - buf->size = PAGE_ALIGN(eq->entries * eq->eqe_size); - is_mhop = true; - } + if (hr_dev->caps.eqe_hop_num == HNS_ROCE_HOP_NUM_0) + eq->hop_num = 0; + else + eq->hop_num = hr_dev->caps.eqe_hop_num; - ret = hns_roce_buf_alloc(hr_dev, buf->size, max_size, buf, page_shift); - if (ret) { - dev_err(hr_dev->dev, "alloc eq buf error\n"); - return ret; - } + buf_attr.page_shift = hr_dev->caps.eqe_buf_pg_sz + PAGE_ADDR_SHIFT; + buf_attr.region[0].size = eq->entries * eq->eqe_size; + buf_attr.region[0].hopnum = eq->hop_num; + buf_attr.region_count = 1; + buf_attr.fixed_page = true; - if (is_mhop) { - ret = map_eq_buf(hr_dev, eq, page_shift); - if (ret) { - dev_err(hr_dev->dev, "map roce buf error\n"); - goto err_alloc; - } - } + err = hns_roce_mtr_create(hr_dev, &eq->mtr, &buf_attr, + hr_dev->caps.srqwqe_ba_pg_sz + + PAGE_ADDR_SHIFT, NULL, 0); + if (err) + dev_err(hr_dev->dev, "Failed to alloc EQE mtr, err %d\n", err); - return 0; -err_alloc: - hns_roce_buf_free(hr_dev, buf->size, buf); - return ret; + return err; } static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev, @@ -5499,15 +5640,16 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev, /* Allocate mailbox memory */ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); - if (IS_ERR(mailbox)) - return PTR_ERR(mailbox); + if (IS_ERR_OR_NULL(mailbox)) + return -ENOMEM; ret = alloc_eq_buf(hr_dev, eq); - if (ret) { - ret = -ENOMEM; + if (ret) goto free_cmd_mbox; - } - hns_roce_config_eqc(hr_dev, eq, mailbox->buf); + + ret = config_eqc(hr_dev, eq, mailbox->buf); + if (ret) + goto err_cmd_mbox; ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, eq->eqn, 0, eq_cmd, HNS_ROCE_CMD_TIMEOUT_MSECS); @@ -5731,294 +5873,6 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev) destroy_workqueue(hr_dev->irq_workq); } -static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, - struct hns_roce_srq *srq, u32 pdn, u16 xrcd, - u32 cqn, void *mb_buf, u64 *mtts_wqe, - u64 *mtts_idx, dma_addr_t dma_handle_wqe, - dma_addr_t dma_handle_idx) -{ - struct hns_roce_srq_context *srq_context; - - srq_context = mb_buf; - memset(srq_context, 0, sizeof(*srq_context)); - - roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQ_ST_M, - SRQC_BYTE_4_SRQ_ST_S, 1); - - roce_set_field(srq_context->byte_4_srqn_srqst, - SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M, - SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S, - (hr_dev->caps.srqwqe_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : - hr_dev->caps.srqwqe_hop_num)); - roce_set_field(srq_context->byte_4_srqn_srqst, - SRQC_BYTE_4_SRQ_SHIFT_M, SRQC_BYTE_4_SRQ_SHIFT_S, - ilog2(srq->wqe_cnt)); - - roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQN_M, - SRQC_BYTE_4_SRQN_S, srq->srqn); - - roce_set_field(srq_context->byte_8_limit_wl, SRQC_BYTE_8_SRQ_LIMIT_WL_M, - SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0); - - roce_set_field(srq_context->byte_12_xrcd, SRQC_BYTE_12_SRQ_XRCD_M, - SRQC_BYTE_12_SRQ_XRCD_S, xrcd); - - srq_context->wqe_bt_ba = cpu_to_le32((u32)(dma_handle_wqe >> 3)); - - roce_set_field(srq_context->byte_24_wqe_bt_ba, - SRQC_BYTE_24_SRQ_WQE_BT_BA_M, - SRQC_BYTE_24_SRQ_WQE_BT_BA_S, - dma_handle_wqe >> 35); - - roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_PD_M, - SRQC_BYTE_28_PD_S, pdn); - roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_RQWS_M, - SRQC_BYTE_28_RQWS_S, srq->max_gs <= 0 ? 0 : - fls(srq->max_gs - 1)); - - srq_context->idx_bt_ba = cpu_to_le32(dma_handle_idx >> 3); - roce_set_field(srq_context->rsv_idx_bt_ba, - SRQC_BYTE_36_SRQ_IDX_BT_BA_M, - SRQC_BYTE_36_SRQ_IDX_BT_BA_S, - dma_handle_idx >> 35); - - srq_context->idx_cur_blk_addr = - cpu_to_le32(mtts_idx[0] >> PAGE_ADDR_SHIFT); - roce_set_field(srq_context->byte_44_idxbufpgsz_addr, - SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M, - SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S, - mtts_idx[0] >> (32 + PAGE_ADDR_SHIFT)); - roce_set_field(srq_context->byte_44_idxbufpgsz_addr, - SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M, - SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S, - hr_dev->caps.idx_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : - hr_dev->caps.idx_hop_num); - - roce_set_field(srq_context->byte_44_idxbufpgsz_addr, - SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M, - SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S, - hr_dev->caps.idx_ba_pg_sz + PG_SHIFT_OFFSET); - roce_set_field(srq_context->byte_44_idxbufpgsz_addr, - SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M, - SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S, - hr_dev->caps.idx_buf_pg_sz + PG_SHIFT_OFFSET); - - srq_context->idx_nxt_blk_addr = - cpu_to_le32(mtts_idx[1] >> PAGE_ADDR_SHIFT); - roce_set_field(srq_context->rsv_idxnxtblkaddr, - SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M, - SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S, - mtts_idx[1] >> (32 + PAGE_ADDR_SHIFT)); - roce_set_field(srq_context->byte_56_xrc_cqn, - SRQC_BYTE_56_SRQ_XRC_CQN_M, SRQC_BYTE_56_SRQ_XRC_CQN_S, - cqn); - roce_set_field(srq_context->byte_56_xrc_cqn, - SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M, - SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S, - hr_dev->caps.srqwqe_ba_pg_sz + PG_SHIFT_OFFSET); - roce_set_field(srq_context->byte_56_xrc_cqn, - SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M, - SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S, - hr_dev->caps.srqwqe_buf_pg_sz + PG_SHIFT_OFFSET); - - roce_set_bit(srq_context->db_record_addr_record_en, - SRQC_BYTE_60_SRQ_RECORD_EN_S, 0); -} - -static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, - struct ib_srq_attr *srq_attr, - enum ib_srq_attr_mask srq_attr_mask, - struct ib_udata *udata) -{ - struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); - struct hns_roce_srq *srq = to_hr_srq(ibsrq); - struct hns_roce_srq_context *srq_context; - struct hns_roce_srq_context *srqc_mask; - struct hns_roce_cmd_mailbox *mailbox; - int ret; - - if (srq_attr_mask & IB_SRQ_LIMIT) { - if (srq_attr->srq_limit >= srq->wqe_cnt) - return -EINVAL; - - mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); - if (IS_ERR(mailbox)) - return PTR_ERR(mailbox); - - srq_context = mailbox->buf; - srqc_mask = (struct hns_roce_srq_context *)mailbox->buf + 1; - - memset(srqc_mask, 0xff, sizeof(*srqc_mask)); - - roce_set_field(srq_context->byte_8_limit_wl, - SRQC_BYTE_8_SRQ_LIMIT_WL_M, - SRQC_BYTE_8_SRQ_LIMIT_WL_S, srq_attr->srq_limit); - roce_set_field(srqc_mask->byte_8_limit_wl, - SRQC_BYTE_8_SRQ_LIMIT_WL_M, - SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0); - - ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, srq->srqn, 0, - HNS_ROCE_CMD_MODIFY_SRQC, - HNS_ROCE_CMD_TIMEOUT_MSECS); - hns_roce_free_cmd_mailbox(hr_dev, mailbox); - if (ret) { - ibdev_err(&hr_dev->ib_dev, - "failed to process cmd when modifying SRQ, ret = %d\n", - ret); - return ret; - } - } - - return 0; -} - -static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) -{ - struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); - struct hns_roce_srq *srq = to_hr_srq(ibsrq); - struct hns_roce_srq_context *srq_context; - struct hns_roce_cmd_mailbox *mailbox; - int limit_wl; - int ret; - - mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); - if (IS_ERR(mailbox)) - return PTR_ERR(mailbox); - - srq_context = mailbox->buf; - ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, srq->srqn, 0, - HNS_ROCE_CMD_QUERY_SRQC, - HNS_ROCE_CMD_TIMEOUT_MSECS); - if (ret) { - ibdev_err(&hr_dev->ib_dev, - "failed to process cmd when querying SRQ, ret = %d\n", - ret); - goto out; - } - - limit_wl = roce_get_field(srq_context->byte_8_limit_wl, - SRQC_BYTE_8_SRQ_LIMIT_WL_M, - SRQC_BYTE_8_SRQ_LIMIT_WL_S); - - attr->srq_limit = limit_wl; - attr->max_wr = srq->wqe_cnt - 1; - attr->max_sge = srq->max_gs; - - memcpy(srq_context, mailbox->buf, sizeof(*srq_context)); - -out: - hns_roce_free_cmd_mailbox(hr_dev, mailbox); - return ret; -} - -static int find_empty_entry(struct hns_roce_idx_que *idx_que, - unsigned long size) -{ - int wqe_idx; - - if (unlikely(bitmap_full(idx_que->bitmap, size))) - return -ENOSPC; - - wqe_idx = find_first_zero_bit(idx_que->bitmap, size); - - bitmap_set(idx_que->bitmap, wqe_idx, 1); - - return wqe_idx; -} - -static void fill_idx_queue(struct hns_roce_idx_que *idx_que, - int cur_idx, int wqe_idx) -{ - unsigned int *addr; - - addr = (unsigned int *)hns_roce_buf_offset(&idx_que->idx_buf, - cur_idx * idx_que->entry_sz); - *addr = wqe_idx; -} - -static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, - const struct ib_recv_wr *wr, - const struct ib_recv_wr **bad_wr) -{ - struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); - struct hns_roce_srq *srq = to_hr_srq(ibsrq); - struct hns_roce_v2_wqe_data_seg *dseg; - struct hns_roce_v2_db srq_db; - unsigned long flags; - int ret = 0; - int wqe_idx; - void *wqe; - int nreq; - int ind; - int i; - - spin_lock_irqsave(&srq->lock, flags); - - ind = srq->head & (srq->wqe_cnt - 1); - - for (nreq = 0; wr; ++nreq, wr = wr->next) { - if (unlikely(wr->num_sge > srq->max_gs)) { - ret = -EINVAL; - *bad_wr = wr; - break; - } - - if (unlikely(srq->head == srq->tail)) { - ret = -ENOMEM; - *bad_wr = wr; - break; - } - - wqe_idx = find_empty_entry(&srq->idx_que, srq->wqe_cnt); - if (wqe_idx < 0) { - ret = -ENOMEM; - *bad_wr = wr; - break; - } - - fill_idx_queue(&srq->idx_que, ind, wqe_idx); - wqe = get_srq_wqe(srq, wqe_idx); - dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; - - for (i = 0; i < wr->num_sge; ++i) { - dseg[i].len = cpu_to_le32(wr->sg_list[i].length); - dseg[i].lkey = cpu_to_le32(wr->sg_list[i].lkey); - dseg[i].addr = cpu_to_le64(wr->sg_list[i].addr); - } - - if (i < srq->max_gs) { - dseg[i].len = 0; - dseg[i].lkey = cpu_to_le32(0x100); - dseg[i].addr = 0; - } - - srq->wrid[wqe_idx] = wr->wr_id; - ind = (ind + 1) & (srq->wqe_cnt - 1); - } - - if (likely(nreq)) { - srq->head += nreq; - - /* - * Make sure that descriptors are written before - * doorbell record. - */ - wmb(); - - srq_db.byte_4 = - cpu_to_le32(HNS_ROCE_V2_SRQ_DB << V2_DB_BYTE_4_CMD_S | - (srq->srqn & V2_DB_BYTE_4_TAG_M)); - srq_db.parameter = cpu_to_le32(srq->head); - - hns_roce_write64(hr_dev, (__le32 *)&srq_db, srq->db_reg_l); - - } - - spin_unlock_irqrestore(&srq->lock, flags); - - return ret; -} - static const struct hns_roce_dfx_hw hns_roce_dfx_hw_v2 = { .query_cqc_info = hns_roce_v2_query_cqc_info, }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 82dd9f6f4845..938b7b522faf 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -1241,10 +1241,9 @@ struct hns_roce_func_clear { }; #define FUNC_CLEAR_RST_FUN_DONE_S 0 -/* Each physical function manages up to 248 virtual functionsï¼› - * it takes up to 100ms for each function to execute clearï¼› - * if an abnormal reset occurs, it is executed twice at most; - * so it takes up to 249 * 2 * 100ms. +/* Each physical function manages up to 248 virtual functions, it takes up to + * 100ms for each function to execute clear. If an abnormal reset occurs, it is + * executed twice at most, so it takes up to 249 * 2 * 100ms. */ #define HNS_ROCE_V2_FUNC_CLEAR_TIMEOUT_MSECS (249 * 2 * 100) #define HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_INTERVAL 40 @@ -1648,7 +1647,7 @@ struct hns_roce_query_pf_caps_c { struct hns_roce_query_pf_caps_d { __le32 wq_hop_num_max_srqs; __le16 srq_depth; - __le16 rsv; + __le16 cap_flags_ex; __le32 num_ceqs_ceq_depth; __le32 arm_st_aeq_depth; __le32 num_uars_rsv_pds; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index d0031d559213..fd3581efe9a8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -579,33 +579,12 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) int ret; struct device *dev = hr_dev->dev; - ret = hns_roce_init_hem_table(hr_dev, &hr_dev->mr_table.mtt_table, - HEM_TYPE_MTT, hr_dev->caps.mtt_entry_sz, - hr_dev->caps.num_mtt_segs, 1); - if (ret) { - dev_err(dev, "Failed to init MTT context memory, aborting.\n"); - return ret; - } - - if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) { - ret = hns_roce_init_hem_table(hr_dev, - &hr_dev->mr_table.mtt_cqe_table, - HEM_TYPE_CQE, - hr_dev->caps.mtt_entry_sz, - hr_dev->caps.num_cqe_segs, 1); - if (ret) { - dev_err(dev, - "Failed to init CQE context memory, aborting.\n"); - goto err_unmap_cqe; - } - } - ret = hns_roce_init_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table, HEM_TYPE_MTPT, hr_dev->caps.mtpt_entry_sz, hr_dev->caps.num_mtpts, 1); if (ret) { dev_err(dev, "Failed to init MTPT context memory, aborting.\n"); - goto err_unmap_mtt; + return ret; } ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qp_table.qp_table, @@ -660,32 +639,6 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) } } - if (hr_dev->caps.num_srqwqe_segs) { - ret = hns_roce_init_hem_table(hr_dev, - &hr_dev->mr_table.mtt_srqwqe_table, - HEM_TYPE_SRQWQE, - hr_dev->caps.mtt_entry_sz, - hr_dev->caps.num_srqwqe_segs, 1); - if (ret) { - dev_err(dev, - "Failed to init MTT srqwqe memory, aborting.\n"); - goto err_unmap_srq; - } - } - - if (hr_dev->caps.num_idx_segs) { - ret = hns_roce_init_hem_table(hr_dev, - &hr_dev->mr_table.mtt_idx_table, - HEM_TYPE_IDX, - hr_dev->caps.idx_entry_sz, - hr_dev->caps.num_idx_segs, 1); - if (ret) { - dev_err(dev, - "Failed to init MTT idx memory, aborting.\n"); - goto err_unmap_srqwqe; - } - } - if (hr_dev->caps.sccc_entry_sz) { ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qp_table.sccc_table, @@ -695,7 +648,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) if (ret) { dev_err(dev, "Failed to init SCC context memory, aborting.\n"); - goto err_unmap_idx; + goto err_unmap_srq; } } @@ -733,17 +686,6 @@ err_unmap_ctx: if (hr_dev->caps.sccc_entry_sz) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.sccc_table); - -err_unmap_idx: - if (hr_dev->caps.num_idx_segs) - hns_roce_cleanup_hem_table(hr_dev, - &hr_dev->mr_table.mtt_idx_table); - -err_unmap_srqwqe: - if (hr_dev->caps.num_srqwqe_segs) - hns_roce_cleanup_hem_table(hr_dev, - &hr_dev->mr_table.mtt_srqwqe_table); - err_unmap_srq: if (hr_dev->caps.srqc_entry_sz) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->srq_table.table); @@ -765,14 +707,6 @@ err_unmap_qp: err_unmap_dmpt: hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table); -err_unmap_mtt: - if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) - hns_roce_cleanup_hem_table(hr_dev, - &hr_dev->mr_table.mtt_cqe_table); - -err_unmap_cqe: - hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_table); - return ret; } diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 176f34692f88..ecd76759d47a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -66,645 +66,89 @@ int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev, HNS_ROCE_CMD_TIMEOUT_MSECS); } -static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order, - unsigned long *seg) +static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, + u32 pd, u64 iova, u64 size, u32 access) { - int o; - u32 m; + struct ib_device *ibdev = &hr_dev->ib_dev; + unsigned long obj = 0; + int err; - spin_lock(&buddy->lock); - - for (o = order; o <= buddy->max_order; ++o) { - if (buddy->num_free[o]) { - m = 1 << (buddy->max_order - o); - *seg = find_first_bit(buddy->bits[o], m); - if (*seg < m) - goto found; - } - } - spin_unlock(&buddy->lock); - return -EINVAL; - - found: - clear_bit(*seg, buddy->bits[o]); - --buddy->num_free[o]; - - while (o > order) { - --o; - *seg <<= 1; - set_bit(*seg ^ 1, buddy->bits[o]); - ++buddy->num_free[o]; - } - - spin_unlock(&buddy->lock); - - *seg <<= order; - return 0; -} - -static void hns_roce_buddy_free(struct hns_roce_buddy *buddy, unsigned long seg, - int order) -{ - seg >>= order; - - spin_lock(&buddy->lock); - - while (test_bit(seg ^ 1, buddy->bits[order])) { - clear_bit(seg ^ 1, buddy->bits[order]); - --buddy->num_free[order]; - seg >>= 1; - ++order; - } - - set_bit(seg, buddy->bits[order]); - ++buddy->num_free[order]; - - spin_unlock(&buddy->lock); -} - -static int hns_roce_buddy_init(struct hns_roce_buddy *buddy, int max_order) -{ - int i, s; - - buddy->max_order = max_order; - spin_lock_init(&buddy->lock); - buddy->bits = kcalloc(buddy->max_order + 1, - sizeof(*buddy->bits), - GFP_KERNEL); - buddy->num_free = kcalloc(buddy->max_order + 1, - sizeof(*buddy->num_free), - GFP_KERNEL); - if (!buddy->bits || !buddy->num_free) - goto err_out; - - for (i = 0; i <= buddy->max_order; ++i) { - s = BITS_TO_LONGS(1 << (buddy->max_order - i)); - buddy->bits[i] = kcalloc(s, sizeof(long), GFP_KERNEL | - __GFP_NOWARN); - if (!buddy->bits[i]) { - buddy->bits[i] = vzalloc(array_size(s, sizeof(long))); - if (!buddy->bits[i]) - goto err_out_free; - } - } - - set_bit(0, buddy->bits[buddy->max_order]); - buddy->num_free[buddy->max_order] = 1; - - return 0; - -err_out_free: - for (i = 0; i <= buddy->max_order; ++i) - kvfree(buddy->bits[i]); - -err_out: - kfree(buddy->bits); - kfree(buddy->num_free); - return -ENOMEM; -} - -static void hns_roce_buddy_cleanup(struct hns_roce_buddy *buddy) -{ - int i; - - for (i = 0; i <= buddy->max_order; ++i) - kvfree(buddy->bits[i]); - - kfree(buddy->bits); - kfree(buddy->num_free); -} - -static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order, - unsigned long *seg, u32 mtt_type) -{ - struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; - struct hns_roce_hem_table *table; - struct hns_roce_buddy *buddy; - int ret; - - switch (mtt_type) { - case MTT_TYPE_WQE: - buddy = &mr_table->mtt_buddy; - table = &mr_table->mtt_table; - break; - case MTT_TYPE_CQE: - buddy = &mr_table->mtt_cqe_buddy; - table = &mr_table->mtt_cqe_table; - break; - case MTT_TYPE_SRQWQE: - buddy = &mr_table->mtt_srqwqe_buddy; - table = &mr_table->mtt_srqwqe_table; - break; - case MTT_TYPE_IDX: - buddy = &mr_table->mtt_idx_buddy; - table = &mr_table->mtt_idx_table; - break; - default: - dev_err(hr_dev->dev, "Unsupport MTT table type: %d\n", - mtt_type); - return -EINVAL; - } - - ret = hns_roce_buddy_alloc(buddy, order, seg); - if (ret) - return ret; - - ret = hns_roce_table_get_range(hr_dev, table, *seg, - *seg + (1 << order) - 1); - if (ret) { - hns_roce_buddy_free(buddy, *seg, order); - return ret; - } - - return 0; -} - -int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift, - struct hns_roce_mtt *mtt) -{ - int ret; - int i; - - /* Page num is zero, correspond to DMA memory register */ - if (!npages) { - mtt->order = -1; - mtt->page_shift = HNS_ROCE_HEM_PAGE_SHIFT; - return 0; - } - - /* Note: if page_shift is zero, FAST memory register */ - mtt->page_shift = page_shift; - - /* Compute MTT entry necessary */ - for (mtt->order = 0, i = HNS_ROCE_MTT_ENTRY_PER_SEG; i < npages; - i <<= 1) - ++mtt->order; - - /* Allocate MTT entry */ - ret = hns_roce_alloc_mtt_range(hr_dev, mtt->order, &mtt->first_seg, - mtt->mtt_type); - if (ret) + /* Allocate a key for mr from mr_table */ + err = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &obj); + if (err) { + ibdev_err(ibdev, + "failed to alloc bitmap for MR key, ret = %d.\n", + err); return -ENOMEM; - - return 0; -} - -void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt) -{ - struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; - - if (mtt->order < 0) - return; - - switch (mtt->mtt_type) { - case MTT_TYPE_WQE: - hns_roce_buddy_free(&mr_table->mtt_buddy, mtt->first_seg, - mtt->order); - hns_roce_table_put_range(hr_dev, &mr_table->mtt_table, - mtt->first_seg, - mtt->first_seg + (1 << mtt->order) - 1); - break; - case MTT_TYPE_CQE: - hns_roce_buddy_free(&mr_table->mtt_cqe_buddy, mtt->first_seg, - mtt->order); - hns_roce_table_put_range(hr_dev, &mr_table->mtt_cqe_table, - mtt->first_seg, - mtt->first_seg + (1 << mtt->order) - 1); - break; - case MTT_TYPE_SRQWQE: - hns_roce_buddy_free(&mr_table->mtt_srqwqe_buddy, mtt->first_seg, - mtt->order); - hns_roce_table_put_range(hr_dev, &mr_table->mtt_srqwqe_table, - mtt->first_seg, - mtt->first_seg + (1 << mtt->order) - 1); - break; - case MTT_TYPE_IDX: - hns_roce_buddy_free(&mr_table->mtt_idx_buddy, mtt->first_seg, - mtt->order); - hns_roce_table_put_range(hr_dev, &mr_table->mtt_idx_table, - mtt->first_seg, - mtt->first_seg + (1 << mtt->order) - 1); - break; - default: - dev_err(hr_dev->dev, - "Unsupport mtt type %d, clean mtt failed\n", - mtt->mtt_type); - break; - } -} - -static void hns_roce_loop_free(struct hns_roce_dev *hr_dev, - struct hns_roce_mr *mr, int err_loop_index, - int loop_i, int loop_j) -{ - struct device *dev = hr_dev->dev; - u32 mhop_num; - u32 pbl_bt_sz; - u64 bt_idx; - int i, j; - - pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); - mhop_num = hr_dev->caps.pbl_hop_num; - - i = loop_i; - if (mhop_num == 3 && err_loop_index == 2) { - for (; i >= 0; i--) { - dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], - mr->pbl_l1_dma_addr[i]); - - for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) { - if (i == loop_i && j >= loop_j) - break; - - bt_idx = i * pbl_bt_sz / BA_BYTE_LEN + j; - dma_free_coherent(dev, pbl_bt_sz, - mr->pbl_bt_l2[bt_idx], - mr->pbl_l2_dma_addr[bt_idx]); - } - } - } else if (mhop_num == 3 && err_loop_index == 1) { - for (i -= 1; i >= 0; i--) { - dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], - mr->pbl_l1_dma_addr[i]); - - for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) { - bt_idx = i * pbl_bt_sz / BA_BYTE_LEN + j; - dma_free_coherent(dev, pbl_bt_sz, - mr->pbl_bt_l2[bt_idx], - mr->pbl_l2_dma_addr[bt_idx]); - } - } - } else if (mhop_num == 2 && err_loop_index == 1) { - for (i -= 1; i >= 0; i--) - dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], - mr->pbl_l1_dma_addr[i]); - } else { - dev_warn(dev, "not support: mhop_num=%d, err_loop_index=%d.", - mhop_num, err_loop_index); - return; } - dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0, mr->pbl_l0_dma_addr); - mr->pbl_bt_l0 = NULL; - mr->pbl_l0_dma_addr = 0; -} -static int pbl_1hop_alloc(struct hns_roce_dev *hr_dev, int npages, - struct hns_roce_mr *mr, u32 pbl_bt_sz) -{ - struct device *dev = hr_dev->dev; - - if (npages > pbl_bt_sz / 8) { - dev_err(dev, "npages %d is larger than buf_pg_sz!", - npages); - return -EINVAL; - } - mr->pbl_buf = dma_alloc_coherent(dev, npages * 8, - &(mr->pbl_dma_addr), - GFP_KERNEL); - if (!mr->pbl_buf) - return -ENOMEM; - - mr->pbl_size = npages; - mr->pbl_ba = mr->pbl_dma_addr; - mr->pbl_hop_num = 1; - mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz; - mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz; - return 0; - -} - - -static int pbl_2hop_alloc(struct hns_roce_dev *hr_dev, int npages, - struct hns_roce_mr *mr, u32 pbl_bt_sz) -{ - struct device *dev = hr_dev->dev; - int npages_allocated; - u64 pbl_last_bt_num; - u64 pbl_bt_cnt = 0; - u64 size; - int i; - - pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8); - - /* alloc L1 BT */ - for (i = 0; i < pbl_bt_sz / 8; i++) { - if (pbl_bt_cnt + 1 < pbl_last_bt_num) { - size = pbl_bt_sz; - } else { - npages_allocated = i * (pbl_bt_sz / 8); - size = (npages - npages_allocated) * 8; - } - mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, size, - &(mr->pbl_l1_dma_addr[i]), - GFP_KERNEL); - if (!mr->pbl_bt_l1[i]) { - hns_roce_loop_free(hr_dev, mr, 1, i, 0); - return -ENOMEM; - } - - *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i]; + mr->iova = iova; /* MR va starting addr */ + mr->size = size; /* MR addr range */ + mr->pd = pd; /* MR num */ + mr->access = access; /* MR access permit */ + mr->enabled = 0; /* MR active status */ + mr->key = hw_index_to_key(obj); /* MR key */ - pbl_bt_cnt++; - if (pbl_bt_cnt >= pbl_last_bt_num) - break; + err = hns_roce_table_get(hr_dev, &hr_dev->mr_table.mtpt_table, obj); + if (err) { + ibdev_err(ibdev, "failed to alloc mtpt, ret = %d.\n", err); + goto err_free_bitmap; } - mr->l0_chunk_last_num = i + 1; - return 0; +err_free_bitmap: + hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, obj, BITMAP_NO_RR); + return err; } -static int pbl_3hop_alloc(struct hns_roce_dev *hr_dev, int npages, - struct hns_roce_mr *mr, u32 pbl_bt_sz) +static void free_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) { - struct device *dev = hr_dev->dev; - int mr_alloc_done = 0; - int npages_allocated; - u64 pbl_last_bt_num; - u64 pbl_bt_cnt = 0; - u64 bt_idx; - u64 size; - int i; - int j = 0; + unsigned long obj = key_to_hw_index(mr->key); - pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8); - - mr->pbl_l2_dma_addr = kcalloc(pbl_last_bt_num, - sizeof(*mr->pbl_l2_dma_addr), - GFP_KERNEL); - if (!mr->pbl_l2_dma_addr) - return -ENOMEM; - - mr->pbl_bt_l2 = kcalloc(pbl_last_bt_num, - sizeof(*mr->pbl_bt_l2), - GFP_KERNEL); - if (!mr->pbl_bt_l2) - goto err_kcalloc_bt_l2; - - /* alloc L1, L2 BT */ - for (i = 0; i < pbl_bt_sz / 8; i++) { - mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, pbl_bt_sz, - &(mr->pbl_l1_dma_addr[i]), - GFP_KERNEL); - if (!mr->pbl_bt_l1[i]) { - hns_roce_loop_free(hr_dev, mr, 1, i, 0); - goto err_dma_alloc_l0; - } - - *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i]; - - for (j = 0; j < pbl_bt_sz / 8; j++) { - bt_idx = i * pbl_bt_sz / 8 + j; - - if (pbl_bt_cnt + 1 < pbl_last_bt_num) { - size = pbl_bt_sz; - } else { - npages_allocated = bt_idx * - (pbl_bt_sz / 8); - size = (npages - npages_allocated) * 8; - } - mr->pbl_bt_l2[bt_idx] = dma_alloc_coherent( - dev, size, - &(mr->pbl_l2_dma_addr[bt_idx]), - GFP_KERNEL); - if (!mr->pbl_bt_l2[bt_idx]) { - hns_roce_loop_free(hr_dev, mr, 2, i, j); - goto err_dma_alloc_l0; - } - - *(mr->pbl_bt_l1[i] + j) = - mr->pbl_l2_dma_addr[bt_idx]; - - pbl_bt_cnt++; - if (pbl_bt_cnt >= pbl_last_bt_num) { - mr_alloc_done = 1; - break; - } - } - - if (mr_alloc_done) - break; - } - - mr->l0_chunk_last_num = i + 1; - mr->l1_chunk_last_num = j + 1; - - - return 0; - -err_dma_alloc_l0: - kfree(mr->pbl_bt_l2); - mr->pbl_bt_l2 = NULL; - -err_kcalloc_bt_l2: - kfree(mr->pbl_l2_dma_addr); - mr->pbl_l2_dma_addr = NULL; - - return -ENOMEM; + hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table, obj); + hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, obj, BITMAP_NO_RR); } - -/* PBL multi hop addressing */ -static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages, - struct hns_roce_mr *mr) +static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, + size_t length, struct ib_udata *udata, u64 start, + int access) { - struct device *dev = hr_dev->dev; - u32 pbl_bt_sz; - u32 mhop_num; - - mhop_num = (mr->type == MR_TYPE_FRMR ? 1 : hr_dev->caps.pbl_hop_num); - pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); - - if (mhop_num == HNS_ROCE_HOP_NUM_0) - return 0; - - if (mhop_num == 1) - return pbl_1hop_alloc(hr_dev, npages, mr, pbl_bt_sz); - - mr->pbl_l1_dma_addr = kcalloc(pbl_bt_sz / 8, - sizeof(*mr->pbl_l1_dma_addr), - GFP_KERNEL); - if (!mr->pbl_l1_dma_addr) - return -ENOMEM; - - mr->pbl_bt_l1 = kcalloc(pbl_bt_sz / 8, sizeof(*mr->pbl_bt_l1), - GFP_KERNEL); - if (!mr->pbl_bt_l1) - goto err_kcalloc_bt_l1; - - /* alloc L0 BT */ - mr->pbl_bt_l0 = dma_alloc_coherent(dev, pbl_bt_sz, - &(mr->pbl_l0_dma_addr), - GFP_KERNEL); - if (!mr->pbl_bt_l0) - goto err_kcalloc_l2_dma; - - if (mhop_num == 2) { - if (pbl_2hop_alloc(hr_dev, npages, mr, pbl_bt_sz)) - goto err_kcalloc_l2_dma; - } - - if (mhop_num == 3) { - if (pbl_3hop_alloc(hr_dev, npages, mr, pbl_bt_sz)) - goto err_kcalloc_l2_dma; - } - - - mr->pbl_size = npages; - mr->pbl_ba = mr->pbl_l0_dma_addr; - mr->pbl_hop_num = hr_dev->caps.pbl_hop_num; - mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz; - mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz; - - return 0; - -err_kcalloc_l2_dma: - kfree(mr->pbl_bt_l1); - mr->pbl_bt_l1 = NULL; - -err_kcalloc_bt_l1: - kfree(mr->pbl_l1_dma_addr); - mr->pbl_l1_dma_addr = NULL; - - return -ENOMEM; -} - -static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova, - u64 size, u32 access, int npages, - struct hns_roce_mr *mr) -{ - struct device *dev = hr_dev->dev; - unsigned long index = 0; - int ret; - - /* Allocate a key for mr from mr_table */ - ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index); - if (ret) - return -ENOMEM; - - mr->iova = iova; /* MR va starting addr */ - mr->size = size; /* MR addr range */ - mr->pd = pd; /* MR num */ - mr->access = access; /* MR access permit */ - mr->enabled = 0; /* MR active status */ - mr->key = hw_index_to_key(index); /* MR key */ - - if (size == ~0ull) { - mr->pbl_buf = NULL; - mr->pbl_dma_addr = 0; - /* PBL multi-hop addressing parameters */ - mr->pbl_bt_l2 = NULL; - mr->pbl_bt_l1 = NULL; - mr->pbl_bt_l0 = NULL; - mr->pbl_l2_dma_addr = NULL; - mr->pbl_l1_dma_addr = NULL; - mr->pbl_l0_dma_addr = 0; - } else { - if (!hr_dev->caps.pbl_hop_num) { - mr->pbl_buf = dma_alloc_coherent(dev, - npages * BA_BYTE_LEN, - &(mr->pbl_dma_addr), - GFP_KERNEL); - if (!mr->pbl_buf) - return -ENOMEM; - } else { - ret = hns_roce_mhop_alloc(hr_dev, npages, mr); - } - } + struct ib_device *ibdev = &hr_dev->ib_dev; + bool is_fast = mr->type == MR_TYPE_FRMR; + struct hns_roce_buf_attr buf_attr = {}; + int err; + + mr->pbl_hop_num = is_fast ? 1 : hr_dev->caps.pbl_hop_num; + buf_attr.page_shift = is_fast ? PAGE_SHIFT : + hr_dev->caps.pbl_buf_pg_sz + PAGE_ADDR_SHIFT; + buf_attr.region[0].size = length; + buf_attr.region[0].hopnum = mr->pbl_hop_num; + buf_attr.region_count = 1; + buf_attr.fixed_page = true; + buf_attr.user_access = access; + /* fast MR's buffer is alloced before mapping, not at creation */ + buf_attr.mtt_only = is_fast; + + err = hns_roce_mtr_create(hr_dev, &mr->pbl_mtr, &buf_attr, + hr_dev->caps.pbl_ba_pg_sz + PAGE_ADDR_SHIFT, + udata, start); + if (err) + ibdev_err(ibdev, "failed to alloc pbl mtr, ret = %d.\n", err); + else + mr->npages = mr->pbl_mtr.hem_cfg.buf_pg_count; - return ret; + return err; } -static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev, - struct hns_roce_mr *mr) +static void free_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) { - struct device *dev = hr_dev->dev; - int npages_allocated; - int npages; - int i, j; - u32 pbl_bt_sz; - u32 mhop_num; - u64 bt_idx; - - npages = mr->pbl_size; - pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); - mhop_num = (mr->type == MR_TYPE_FRMR) ? 1 : hr_dev->caps.pbl_hop_num; - - if (mhop_num == HNS_ROCE_HOP_NUM_0) - return; - - if (mhop_num == 1) { - dma_free_coherent(dev, (unsigned int)(npages * BA_BYTE_LEN), - mr->pbl_buf, mr->pbl_dma_addr); - return; - } - - dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0, - mr->pbl_l0_dma_addr); - - if (mhop_num == 2) { - for (i = 0; i < mr->l0_chunk_last_num; i++) { - if (i == mr->l0_chunk_last_num - 1) { - npages_allocated = - i * (pbl_bt_sz / BA_BYTE_LEN); - - dma_free_coherent(dev, - (npages - npages_allocated) * BA_BYTE_LEN, - mr->pbl_bt_l1[i], - mr->pbl_l1_dma_addr[i]); - - break; - } - - dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], - mr->pbl_l1_dma_addr[i]); - } - } else if (mhop_num == 3) { - for (i = 0; i < mr->l0_chunk_last_num; i++) { - dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], - mr->pbl_l1_dma_addr[i]); - - for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) { - bt_idx = i * (pbl_bt_sz / BA_BYTE_LEN) + j; - - if ((i == mr->l0_chunk_last_num - 1) - && j == mr->l1_chunk_last_num - 1) { - npages_allocated = bt_idx * - (pbl_bt_sz / BA_BYTE_LEN); - - dma_free_coherent(dev, - (npages - npages_allocated) * - BA_BYTE_LEN, - mr->pbl_bt_l2[bt_idx], - mr->pbl_l2_dma_addr[bt_idx]); - - break; - } - - dma_free_coherent(dev, pbl_bt_sz, - mr->pbl_bt_l2[bt_idx], - mr->pbl_l2_dma_addr[bt_idx]); - } - } - } - - kfree(mr->pbl_bt_l1); - kfree(mr->pbl_l1_dma_addr); - mr->pbl_bt_l1 = NULL; - mr->pbl_l1_dma_addr = NULL; - if (mhop_num == 3) { - kfree(mr->pbl_bt_l2); - kfree(mr->pbl_l2_dma_addr); - mr->pbl_bt_l2 = NULL; - mr->pbl_l2_dma_addr = NULL; - } + hns_roce_mtr_destroy(hr_dev, &mr->pbl_mtr); } static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) { - struct device *dev = hr_dev->dev; - int npages = 0; + struct ib_device *ibdev = &hr_dev->ib_dev; int ret; if (mr->enabled) { @@ -712,27 +156,12 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1)); if (ret) - dev_warn(dev, "DESTROY_MPT failed (%d)\n", ret); + ibdev_warn(ibdev, "failed to destroy mpt, ret = %d.\n", + ret); } - if (mr->size != ~0ULL) { - if (mr->type == MR_TYPE_MR) - npages = ib_umem_page_count(mr->umem); - - if (!hr_dev->caps.pbl_hop_num) - dma_free_coherent(dev, - (unsigned int)(npages * BA_BYTE_LEN), - mr->pbl_buf, mr->pbl_dma_addr); - else - hns_roce_mhop_free(hr_dev, mr); - } - - if (mr->enabled) - hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table, - key_to_hw_index(mr->key)); - - hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, - key_to_hw_index(mr->key), BITMAP_NO_RR); + free_mr_pbl(hr_dev, mr); + free_mr_key(hr_dev, mr); } static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, @@ -742,18 +171,12 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, unsigned long mtpt_idx = key_to_hw_index(mr->key); struct device *dev = hr_dev->dev; struct hns_roce_cmd_mailbox *mailbox; - struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; - - /* Prepare HEM entry memory */ - ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx); - if (ret) - return ret; /* Allocate mailbox memory */ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); if (IS_ERR(mailbox)) { ret = PTR_ERR(mailbox); - goto err_table; + return ret; } if (mr->type != MR_TYPE_FRMR) @@ -780,137 +203,6 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, err_page: hns_roce_free_cmd_mailbox(hr_dev, mailbox); -err_table: - hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx); - return ret; -} - -static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev, - struct hns_roce_mtt *mtt, u32 start_index, - u32 npages, u64 *page_list) -{ - struct hns_roce_hem_table *table; - dma_addr_t dma_handle; - __le64 *mtts; - u32 bt_page_size; - u32 i; - - switch (mtt->mtt_type) { - case MTT_TYPE_WQE: - table = &hr_dev->mr_table.mtt_table; - bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT); - break; - case MTT_TYPE_CQE: - table = &hr_dev->mr_table.mtt_cqe_table; - bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT); - break; - case MTT_TYPE_SRQWQE: - table = &hr_dev->mr_table.mtt_srqwqe_table; - bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT); - break; - case MTT_TYPE_IDX: - table = &hr_dev->mr_table.mtt_idx_table; - bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT); - break; - default: - return -EINVAL; - } - - /* All MTTs must fit in the same page */ - if (start_index / (bt_page_size / sizeof(u64)) != - (start_index + npages - 1) / (bt_page_size / sizeof(u64))) - return -EINVAL; - - if (start_index & (HNS_ROCE_MTT_ENTRY_PER_SEG - 1)) - return -EINVAL; - - mtts = hns_roce_table_find(hr_dev, table, - mtt->first_seg + - start_index / HNS_ROCE_MTT_ENTRY_PER_SEG, - &dma_handle); - if (!mtts) - return -ENOMEM; - - /* Save page addr, low 12 bits : 0 */ - for (i = 0; i < npages; ++i) { - if (!hr_dev->caps.mtt_hop_num) - mtts[i] = cpu_to_le64(page_list[i] >> PAGE_ADDR_SHIFT); - else - mtts[i] = cpu_to_le64(page_list[i]); - } - - return 0; -} - -static int hns_roce_write_mtt(struct hns_roce_dev *hr_dev, - struct hns_roce_mtt *mtt, u32 start_index, - u32 npages, u64 *page_list) -{ - int chunk; - int ret; - u32 bt_page_size; - - if (mtt->order < 0) - return -EINVAL; - - switch (mtt->mtt_type) { - case MTT_TYPE_WQE: - bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT); - break; - case MTT_TYPE_CQE: - bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT); - break; - case MTT_TYPE_SRQWQE: - bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT); - break; - case MTT_TYPE_IDX: - bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT); - break; - default: - dev_err(hr_dev->dev, - "Unsupport mtt type %d, write mtt failed\n", - mtt->mtt_type); - return -EINVAL; - } - - while (npages > 0) { - chunk = min_t(int, bt_page_size / sizeof(u64), npages); - - ret = hns_roce_write_mtt_chunk(hr_dev, mtt, start_index, chunk, - page_list); - if (ret) - return ret; - - npages -= chunk; - start_index += chunk; - page_list += chunk; - } - - return 0; -} - -int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev, - struct hns_roce_mtt *mtt, struct hns_roce_buf *buf) -{ - u64 *page_list; - int ret; - u32 i; - - page_list = kmalloc_array(buf->npages, sizeof(*page_list), GFP_KERNEL); - if (!page_list) - return -ENOMEM; - - for (i = 0; i < buf->npages; ++i) { - if (buf->nbufs == 1) - page_list[i] = buf->direct.map + (i << buf->page_shift); - else - page_list[i] = buf->page_list[i].map; - - } - ret = hns_roce_write_mtt(hr_dev, mtt, 0, buf->npages, page_list); - - kfree(page_list); - return ret; } @@ -923,50 +215,6 @@ int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev) hr_dev->caps.num_mtpts, hr_dev->caps.num_mtpts - 1, hr_dev->caps.reserved_mrws, 0); - if (ret) - return ret; - - ret = hns_roce_buddy_init(&mr_table->mtt_buddy, - ilog2(hr_dev->caps.num_mtt_segs)); - if (ret) - goto err_buddy; - - if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) { - ret = hns_roce_buddy_init(&mr_table->mtt_cqe_buddy, - ilog2(hr_dev->caps.num_cqe_segs)); - if (ret) - goto err_buddy_cqe; - } - - if (hr_dev->caps.num_srqwqe_segs) { - ret = hns_roce_buddy_init(&mr_table->mtt_srqwqe_buddy, - ilog2(hr_dev->caps.num_srqwqe_segs)); - if (ret) - goto err_buddy_srqwqe; - } - - if (hr_dev->caps.num_idx_segs) { - ret = hns_roce_buddy_init(&mr_table->mtt_idx_buddy, - ilog2(hr_dev->caps.num_idx_segs)); - if (ret) - goto err_buddy_idx; - } - - return 0; - -err_buddy_idx: - if (hr_dev->caps.num_srqwqe_segs) - hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy); - -err_buddy_srqwqe: - if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) - hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy); - -err_buddy_cqe: - hns_roce_buddy_cleanup(&mr_table->mtt_buddy); - -err_buddy: - hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap); return ret; } @@ -974,30 +222,24 @@ void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev) { struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; - if (hr_dev->caps.num_idx_segs) - hns_roce_buddy_cleanup(&mr_table->mtt_idx_buddy); - if (hr_dev->caps.num_srqwqe_segs) - hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy); - hns_roce_buddy_cleanup(&mr_table->mtt_buddy); - if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) - hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy); hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap); } struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc) { + struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); struct hns_roce_mr *mr; int ret; - mr = kmalloc(sizeof(*mr), GFP_KERNEL); + mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (mr == NULL) return ERR_PTR(-ENOMEM); mr->type = MR_TYPE_DMA; /* Allocate memory region key */ - ret = hns_roce_mr_alloc(to_hr_dev(pd->device), to_hr_pd(pd)->pdn, 0, - ~0ULL, acc, 0, mr); + hns_roce_hem_list_init(&mr->pbl_mtr.hem_list); + ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, 0, 0, acc); if (ret) goto err_free; @@ -1006,203 +248,51 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc) goto err_mr; mr->ibmr.rkey = mr->ibmr.lkey = mr->key; - mr->umem = NULL; return &mr->ibmr; - err_mr: - hns_roce_mr_free(to_hr_dev(pd->device), mr); + free_mr_key(hr_dev, mr); err_free: kfree(mr); return ERR_PTR(ret); } -int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, - struct hns_roce_mtt *mtt, struct ib_umem *umem) -{ - struct device *dev = hr_dev->dev; - struct sg_dma_page_iter sg_iter; - unsigned int order; - int npage = 0; - int ret = 0; - int i; - u64 page_addr; - u64 *pages; - u32 bt_page_size; - u32 n; - - switch (mtt->mtt_type) { - case MTT_TYPE_WQE: - order = hr_dev->caps.mtt_ba_pg_sz; - break; - case MTT_TYPE_CQE: - order = hr_dev->caps.cqe_ba_pg_sz; - break; - case MTT_TYPE_SRQWQE: - order = hr_dev->caps.srqwqe_ba_pg_sz; - break; - case MTT_TYPE_IDX: - order = hr_dev->caps.idx_ba_pg_sz; - break; - default: - dev_err(dev, "Unsupport mtt type %d, write mtt failed\n", - mtt->mtt_type); - return -EINVAL; - } - - bt_page_size = 1 << (order + PAGE_SHIFT); - - pages = (u64 *) __get_free_pages(GFP_KERNEL, order); - if (!pages) - return -ENOMEM; - - i = n = 0; - - for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { - page_addr = sg_page_iter_dma_address(&sg_iter); - if (!(npage % (1 << (mtt->page_shift - PAGE_SHIFT)))) { - if (page_addr & ((1 << mtt->page_shift) - 1)) { - dev_err(dev, - "page_addr is not page_shift %d alignment!\n", - mtt->page_shift); - ret = -EINVAL; - goto out; - } - pages[i++] = page_addr; - } - npage++; - if (i == bt_page_size / sizeof(u64)) { - ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages); - if (ret) - goto out; - n += i; - i = 0; - } - } - - if (i) - ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages); - -out: - free_pages((unsigned long) pages, order); - return ret; -} - -static int hns_roce_ib_umem_write_mr(struct hns_roce_dev *hr_dev, - struct hns_roce_mr *mr, - struct ib_umem *umem) -{ - struct sg_dma_page_iter sg_iter; - int i = 0, j = 0; - u64 page_addr; - u32 pbl_bt_sz; - - if (hr_dev->caps.pbl_hop_num == HNS_ROCE_HOP_NUM_0) - return 0; - - pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); - for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { - page_addr = sg_page_iter_dma_address(&sg_iter); - if (!hr_dev->caps.pbl_hop_num) { - /* for hip06, page addr is aligned to 4K */ - mr->pbl_buf[i++] = page_addr >> 12; - } else if (hr_dev->caps.pbl_hop_num == 1) { - mr->pbl_buf[i++] = page_addr; - } else { - if (hr_dev->caps.pbl_hop_num == 2) - mr->pbl_bt_l1[i][j] = page_addr; - else if (hr_dev->caps.pbl_hop_num == 3) - mr->pbl_bt_l2[i][j] = page_addr; - - j++; - if (j >= (pbl_bt_sz / BA_BYTE_LEN)) { - i++; - j = 0; - } - } - } - - /* Memory barrier */ - mb(); - - return 0; -} - struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); - struct device *dev = hr_dev->dev; struct hns_roce_mr *mr; - int bt_size; int ret; - int n; - int i; - mr = kmalloc(sizeof(*mr), GFP_KERNEL); + mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); - mr->umem = ib_umem_get(pd->device, start, length, access_flags); - if (IS_ERR(mr->umem)) { - ret = PTR_ERR(mr->umem); - goto err_free; - } - - n = ib_umem_page_count(mr->umem); - - if (!hr_dev->caps.pbl_hop_num) { - if (n > HNS_ROCE_MAX_MTPT_PBL_NUM) { - dev_err(dev, - " MR len %lld err. MR is limited to 4G at most!\n", - length); - ret = -EINVAL; - goto err_umem; - } - } else { - u64 pbl_size = 1; - - bt_size = (1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT)) / - BA_BYTE_LEN; - for (i = 0; i < hr_dev->caps.pbl_hop_num; i++) - pbl_size *= bt_size; - if (n > pbl_size) { - dev_err(dev, - " MR len %lld err. MR page num is limited to %lld!\n", - length, pbl_size); - ret = -EINVAL; - goto err_umem; - } - } - mr->type = MR_TYPE_MR; - - ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length, - access_flags, n, mr); + ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, virt_addr, length, + access_flags); if (ret) - goto err_umem; + goto err_alloc_mr; - ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem); + ret = alloc_mr_pbl(hr_dev, mr, length, udata, start, access_flags); if (ret) - goto err_mr; + goto err_alloc_key; ret = hns_roce_mr_enable(hr_dev, mr); if (ret) - goto err_mr; + goto err_alloc_pbl; mr->ibmr.rkey = mr->ibmr.lkey = mr->key; return &mr->ibmr; -err_mr: - hns_roce_mr_free(hr_dev, mr); - -err_umem: - ib_umem_release(mr->umem); - -err_free: +err_alloc_pbl: + free_mr_pbl(hr_dev, mr); +err_alloc_key: + free_mr_key(hr_dev, mr); +err_alloc_mr: kfree(mr); return ERR_PTR(ret); } @@ -1214,84 +304,36 @@ static int rereg_mr_trans(struct ib_mr *ibmr, int flags, u32 pdn, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); + struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_mr *mr = to_hr_mr(ibmr); - struct device *dev = hr_dev->dev; - int npages; int ret; - if (mr->size != ~0ULL) { - npages = ib_umem_page_count(mr->umem); - - if (hr_dev->caps.pbl_hop_num) - hns_roce_mhop_free(hr_dev, mr); - else - dma_free_coherent(dev, npages * 8, - mr->pbl_buf, mr->pbl_dma_addr); - } - ib_umem_release(mr->umem); - - mr->umem = ib_umem_get(ibmr->device, start, length, mr_access_flags); - if (IS_ERR(mr->umem)) { - ret = PTR_ERR(mr->umem); - mr->umem = NULL; - return -ENOMEM; - } - npages = ib_umem_page_count(mr->umem); - - if (hr_dev->caps.pbl_hop_num) { - ret = hns_roce_mhop_alloc(hr_dev, npages, mr); - if (ret) - goto release_umem; - } else { - mr->pbl_buf = dma_alloc_coherent(dev, npages * 8, - &(mr->pbl_dma_addr), - GFP_KERNEL); - if (!mr->pbl_buf) { - ret = -ENOMEM; - goto release_umem; - } + free_mr_pbl(hr_dev, mr); + ret = alloc_mr_pbl(hr_dev, mr, length, udata, start, mr_access_flags); + if (ret) { + ibdev_err(ibdev, "failed to create mr PBL, ret = %d.\n", ret); + return ret; } ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn, mr_access_flags, virt_addr, length, mailbox->buf); - if (ret) - goto release_umem; - - - ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem); if (ret) { - if (mr->size != ~0ULL) { - npages = ib_umem_page_count(mr->umem); - - if (hr_dev->caps.pbl_hop_num) - hns_roce_mhop_free(hr_dev, mr); - else - dma_free_coherent(dev, npages * 8, - mr->pbl_buf, - mr->pbl_dma_addr); - } - - goto release_umem; + ibdev_err(ibdev, "failed to write mtpt, ret = %d.\n", ret); + free_mr_pbl(hr_dev, mr); } - return 0; - -release_umem: - ib_umem_release(mr->umem); return ret; - } - int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_pd *pd, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); + struct ib_device *ib_dev = &hr_dev->ib_dev; struct hns_roce_mr *mr = to_hr_mr(ibmr); struct hns_roce_cmd_mailbox *mailbox; - struct device *dev = hr_dev->dev; unsigned long mtpt_idx; u32 pdn = 0; int ret; @@ -1312,7 +354,7 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, mtpt_idx); if (ret) - dev_warn(dev, "DESTROY_MPT failed (%d)\n", ret); + ibdev_warn(ib_dev, "failed to destroy MPT, ret = %d.\n", ret); mr->enabled = 0; @@ -1336,8 +378,7 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx); if (ret) { - dev_err(dev, "CREATE_MPT failed (%d)\n", ret); - ib_umem_release(mr->umem); + ibdev_err(ib_dev, "failed to create MPT, ret = %d.\n", ret); goto free_cmd_mbox; } @@ -1365,8 +406,6 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) ret = hr_dev->hw->dereg_mr(hr_dev, mr, udata); } else { hns_roce_mr_free(hr_dev, mr); - - ib_umem_release(mr->umem); kfree(mr); } @@ -1380,12 +419,8 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, struct device *dev = hr_dev->dev; struct hns_roce_mr *mr; u64 length; - u32 page_size; int ret; - page_size = 1 << (hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT); - length = max_num_sg * page_size; - if (mr_type != IB_MR_TYPE_MEM_REG) return ERR_PTR(-EINVAL); @@ -1402,23 +437,27 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, mr->type = MR_TYPE_FRMR; /* Allocate memory region key */ - ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, 0, length, - 0, max_num_sg, mr); + length = max_num_sg * (1 << PAGE_SHIFT); + ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, 0, length, 0); if (ret) goto err_free; + ret = alloc_mr_pbl(hr_dev, mr, length, NULL, 0, 0); + if (ret) + goto err_key; + ret = hns_roce_mr_enable(hr_dev, mr); if (ret) - goto err_mr; + goto err_pbl; mr->ibmr.rkey = mr->ibmr.lkey = mr->key; - mr->umem = NULL; return &mr->ibmr; -err_mr: - hns_roce_mr_free(to_hr_dev(pd->device), mr); - +err_key: + free_mr_key(hr_dev, mr); +err_pbl: + free_mr_pbl(hr_dev, mr); err_free: kfree(mr); return ERR_PTR(ret); @@ -1428,19 +467,54 @@ static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr) { struct hns_roce_mr *mr = to_hr_mr(ibmr); - mr->pbl_buf[mr->npages++] = addr; + if (likely(mr->npages < mr->pbl_mtr.hem_cfg.buf_pg_count)) { + mr->page_list[mr->npages++] = addr; + return 0; + } - return 0; + return -ENOBUFS; } int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset) { + struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); + struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_mr *mr = to_hr_mr(ibmr); + struct hns_roce_buf_region region = {}; + int ret = 0; mr->npages = 0; + mr->page_list = kvcalloc(mr->pbl_mtr.hem_cfg.buf_pg_count, + sizeof(dma_addr_t), GFP_KERNEL); + if (!mr->page_list) + return ret; - return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page); + ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page); + if (ret < 1) { + ibdev_err(ibdev, "failed to store sg pages %d %d, cnt = %d.\n", + mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, ret); + goto err_page_list; + } + + region.offset = 0; + region.count = mr->npages; + region.hopnum = mr->pbl_hop_num; + ret = hns_roce_mtr_map(hr_dev, &mr->pbl_mtr, ®ion, 1, mr->page_list, + mr->npages); + if (ret) { + ibdev_err(ibdev, "failed to map sg mtr, ret = %d.\n", ret); + ret = 0; + } else { + mr->pbl_mtr.hem_cfg.buf_pg_shift = ilog2(ibmr->page_size); + ret = mr->npages; + } + +err_page_list: + kvfree(mr->page_list); + mr->page_list = NULL; + + return ret; } static void hns_roce_mw_free(struct hns_roce_dev *hr_dev, @@ -1564,32 +638,23 @@ int hns_roce_dealloc_mw(struct ib_mw *ibmw) return 0; } -void hns_roce_mtr_init(struct hns_roce_mtr *mtr, int bt_pg_shift, - int buf_pg_shift) -{ - hns_roce_hem_list_init(&mtr->hem_list, bt_pg_shift); - mtr->buf_pg_shift = buf_pg_shift; -} - -void hns_roce_mtr_cleanup(struct hns_roce_dev *hr_dev, - struct hns_roce_mtr *mtr) -{ - hns_roce_hem_list_release(hr_dev, &mtr->hem_list); -} - -static int hns_roce_write_mtr(struct hns_roce_dev *hr_dev, - struct hns_roce_mtr *mtr, dma_addr_t *bufs, - struct hns_roce_buf_region *r) +static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + dma_addr_t *pages, struct hns_roce_buf_region *region) { + __le64 *mtts; int offset; int count; int npage; - u64 *mtts; + u64 addr; int end; int i; - offset = r->offset; - end = offset + r->count; + /* if hopnum is 0, buffer cannot store BAs, so skip write mtt */ + if (!region->hopnum) + return 0; + + offset = region->offset; + end = offset + region->count; npage = 0; while (offset < end) { mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, @@ -1597,13 +662,13 @@ static int hns_roce_write_mtr(struct hns_roce_dev *hr_dev, if (!mtts) return -ENOBUFS; - /* Save page addr, low 12 bits : 0 */ for (i = 0; i < count; i++) { if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) - mtts[i] = bufs[npage] >> PAGE_ADDR_SHIFT; + addr = to_hr_hw_page_addr(pages[npage]); else - mtts[i] = bufs[npage]; + addr = pages[npage]; + mtts[i] = cpu_to_le64(addr); npage++; } offset += count; @@ -1612,69 +677,412 @@ static int hns_roce_write_mtr(struct hns_roce_dev *hr_dev, return 0; } -int hns_roce_mtr_attach(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - dma_addr_t **bufs, struct hns_roce_buf_region *regions, - int region_cnt) +static inline bool mtr_has_mtt(struct hns_roce_buf_attr *attr) { - struct hns_roce_buf_region *r; - int ret; int i; - ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, regions, - region_cnt); - if (ret) - return ret; + for (i = 0; i < attr->region_count; i++) + if (attr->region[i].hopnum != HNS_ROCE_HOP_NUM_0 && + attr->region[i].hopnum > 0) + return true; - for (i = 0; i < region_cnt; i++) { - r = ®ions[i]; - ret = hns_roce_write_mtr(hr_dev, mtr, bufs[i], r); + /* because the mtr only one root base address, when hopnum is 0 means + * root base address equals the first buffer address, thus all alloced + * memory must in a continuous space accessed by direct mode. + */ + return false; +} + +static inline size_t mtr_bufs_size(struct hns_roce_buf_attr *attr) +{ + size_t size = 0; + int i; + + for (i = 0; i < attr->region_count; i++) + size += attr->region[i].size; + + return size; +} + +static inline int mtr_umem_page_count(struct ib_umem *umem, int page_shift) +{ + int count = ib_umem_page_count(umem); + + if (page_shift >= PAGE_SHIFT) + count >>= page_shift - PAGE_SHIFT; + else + count <<= PAGE_SHIFT - page_shift; + + return count; +} + +static inline size_t mtr_kmem_direct_size(bool is_direct, size_t alloc_size, + int page_shift) +{ + if (is_direct) + return ALIGN(alloc_size, 1 << page_shift); + else + return HNS_HW_DIRECT_PAGE_COUNT << page_shift; +} + +/* + * check the given pages in continuous address space + * Returns 0 on success, or the error page num. + */ +static inline int mtr_check_direct_pages(dma_addr_t *pages, int page_count, + int page_shift) +{ + size_t page_size = 1 << page_shift; + int i; + + for (i = 1; i < page_count; i++) + if (pages[i] - pages[i - 1] != page_size) + return i; + + return 0; +} + +static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) +{ + /* release user buffers */ + if (mtr->umem) { + ib_umem_release(mtr->umem); + mtr->umem = NULL; + } + + /* release kernel buffers */ + if (mtr->kmem) { + hns_roce_buf_free(hr_dev, mtr->kmem); + kfree(mtr->kmem); + mtr->kmem = NULL; + } +} + +static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + struct hns_roce_buf_attr *buf_attr, bool is_direct, + struct ib_udata *udata, unsigned long user_addr) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + int max_pg_shift = buf_attr->page_shift; + int best_pg_shift = 0; + int all_pg_count = 0; + size_t direct_size; + size_t total_size; + unsigned long tmp; + int ret = 0; + + total_size = mtr_bufs_size(buf_attr); + if (total_size < 1) { + ibdev_err(ibdev, "Failed to check mtr size\n"); + return -EINVAL; + } + + if (udata) { + mtr->kmem = NULL; + mtr->umem = ib_umem_get(ibdev, user_addr, total_size, + buf_attr->user_access); + if (IS_ERR_OR_NULL(mtr->umem)) { + ibdev_err(ibdev, "Failed to get umem, ret %ld\n", + PTR_ERR(mtr->umem)); + return -ENOMEM; + } + if (buf_attr->fixed_page) { + best_pg_shift = max_pg_shift; + } else { + tmp = GENMASK(max_pg_shift, 0); + ret = ib_umem_find_best_pgsz(mtr->umem, tmp, user_addr); + best_pg_shift = (ret <= PAGE_SIZE) ? + PAGE_SHIFT : ilog2(ret); + } + all_pg_count = mtr_umem_page_count(mtr->umem, best_pg_shift); + ret = 0; + } else { + mtr->umem = NULL; + mtr->kmem = kzalloc(sizeof(*mtr->kmem), GFP_KERNEL); + if (!mtr->kmem) { + ibdev_err(ibdev, "Failed to alloc kmem\n"); + return -ENOMEM; + } + direct_size = mtr_kmem_direct_size(is_direct, total_size, + max_pg_shift); + ret = hns_roce_buf_alloc(hr_dev, total_size, direct_size, + mtr->kmem, max_pg_shift); if (ret) { - dev_err(hr_dev->dev, - "write mtr[%d/%d] err %d,offset=%d.\n", - i, region_cnt, ret, r->offset); - goto err_write; + ibdev_err(ibdev, "Failed to alloc kmem, ret %d\n", ret); + goto err_alloc_mem; + } else { + best_pg_shift = max_pg_shift; + all_pg_count = mtr->kmem->npages; } } - return 0; + /* must bigger than minimum hardware page shift */ + if (best_pg_shift < PAGE_ADDR_SHIFT || all_pg_count < 1) { + ret = -EINVAL; + ibdev_err(ibdev, "Failed to check mtr page shift %d count %d\n", + best_pg_shift, all_pg_count); + goto err_alloc_mem; + } -err_write: - hns_roce_hem_list_release(hr_dev, &mtr->hem_list); + mtr->hem_cfg.buf_pg_shift = best_pg_shift; + mtr->hem_cfg.buf_pg_count = all_pg_count; + return 0; +err_alloc_mem: + mtr_free_bufs(hr_dev, mtr); return ret; } +static int mtr_get_pages(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + dma_addr_t *pages, int count, int page_shift) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + int npage; + int err; + + if (mtr->umem) + npage = hns_roce_get_umem_bufs(hr_dev, pages, count, 0, + mtr->umem, page_shift); + else + npage = hns_roce_get_kmem_bufs(hr_dev, pages, count, 0, + mtr->kmem); + + if (mtr->hem_cfg.is_direct && npage > 1) { + err = mtr_check_direct_pages(pages, npage, page_shift); + if (err) { + ibdev_err(ibdev, "Failed to check %s direct page-%d\n", + mtr->umem ? "user" : "kernel", err); + npage = err; + } + } + + return npage; +} + +int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + struct hns_roce_buf_region *regions, int region_cnt, + dma_addr_t *pages, int page_cnt) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_buf_region *r; + int err; + int i; + + for (i = 0; i < region_cnt; i++) { + r = ®ions[i]; + if (r->offset + r->count > page_cnt) { + err = -EINVAL; + ibdev_err(ibdev, + "Failed to check mtr%d end %d + %d, max %d\n", + i, r->offset, r->count, page_cnt); + return err; + } + + err = mtr_map_region(hr_dev, mtr, &pages[r->offset], r); + if (err) { + ibdev_err(ibdev, + "Failed to map mtr%d offset %d, err %d\n", + i, r->offset, err); + return err; + } + } + + return 0; +} + int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr) { - u64 *mtts = mtt_buf; int mtt_count; int total = 0; - u64 *addr; + __le64 *mtts; int npage; + u64 addr; int left; - if (mtts == NULL || mtt_max < 1) + if (!mtt_buf || mtt_max < 1) goto done; + /* no mtt memory in direct mode, so just return the buffer address */ + if (mtr->hem_cfg.is_direct) { + npage = offset; + for (total = 0; total < mtt_max; total++, npage++) { + addr = mtr->hem_cfg.root_ba + + (npage << mtr->hem_cfg.buf_pg_shift); + + if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) + mtt_buf[total] = to_hr_hw_page_addr(addr); + else + mtt_buf[total] = addr; + } + + goto done; + } + left = mtt_max; while (left > 0) { mtt_count = 0; - addr = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, + mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, offset + total, &mtt_count, NULL); - if (!addr || !mtt_count) + if (!mtts || !mtt_count) goto done; npage = min(mtt_count, left); - memcpy(&mtts[total], addr, BA_BYTE_LEN * npage); left -= npage; - total += npage; + for (mtt_count = 0; mtt_count < npage; mtt_count++) + mtt_buf[total++] = le64_to_cpu(mtts[mtt_count]); } done: if (base_addr) - *base_addr = mtr->hem_list.root_ba; + *base_addr = mtr->hem_cfg.root_ba; return total; } + +/* convert buffer size to page index and page count */ +static int mtr_init_region(struct hns_roce_buf_attr *attr, int page_cnt, + struct hns_roce_buf_region *regions, int region_cnt, + int page_shift) +{ + unsigned int page_size = 1 << page_shift; + int max_region = attr->region_count; + struct hns_roce_buf_region *r; + int page_idx = 0; + int i = 0; + + for (; i < region_cnt && i < max_region && page_idx < page_cnt; i++) { + r = ®ions[i]; + r->hopnum = attr->region[i].hopnum == HNS_ROCE_HOP_NUM_0 ? + 0 : attr->region[i].hopnum; + r->offset = page_idx; + r->count = DIV_ROUND_UP(attr->region[i].size, page_size); + page_idx += r->count; + } + + return i; +} + +/** + * hns_roce_mtr_create - Create hns memory translate region. + * + * @mtr: memory translate region + * @init_attr: init attribute for creating mtr + * @page_shift: page shift for multi-hop base address table + * @udata: user space context, if it's NULL, means kernel space + * @user_addr: userspace virtual address to start at + * @buf_alloced: mtr has private buffer, true means need to alloc + */ +int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + struct hns_roce_buf_attr *buf_attr, int page_shift, + struct ib_udata *udata, unsigned long user_addr) +{ + struct hns_roce_buf_region regions[HNS_ROCE_MAX_BT_REGION] = {}; + struct ib_device *ibdev = &hr_dev->ib_dev; + dma_addr_t *pages = NULL; + int region_cnt = 0; + int all_pg_cnt; + int get_pg_cnt; + bool has_mtt; + int err = 0; + + has_mtt = mtr_has_mtt(buf_attr); + /* if buffer only need mtt, just init the hem cfg */ + if (buf_attr->mtt_only) { + mtr->hem_cfg.buf_pg_shift = buf_attr->page_shift; + mtr->hem_cfg.buf_pg_count = mtr_bufs_size(buf_attr) >> + buf_attr->page_shift; + mtr->umem = NULL; + mtr->kmem = NULL; + } else { + err = mtr_alloc_bufs(hr_dev, mtr, buf_attr, !has_mtt, udata, + user_addr); + if (err) { + ibdev_err(ibdev, "Failed to alloc mtr bufs, err %d\n", + err); + return err; + } + } + + /* alloc mtt memory */ + all_pg_cnt = mtr->hem_cfg.buf_pg_count; + hns_roce_hem_list_init(&mtr->hem_list); + mtr->hem_cfg.is_direct = !has_mtt; + mtr->hem_cfg.ba_pg_shift = page_shift; + if (has_mtt) { + region_cnt = mtr_init_region(buf_attr, all_pg_cnt, + regions, ARRAY_SIZE(regions), + mtr->hem_cfg.buf_pg_shift); + if (region_cnt < 1) { + err = -ENOBUFS; + ibdev_err(ibdev, "Failed to init mtr region %d\n", + region_cnt); + goto err_alloc_bufs; + } + err = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, + regions, region_cnt, + page_shift); + if (err) { + ibdev_err(ibdev, "Failed to request mtr hem, err %d\n", + err); + goto err_alloc_bufs; + } + mtr->hem_cfg.root_ba = mtr->hem_list.root_ba; + } + + /* no buffer to map */ + if (buf_attr->mtt_only) + return 0; + + /* alloc a tmp array to store buffer's dma address */ + pages = kvcalloc(all_pg_cnt, sizeof(dma_addr_t), GFP_KERNEL); + if (!pages) { + err = -ENOMEM; + ibdev_err(ibdev, "Failed to alloc mtr page list %d\n", + all_pg_cnt); + goto err_alloc_hem_list; + } + + get_pg_cnt = mtr_get_pages(hr_dev, mtr, pages, all_pg_cnt, + mtr->hem_cfg.buf_pg_shift); + if (get_pg_cnt != all_pg_cnt) { + ibdev_err(ibdev, "Failed to get mtr page %d != %d\n", + get_pg_cnt, all_pg_cnt); + err = -ENOBUFS; + goto err_alloc_page_list; + } + + if (!has_mtt) { + mtr->hem_cfg.root_ba = pages[0]; + } else { + /* write buffer's dma address to BA table */ + err = hns_roce_mtr_map(hr_dev, mtr, regions, region_cnt, pages, + all_pg_cnt); + if (err) { + ibdev_err(ibdev, "Failed to map mtr pages, err %d\n", + err); + goto err_alloc_page_list; + } + } + + /* drop tmp array */ + kvfree(pages); + return 0; +err_alloc_page_list: + kvfree(pages); +err_alloc_hem_list: + hns_roce_hem_list_release(hr_dev, &mtr->hem_list); +err_alloc_bufs: + mtr_free_bufs(hr_dev, mtr); + return err; +} + +void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) +{ + /* release multi-hop addressing resource */ + hns_roce_hem_list_release(hr_dev, &mtr->hem_list); + + /* free buffers */ + mtr_free_bufs(hr_dev, mtr); +} diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 6317901c4b4f..dca979d8c345 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -355,16 +355,16 @@ static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) hns_roce_bitmap_free_range(&qp_table->bitmap, hr_qp->qpn, 1, BITMAP_RR); } -static int set_rq_size(struct hns_roce_dev *hr_dev, - struct ib_qp_cap *cap, bool is_user, int has_rq, - struct hns_roce_qp *hr_qp) +static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, + struct hns_roce_qp *hr_qp, int has_rq) { - u32 max_cnt; + u32 cnt; /* If srq exist, set zero for relative number of rq */ if (!has_rq) { hr_qp->rq.wqe_cnt = 0; hr_qp->rq.max_gs = 0; + hr_qp->rq_inl_buf.wqe_cnt = 0; cap->max_recv_wr = 0; cap->max_recv_sge = 0; @@ -379,17 +379,14 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, return -EINVAL; } - max_cnt = max(cap->max_recv_wr, hr_dev->caps.min_wqes); - - hr_qp->rq.wqe_cnt = roundup_pow_of_two(max_cnt); - if ((u32)hr_qp->rq.wqe_cnt > hr_dev->caps.max_wqes) { + cnt = roundup_pow_of_two(max(cap->max_recv_wr, hr_dev->caps.min_wqes)); + if (cnt > hr_dev->caps.max_wqes) { ibdev_err(&hr_dev->ib_dev, "rq depth %u too large\n", cap->max_recv_wr); return -EINVAL; } - max_cnt = max(1U, cap->max_recv_sge); - hr_qp->rq.max_gs = roundup_pow_of_two(max_cnt); + hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge)); if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE) hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz); @@ -397,12 +394,61 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz * hr_qp->rq.max_gs); - cap->max_recv_wr = hr_qp->rq.wqe_cnt; + hr_qp->rq.wqe_cnt = cnt; + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) + hr_qp->rq_inl_buf.wqe_cnt = cnt; + else + hr_qp->rq_inl_buf.wqe_cnt = 0; + + cap->max_recv_wr = cnt; cap->max_recv_sge = hr_qp->rq.max_gs; return 0; } +static int set_extend_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt, + struct hns_roce_qp *hr_qp, + struct ib_qp_cap *cap) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + u32 cnt; + + cnt = max(1U, cap->max_send_sge); + if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) { + hr_qp->sq.max_gs = roundup_pow_of_two(cnt); + hr_qp->sge.sge_cnt = 0; + + return 0; + } + + hr_qp->sq.max_gs = cnt; + + /* UD sqwqe's sge use extend sge */ + if (hr_qp->ibqp.qp_type == IB_QPT_GSI || + hr_qp->ibqp.qp_type == IB_QPT_UD) { + cnt = roundup_pow_of_two(sq_wqe_cnt * hr_qp->sq.max_gs); + } else if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) { + cnt = roundup_pow_of_two(sq_wqe_cnt * + (hr_qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE)); + + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_A) { + if (cnt > hr_dev->caps.max_extend_sg) { + ibdev_err(ibdev, + "failed to check exSGE num, exSGE num = %d.\n", + cnt); + return -EINVAL; + } + } + } else { + cnt = 0; + } + + hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT; + hr_qp->sge.sge_cnt = cnt; + + return 0; +} + static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, struct hns_roce_ib_create_qp *ucmd) @@ -430,174 +476,79 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp, struct hns_roce_ib_create_qp *ucmd) { - u32 ex_sge_num; - u32 page_size; - u32 max_cnt; + struct ib_device *ibdev = &hr_dev->ib_dev; + u32 cnt = 0; int ret; - if (check_shl_overflow(1, ucmd->log_sq_bb_count, &hr_qp->sq.wqe_cnt) || - hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) + if (check_shl_overflow(1, ucmd->log_sq_bb_count, &cnt) || + cnt > hr_dev->caps.max_wqes) return -EINVAL; ret = check_sq_size_with_integrity(hr_dev, cap, ucmd); if (ret) { - ibdev_err(&hr_dev->ib_dev, "Failed to check user SQ size limit\n"); + ibdev_err(ibdev, "failed to check user SQ size, ret = %d.\n", + ret); return ret; } - hr_qp->sq.wqe_shift = ucmd->log_sq_stride; - - max_cnt = max(1U, cap->max_send_sge); - if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) - hr_qp->sq.max_gs = roundup_pow_of_two(max_cnt); - else - hr_qp->sq.max_gs = max_cnt; - - if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) - hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt * - (hr_qp->sq.max_gs - 2)); - - if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE && - hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_A) { - if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) { - ibdev_err(&hr_dev->ib_dev, - "Failed to check extended SGE size limit %d\n", - hr_qp->sge.sge_cnt); - return -EINVAL; - } - } - - hr_qp->sge.sge_shift = 4; - ex_sge_num = hr_qp->sge.sge_cnt; + ret = set_extend_sge_param(hr_dev, cnt, hr_qp, cap); + if (ret) + return ret; - /* Get buf size, SQ and RQ are aligned to page_szie */ - if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) { - hr_qp->buff_size = round_up((hr_qp->rq.wqe_cnt << - hr_qp->rq.wqe_shift), PAGE_SIZE) + - round_up((hr_qp->sq.wqe_cnt << - hr_qp->sq.wqe_shift), PAGE_SIZE); - - hr_qp->sq.offset = 0; - hr_qp->rq.offset = round_up((hr_qp->sq.wqe_cnt << - hr_qp->sq.wqe_shift), PAGE_SIZE); - } else { - page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); - hr_qp->sge.sge_cnt = ex_sge_num ? - max(page_size / (1 << hr_qp->sge.sge_shift), ex_sge_num) : 0; - hr_qp->buff_size = round_up((hr_qp->rq.wqe_cnt << - hr_qp->rq.wqe_shift), page_size) + - round_up((hr_qp->sge.sge_cnt << - hr_qp->sge.sge_shift), page_size) + - round_up((hr_qp->sq.wqe_cnt << - hr_qp->sq.wqe_shift), page_size); - - hr_qp->sq.offset = 0; - if (ex_sge_num) { - hr_qp->sge.offset = round_up((hr_qp->sq.wqe_cnt << - hr_qp->sq.wqe_shift), - page_size); - hr_qp->rq.offset = hr_qp->sge.offset + - round_up((hr_qp->sge.sge_cnt << - hr_qp->sge.sge_shift), - page_size); - } else { - hr_qp->rq.offset = round_up((hr_qp->sq.wqe_cnt << - hr_qp->sq.wqe_shift), - page_size); - } - } + hr_qp->sq.wqe_shift = ucmd->log_sq_stride; + hr_qp->sq.wqe_cnt = cnt; return 0; } static int split_wqe_buf_region(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, - struct hns_roce_buf_region *regions, - int region_max, int page_shift) + struct hns_roce_buf_attr *buf_attr) { - int page_size = 1 << page_shift; - bool is_extend_sge; - int region_cnt = 0; int buf_size; - int buf_cnt; + int idx = 0; - if (hr_qp->buff_size < 1 || region_max < 1) - return region_cnt; + hr_qp->buff_size = 0; - if (hr_qp->sge.sge_cnt > 0) - is_extend_sge = true; - else - is_extend_sge = false; - - /* sq region */ - if (is_extend_sge) - buf_size = hr_qp->sge.offset - hr_qp->sq.offset; - else - buf_size = hr_qp->rq.offset - hr_qp->sq.offset; - - if (buf_size > 0 && region_cnt < region_max) { - buf_cnt = DIV_ROUND_UP(buf_size, page_size); - hns_roce_init_buf_region(®ions[region_cnt], - hr_dev->caps.wqe_sq_hop_num, - hr_qp->sq.offset / page_size, - buf_cnt); - region_cnt++; - } - - /* sge region */ - if (is_extend_sge) { - buf_size = hr_qp->rq.offset - hr_qp->sge.offset; - if (buf_size > 0 && region_cnt < region_max) { - buf_cnt = DIV_ROUND_UP(buf_size, page_size); - hns_roce_init_buf_region(®ions[region_cnt], - hr_dev->caps.wqe_sge_hop_num, - hr_qp->sge.offset / page_size, - buf_cnt); - region_cnt++; - } - } - - /* rq region */ - buf_size = hr_qp->buff_size - hr_qp->rq.offset; - if (buf_size > 0) { - buf_cnt = DIV_ROUND_UP(buf_size, page_size); - hns_roce_init_buf_region(®ions[region_cnt], - hr_dev->caps.wqe_rq_hop_num, - hr_qp->rq.offset / page_size, - buf_cnt); - region_cnt++; - } - - return region_cnt; -} - -static int set_extend_sge_param(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp) -{ - struct device *dev = hr_dev->dev; - - if (hr_qp->sq.max_gs > 2) { - hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt * - (hr_qp->sq.max_gs - 2)); - hr_qp->sge.sge_shift = 4; - } - - /* ud sqwqe's sge use extend sge */ - if (hr_dev->hw_rev != HNS_ROCE_HW_VER1 && - hr_qp->ibqp.qp_type == IB_QPT_GSI) { - hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt * - hr_qp->sq.max_gs); - hr_qp->sge.sge_shift = 4; - } + /* SQ WQE */ + hr_qp->sq.offset = 0; + buf_size = to_hr_hem_entries_size(hr_qp->sq.wqe_cnt, + hr_qp->sq.wqe_shift); + if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) { + buf_attr->region[idx].size = buf_size; + buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sq_hop_num; + idx++; + hr_qp->buff_size += buf_size; + } + + /* extend SGE WQE in SQ */ + hr_qp->sge.offset = hr_qp->buff_size; + buf_size = to_hr_hem_entries_size(hr_qp->sge.sge_cnt, + hr_qp->sge.sge_shift); + if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) { + buf_attr->region[idx].size = buf_size; + buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sge_hop_num; + idx++; + hr_qp->buff_size += buf_size; + } + + /* RQ WQE */ + hr_qp->rq.offset = hr_qp->buff_size; + buf_size = to_hr_hem_entries_size(hr_qp->rq.wqe_cnt, + hr_qp->rq.wqe_shift); + if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) { + buf_attr->region[idx].size = buf_size; + buf_attr->region[idx].hopnum = hr_dev->caps.wqe_rq_hop_num; + idx++; + hr_qp->buff_size += buf_size; + } + + if (hr_qp->buff_size < 1) + return -EINVAL; - if (hr_qp->sq.max_gs > 2 && - hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_A) { - if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) { - dev_err(dev, "The extended sge cnt error! sge_cnt=%d\n", - hr_qp->sge.sge_cnt); - return -EINVAL; - } - } + buf_attr->page_shift = PAGE_ADDR_SHIFT + hr_dev->caps.mtt_buf_pg_sz; + buf_attr->fixed_page = true; + buf_attr->region_count = idx; return 0; } @@ -605,62 +556,35 @@ static int set_extend_sge_param(struct hns_roce_dev *hr_dev, static int set_kernel_sq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp) { - u32 page_size; - u32 max_cnt; - int size; + struct ib_device *ibdev = &hr_dev->ib_dev; + u32 cnt; int ret; if (!cap->max_send_wr || cap->max_send_wr > hr_dev->caps.max_wqes || cap->max_send_sge > hr_dev->caps.max_sq_sg || cap->max_inline_data > hr_dev->caps.max_sq_inline) { - ibdev_err(&hr_dev->ib_dev, - "SQ WR or sge or inline data error!\n"); + ibdev_err(ibdev, + "failed to check SQ WR, SGE or inline num, ret = %d.\n", + -EINVAL); return -EINVAL; } - hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz); - - max_cnt = max(cap->max_send_wr, hr_dev->caps.min_wqes); - - hr_qp->sq.wqe_cnt = roundup_pow_of_two(max_cnt); - if ((u32)hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) { - ibdev_err(&hr_dev->ib_dev, - "while setting kernel sq size, sq.wqe_cnt too large\n"); + cnt = roundup_pow_of_two(max(cap->max_send_wr, hr_dev->caps.min_wqes)); + if (cnt > hr_dev->caps.max_wqes) { + ibdev_err(ibdev, "failed to check WQE num, WQE num = %d.\n", + cnt); return -EINVAL; } - /* Get data_seg numbers */ - max_cnt = max(1U, cap->max_send_sge); - if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) - hr_qp->sq.max_gs = roundup_pow_of_two(max_cnt); - else - hr_qp->sq.max_gs = max_cnt; + hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz); + hr_qp->sq.wqe_cnt = cnt; - ret = set_extend_sge_param(hr_dev, hr_qp); - if (ret) { - ibdev_err(&hr_dev->ib_dev, "set extend sge parameters fail\n"); + ret = set_extend_sge_param(hr_dev, cnt, hr_qp, cap); + if (ret) return ret; - } - /* Get buf size, SQ and RQ are aligned to PAGE_SIZE */ - page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); - hr_qp->sq.offset = 0; - size = round_up(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift, page_size); - - if (hr_dev->hw_rev != HNS_ROCE_HW_VER1 && hr_qp->sge.sge_cnt) { - hr_qp->sge.sge_cnt = max(page_size/(1 << hr_qp->sge.sge_shift), - (u32)hr_qp->sge.sge_cnt); - hr_qp->sge.offset = size; - size += round_up(hr_qp->sge.sge_cnt << hr_qp->sge.sge_shift, - page_size); - } - - hr_qp->rq.offset = size; - size += round_up((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), page_size); - hr_qp->buff_size = size; - - /* Get wr and sge number which send */ - cap->max_send_wr = hr_qp->sq.wqe_cnt; + /* sync the parameters of kernel QP to user's configuration */ + cap->max_send_wr = cnt; cap->max_send_sge = hr_qp->sq.max_gs; /* We don't support inline sends for kernel QPs (yet) */ @@ -691,8 +615,8 @@ static int alloc_rq_inline_buf(struct hns_roce_qp *hr_qp, struct ib_qp_init_attr *init_attr) { u32 max_recv_sge = init_attr->cap.max_recv_sge; + u32 wqe_cnt = hr_qp->rq_inl_buf.wqe_cnt; struct hns_roce_rinl_wqe *wqe_list; - u32 wqe_cnt = hr_qp->rq.wqe_cnt; int i; /* allocate recv inline buf */ @@ -714,7 +638,6 @@ static int alloc_rq_inline_buf(struct hns_roce_qp *hr_qp, wqe_list[i].sg_list = &wqe_list[0].sg_list[i * max_recv_sge]; hr_qp->rq_inl_buf.wqe_list = wqe_list; - hr_qp->rq_inl_buf.wqe_cnt = wqe_cnt; return 0; @@ -727,140 +650,55 @@ err: static void free_rq_inline_buf(struct hns_roce_qp *hr_qp) { - kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list); + if (hr_qp->rq_inl_buf.wqe_list) + kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list); kfree(hr_qp->rq_inl_buf.wqe_list); } -static int map_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, - u32 page_shift, bool is_user) -{ -/* WQE buffer include 3 parts: SQ, extend SGE and RQ. */ -#define HNS_ROCE_WQE_REGION_MAX 3 - struct hns_roce_buf_region regions[HNS_ROCE_WQE_REGION_MAX] = {}; - dma_addr_t *buf_list[HNS_ROCE_WQE_REGION_MAX] = {}; - struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_buf_region *r; - int region_count; - int buf_count; - int ret; - int i; - - region_count = split_wqe_buf_region(hr_dev, hr_qp, regions, - ARRAY_SIZE(regions), page_shift); - - /* alloc a tmp list to store WQE buffers address */ - ret = hns_roce_alloc_buf_list(regions, buf_list, region_count); - if (ret) { - ibdev_err(ibdev, "Failed to alloc WQE buffer list\n"); - return ret; - } - - for (i = 0; i < region_count; i++) { - r = ®ions[i]; - if (is_user) - buf_count = hns_roce_get_umem_bufs(hr_dev, buf_list[i], - r->count, r->offset, hr_qp->umem, - page_shift); - else - buf_count = hns_roce_get_kmem_bufs(hr_dev, buf_list[i], - r->count, r->offset, &hr_qp->hr_buf); - - if (buf_count != r->count) { - ibdev_err(ibdev, "Failed to get %s WQE buf, expect %d = %d.\n", - is_user ? "user" : "kernel", - r->count, buf_count); - ret = -ENOBUFS; - goto done; - } - } - - hr_qp->wqe_bt_pg_shift = hr_dev->caps.mtt_ba_pg_sz; - hns_roce_mtr_init(&hr_qp->mtr, PAGE_SHIFT + hr_qp->wqe_bt_pg_shift, - page_shift); - ret = hns_roce_mtr_attach(hr_dev, &hr_qp->mtr, buf_list, regions, - region_count); - if (ret) - ibdev_err(ibdev, "Failed to attach WQE's mtr\n"); - - goto done; - - hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr); -done: - hns_roce_free_buf_list(buf_list, region_count); - - return ret; -} - static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, unsigned long addr) { - u32 page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz; struct ib_device *ibdev = &hr_dev->ib_dev; - bool is_rq_buf_inline; + struct hns_roce_buf_attr buf_attr = {}; int ret; - is_rq_buf_inline = (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && - hns_roce_qp_has_rq(init_attr); - if (is_rq_buf_inline) { + if (!udata && hr_qp->rq_inl_buf.wqe_cnt) { ret = alloc_rq_inline_buf(hr_qp, init_attr); if (ret) { - ibdev_err(ibdev, "Failed to alloc inline RQ buffer\n"); + ibdev_err(ibdev, + "failed to alloc inline buf, ret = %d.\n", + ret); return ret; } - } - - if (udata) { - hr_qp->umem = ib_umem_get(ibdev, addr, hr_qp->buff_size, 0); - if (IS_ERR(hr_qp->umem)) { - ret = PTR_ERR(hr_qp->umem); - goto err_inline; - } } else { - ret = hns_roce_buf_alloc(hr_dev, hr_qp->buff_size, - (1 << page_shift) * 2, - &hr_qp->hr_buf, page_shift); - if (ret) - goto err_inline; + hr_qp->rq_inl_buf.wqe_list = NULL; } - ret = map_wqe_buf(hr_dev, hr_qp, page_shift, udata); - if (ret) - goto err_alloc; + ret = split_wqe_buf_region(hr_dev, hr_qp, &buf_attr); + if (ret) { + ibdev_err(ibdev, "failed to split WQE buf, ret = %d.\n", ret); + goto err_inline; + } + ret = hns_roce_mtr_create(hr_dev, &hr_qp->mtr, &buf_attr, + PAGE_ADDR_SHIFT + hr_dev->caps.mtt_ba_pg_sz, + udata, addr); + if (ret) { + ibdev_err(ibdev, "failed to create WQE mtr, ret = %d.\n", ret); + goto err_inline; + } return 0; - err_inline: - if (is_rq_buf_inline) - free_rq_inline_buf(hr_qp); - -err_alloc: - if (udata) { - ib_umem_release(hr_qp->umem); - hr_qp->umem = NULL; - } else { - hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); - } - - ibdev_err(ibdev, "Failed to alloc WQE buffer, ret %d.\n", ret); + free_rq_inline_buf(hr_qp); return ret; } static void free_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { - hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr); - if (hr_qp->umem) { - ib_umem_release(hr_qp->umem); - hr_qp->umem = NULL; - } - - if (hr_qp->hr_buf.nbufs > 0) - hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); - - if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && - hr_qp->rq.wqe_cnt) - free_rq_inline_buf(hr_qp); + hns_roce_mtr_destroy(hr_dev, &hr_qp->mtr); + free_rq_inline_buf(hr_qp); } static inline bool user_qp_has_sdb(struct hns_roce_dev *hr_dev, @@ -912,8 +750,8 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, "Failed to map user SQ doorbell\n"); goto err_out; } - hr_qp->sdb_en = 1; - resp->cap_flags |= HNS_ROCE_SUPPORT_SQ_RECORD_DB; + hr_qp->en_flags |= HNS_ROCE_QP_CAP_SQ_RECORD_DB; + resp->cap_flags |= HNS_ROCE_QP_CAP_SQ_RECORD_DB; } if (user_qp_has_rdb(hr_dev, init_attr, udata, resp)) { @@ -924,8 +762,8 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, "Failed to map user RQ doorbell\n"); goto err_sdb; } - hr_qp->rdb_en = 1; - resp->cap_flags |= HNS_ROCE_SUPPORT_RQ_RECORD_DB; + hr_qp->en_flags |= HNS_ROCE_QP_CAP_RQ_RECORD_DB; + resp->cap_flags |= HNS_ROCE_QP_CAP_RQ_RECORD_DB; } } else { /* QP doorbell register address */ @@ -942,13 +780,13 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, goto err_out; } *hr_qp->rdb.db_record = 0; - hr_qp->rdb_en = 1; + hr_qp->en_flags |= HNS_ROCE_QP_CAP_RQ_RECORD_DB; } } return 0; err_sdb: - if (udata && hr_qp->sdb_en) + if (udata && hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB) hns_roce_db_unmap_user(uctx, &hr_qp->sdb); err_out: return ret; @@ -961,12 +799,12 @@ static void free_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, udata, struct hns_roce_ucontext, ibucontext); if (udata) { - if (hr_qp->rdb_en) + if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB) hns_roce_db_unmap_user(uctx, &hr_qp->rdb); - if (hr_qp->sdb_en) + if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB) hns_roce_db_unmap_user(uctx, &hr_qp->sdb); } else { - if (hr_qp->rdb_en) + if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB) hns_roce_free_db(hr_dev, &hr_qp->rdb); } } @@ -1025,10 +863,11 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, else hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR; - ret = set_rq_size(hr_dev, &init_attr->cap, udata, - hns_roce_qp_has_rq(init_attr), hr_qp); + ret = set_rq_size(hr_dev, &init_attr->cap, hr_qp, + hns_roce_qp_has_rq(init_attr)); if (ret) { - ibdev_err(ibdev, "Failed to set user RQ size\n"); + ibdev_err(ibdev, "failed to set user RQ size, ret = %d.\n", + ret); return ret; } @@ -1339,10 +1178,10 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (ibqp->uobject && (attr_mask & IB_QP_STATE) && new_state == IB_QPS_ERR) { - if (hr_qp->sdb_en == 1) { + if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB) { hr_qp->sq.head = *(int *)(hr_qp->sdb.virt_addr); - if (hr_qp->rdb_en == 1) + if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB) hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr); } else { ibdev_warn(&hr_dev->ib_dev, @@ -1431,10 +1270,9 @@ void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, } } -static void *get_wqe(struct hns_roce_qp *hr_qp, int offset) +static inline void *get_wqe(struct hns_roce_qp *hr_qp, int offset) { - - return hns_roce_buf_offset(&hr_qp->hr_buf, offset); + return hns_roce_buf_offset(hr_qp->mtr.kmem, offset); } void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, int n) @@ -1449,8 +1287,7 @@ void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, int n) void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, int n) { - return hns_roce_buf_offset(&hr_qp->hr_buf, hr_qp->sge.offset + - (n << hr_qp->sge.sge_shift)); + return get_wqe(hr_qp, hr_qp->sge.offset + (n << hr_qp->sge.sge_shift)); } bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 5b3dd1a337d4..6e5a2adc2ab2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -77,56 +77,56 @@ static int hns_roce_hw_destroy_srq(struct hns_roce_dev *dev, HNS_ROCE_CMD_TIMEOUT_MSECS); } -static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, - u16 xrcd, struct hns_roce_mtt *hr_mtt, - u64 db_rec_addr, struct hns_roce_srq *srq) +static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, + u32 pdn, u32 cqn, u16 xrcd, u64 db_rec_addr) { struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; + struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_cmd_mailbox *mailbox; - dma_addr_t dma_handle_wqe; - dma_addr_t dma_handle_idx; - u64 *mtts_wqe; - u64 *mtts_idx; + u64 mtts_wqe[MTT_MIN_COUNT] = { 0 }; + u64 mtts_idx[MTT_MIN_COUNT] = { 0 }; + dma_addr_t dma_handle_wqe = 0; + dma_addr_t dma_handle_idx = 0; int ret; /* Get the physical address of srq buf */ - mtts_wqe = hns_roce_table_find(hr_dev, - &hr_dev->mr_table.mtt_srqwqe_table, - srq->mtt.first_seg, - &dma_handle_wqe); - if (!mtts_wqe) { - dev_err(hr_dev->dev, "Failed to find mtt for srq buf.\n"); - return -EINVAL; + ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe, + ARRAY_SIZE(mtts_wqe), &dma_handle_wqe); + if (ret < 1) { + ibdev_err(ibdev, "Failed to find mtr for SRQ WQE\n"); + return -ENOBUFS; } /* Get physical address of idx que buf */ - mtts_idx = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_idx_table, - srq->idx_que.mtt.first_seg, - &dma_handle_idx); - if (!mtts_idx) { - dev_err(hr_dev->dev, - "Failed to find mtt for srq idx queue buf.\n"); - return -EINVAL; + ret = hns_roce_mtr_find(hr_dev, &srq->idx_que.mtr, 0, mtts_idx, + ARRAY_SIZE(mtts_idx), &dma_handle_idx); + if (ret < 1) { + ibdev_err(ibdev, "Failed to find mtr for SRQ idx\n"); + return -ENOBUFS; } ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn); if (ret) { - dev_err(hr_dev->dev, - "Failed to alloc a bit from srq bitmap.\n"); + ibdev_err(ibdev, "Failed to alloc SRQ number, err %d\n", ret); return -ENOMEM; } ret = hns_roce_table_get(hr_dev, &srq_table->table, srq->srqn); - if (ret) + if (ret) { + ibdev_err(ibdev, "Failed to get SRQC table, err %d\n", ret); goto err_out; + } ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL)); - if (ret) + if (ret) { + ibdev_err(ibdev, "Failed to store SRQC, err %d\n", ret); goto err_put; + } mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); - if (IS_ERR(mailbox)) { - ret = PTR_ERR(mailbox); + if (IS_ERR_OR_NULL(mailbox)) { + ret = -ENOMEM; + ibdev_err(ibdev, "Failed to alloc mailbox for SRQC\n"); goto err_xa; } @@ -136,8 +136,10 @@ static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, ret = hns_roce_hw_create_srq(hr_dev, mailbox, srq->srqn); hns_roce_free_cmd_mailbox(hr_dev, mailbox); - if (ret) + if (ret) { + ibdev_err(ibdev, "Failed to config SRQC, err %d\n", ret); goto err_xa; + } atomic_set(&srq->refcount, 1); init_completion(&srq->free); @@ -154,8 +156,7 @@ err_out: return ret; } -static void hns_roce_srq_free(struct hns_roce_dev *hr_dev, - struct hns_roce_srq *srq) +static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) { struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; int ret; @@ -175,187 +176,104 @@ static void hns_roce_srq_free(struct hns_roce_dev *hr_dev, hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR); } -static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata, - int srq_buf_size) +static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, + struct ib_udata *udata, unsigned long addr) { - struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device); - struct hns_roce_ib_create_srq ucmd; - struct hns_roce_buf *buf; - int ret; + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_buf_attr buf_attr = {}; + int err; + + srq->wqe_shift = ilog2(roundup_pow_of_two(max(HNS_ROCE_SGE_SIZE, + HNS_ROCE_SGE_SIZE * + srq->max_gs))); + + buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + PAGE_ADDR_SHIFT; + buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt, + srq->wqe_shift); + buf_attr.region[0].hopnum = hr_dev->caps.srqwqe_hop_num; + buf_attr.region_count = 1; + buf_attr.fixed_page = true; + + err = hns_roce_mtr_create(hr_dev, &srq->buf_mtr, &buf_attr, + hr_dev->caps.srqwqe_ba_pg_sz + + PAGE_ADDR_SHIFT, udata, addr); + if (err) + ibdev_err(ibdev, "Failed to alloc SRQ buf mtr, err %d\n", err); + + return err; +} - if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) - return -EFAULT; - - srq->umem = - ib_umem_get(srq->ibsrq.device, ucmd.buf_addr, srq_buf_size, 0); - if (IS_ERR(srq->umem)) - return PTR_ERR(srq->umem); - - buf = &srq->buf; - buf->npages = (ib_umem_page_count(srq->umem) + - (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) / - (1 << hr_dev->caps.srqwqe_buf_pg_sz); - buf->page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; - ret = hns_roce_mtt_init(hr_dev, buf->npages, buf->page_shift, - &srq->mtt); - if (ret) - goto err_user_buf; +static void free_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) +{ + hns_roce_mtr_destroy(hr_dev, &srq->buf_mtr); +} - ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->mtt, srq->umem); - if (ret) - goto err_user_srq_mtt; - - /* config index queue BA */ - srq->idx_que.umem = ib_umem_get(srq->ibsrq.device, ucmd.que_addr, - srq->idx_que.buf_size, 0); - if (IS_ERR(srq->idx_que.umem)) { - dev_err(hr_dev->dev, "ib_umem_get error for index queue\n"); - ret = PTR_ERR(srq->idx_que.umem); - goto err_user_srq_mtt; +static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, + struct ib_udata *udata, unsigned long addr) +{ + struct hns_roce_idx_que *idx_que = &srq->idx_que; + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_buf_attr buf_attr = {}; + int err; + + srq->idx_que.entry_shift = ilog2(HNS_ROCE_IDX_QUE_ENTRY_SZ); + + buf_attr.page_shift = hr_dev->caps.idx_buf_pg_sz + PAGE_ADDR_SHIFT; + buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt, + srq->idx_que.entry_shift); + buf_attr.region[0].hopnum = hr_dev->caps.idx_hop_num; + buf_attr.region_count = 1; + buf_attr.fixed_page = true; + + err = hns_roce_mtr_create(hr_dev, &idx_que->mtr, &buf_attr, + hr_dev->caps.idx_ba_pg_sz + PAGE_ADDR_SHIFT, + udata, addr); + if (err) { + ibdev_err(ibdev, "Failed to alloc SRQ idx mtr, err %d\n", err); + return err; } - buf = &srq->idx_que.idx_buf; - buf->npages = DIV_ROUND_UP(ib_umem_page_count(srq->idx_que.umem), - 1 << hr_dev->caps.idx_buf_pg_sz); - buf->page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz; - ret = hns_roce_mtt_init(hr_dev, buf->npages, buf->page_shift, - &srq->idx_que.mtt); - if (ret) { - dev_err(hr_dev->dev, "hns_roce_mtt_init error for idx que\n"); - goto err_user_idx_mtt; - } + if (!udata) { + idx_que->bitmap = bitmap_zalloc(srq->wqe_cnt, GFP_KERNEL); + if (!idx_que->bitmap) { + ibdev_err(ibdev, "Failed to alloc SRQ idx bitmap\n"); + err = -ENOMEM; + goto err_idx_mtr; + } - ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->idx_que.mtt, - srq->idx_que.umem); - if (ret) { - dev_err(hr_dev->dev, - "hns_roce_ib_umem_write_mtt error for idx que\n"); - goto err_user_idx_buf; } return 0; +err_idx_mtr: + hns_roce_mtr_destroy(hr_dev, &idx_que->mtr); -err_user_idx_buf: - hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); - -err_user_idx_mtt: - ib_umem_release(srq->idx_que.umem); - -err_user_srq_mtt: - hns_roce_mtt_cleanup(hr_dev, &srq->mtt); - -err_user_buf: - ib_umem_release(srq->umem); - - return ret; + return err; } -static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq, - u32 page_shift) +static void free_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) { - struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); struct hns_roce_idx_que *idx_que = &srq->idx_que; - idx_que->bitmap = bitmap_zalloc(srq->wqe_cnt, GFP_KERNEL); - if (!idx_que->bitmap) - return -ENOMEM; - - idx_que->buf_size = srq->idx_que.buf_size; - - if (hns_roce_buf_alloc(hr_dev, idx_que->buf_size, (1 << page_shift) * 2, - &idx_que->idx_buf, page_shift)) { - bitmap_free(idx_que->bitmap); - return -ENOMEM; - } - - return 0; + bitmap_free(idx_que->bitmap); + idx_que->bitmap = NULL; + hns_roce_mtr_destroy(hr_dev, &idx_que->mtr); } -static int create_kernel_srq(struct hns_roce_srq *srq, int srq_buf_size) +static int alloc_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) { - struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device); - u32 page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; - int ret; - - if (hns_roce_buf_alloc(hr_dev, srq_buf_size, (1 << page_shift) * 2, - &srq->buf, page_shift)) - return -ENOMEM; - srq->head = 0; srq->tail = srq->wqe_cnt - 1; - - ret = hns_roce_mtt_init(hr_dev, srq->buf.npages, srq->buf.page_shift, - &srq->mtt); - if (ret) - goto err_kernel_buf; - - ret = hns_roce_buf_write_mtt(hr_dev, &srq->mtt, &srq->buf); - if (ret) - goto err_kernel_srq_mtt; - - page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz; - ret = hns_roce_create_idx_que(srq->ibsrq.pd, srq, page_shift); - if (ret) { - dev_err(hr_dev->dev, "Create idx queue fail(%d)!\n", ret); - goto err_kernel_srq_mtt; - } - - /* Init mtt table for idx_que */ - ret = hns_roce_mtt_init(hr_dev, srq->idx_que.idx_buf.npages, - srq->idx_que.idx_buf.page_shift, - &srq->idx_que.mtt); - if (ret) - goto err_kernel_create_idx; - - /* Write buffer address into the mtt table */ - ret = hns_roce_buf_write_mtt(hr_dev, &srq->idx_que.mtt, - &srq->idx_que.idx_buf); - if (ret) - goto err_kernel_idx_buf; - srq->wrid = kvmalloc_array(srq->wqe_cnt, sizeof(u64), GFP_KERNEL); - if (!srq->wrid) { - ret = -ENOMEM; - goto err_kernel_idx_buf; - } + if (!srq->wrid) + return -ENOMEM; return 0; - -err_kernel_idx_buf: - hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); - -err_kernel_create_idx: - hns_roce_buf_free(hr_dev, srq->idx_que.buf_size, - &srq->idx_que.idx_buf); - kfree(srq->idx_que.bitmap); - -err_kernel_srq_mtt: - hns_roce_mtt_cleanup(hr_dev, &srq->mtt); - -err_kernel_buf: - hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf); - - return ret; -} - -static void destroy_user_srq(struct hns_roce_dev *hr_dev, - struct hns_roce_srq *srq) -{ - hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); - ib_umem_release(srq->idx_que.umem); - hns_roce_mtt_cleanup(hr_dev, &srq->mtt); - ib_umem_release(srq->umem); } -static void destroy_kernel_srq(struct hns_roce_dev *hr_dev, - struct hns_roce_srq *srq, int srq_buf_size) +static void free_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) { - kvfree(srq->wrid); - hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); - hns_roce_buf_free(hr_dev, srq->idx_que.buf_size, &srq->idx_que.idx_buf); - kfree(srq->idx_que.bitmap); - hns_roce_mtt_cleanup(hr_dev, &srq->mtt); - hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf); + kfree(srq->wrid); + srq->wrid = NULL; } int hns_roce_create_srq(struct ib_srq *ib_srq, @@ -365,8 +283,8 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, struct hns_roce_dev *hr_dev = to_hr_dev(ib_srq->device); struct hns_roce_ib_create_srq_resp resp = {}; struct hns_roce_srq *srq = to_hr_srq(ib_srq); - int srq_desc_size; - int srq_buf_size; + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_ib_create_srq ucmd = {}; int ret = 0; u32 cqn; @@ -381,41 +299,45 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr + 1); srq->max_gs = init_attr->attr.max_sge; - srq_desc_size = roundup_pow_of_two(max(HNS_ROCE_SGE_SIZE, - HNS_ROCE_SGE_SIZE * srq->max_gs)); - - srq->wqe_shift = ilog2(srq_desc_size); - - srq_buf_size = srq->wqe_cnt * srq_desc_size; - - srq->idx_que.entry_sz = HNS_ROCE_IDX_QUE_ENTRY_SZ; - srq->idx_que.buf_size = srq->wqe_cnt * srq->idx_que.entry_sz; - srq->mtt.mtt_type = MTT_TYPE_SRQWQE; - srq->idx_que.mtt.mtt_type = MTT_TYPE_IDX; - if (udata) { - ret = create_user_srq(srq, udata, srq_buf_size); + ret = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); if (ret) { - dev_err(hr_dev->dev, "Create user srq failed\n"); - goto err_srq; + ibdev_err(ibdev, "Failed to copy SRQ udata, err %d\n", + ret); + return ret; } - } else { - ret = create_kernel_srq(srq, srq_buf_size); + } + + ret = alloc_srq_buf(hr_dev, srq, udata, ucmd.buf_addr); + if (ret) { + ibdev_err(ibdev, "Failed to alloc SRQ buffer, err %d\n", ret); + return ret; + } + + ret = alloc_srq_idx(hr_dev, srq, udata, ucmd.que_addr); + if (ret) { + ibdev_err(ibdev, "Failed to alloc SRQ idx, err %d\n", ret); + goto err_buf_alloc; + } + + if (!udata) { + ret = alloc_srq_wrid(hr_dev, srq); if (ret) { - dev_err(hr_dev->dev, "Create kernel srq failed\n"); - goto err_srq; + ibdev_err(ibdev, "Failed to alloc SRQ wrid, err %d\n", + ret); + goto err_idx_alloc; } } cqn = ib_srq_has_cq(init_attr->srq_type) ? to_hr_cq(init_attr->ext.cq)->cqn : 0; - srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG; - ret = hns_roce_srq_alloc(hr_dev, to_hr_pd(ib_srq->pd)->pdn, cqn, 0, - &srq->mtt, 0, srq); - if (ret) - goto err_wrid; + ret = alloc_srqc(hr_dev, srq, to_hr_pd(ib_srq->pd)->pdn, cqn, 0, 0); + if (ret) { + ibdev_err(ibdev, "Failed to alloc SRQ context, err %d\n", ret); + goto err_wrid_alloc; + } srq->event = hns_roce_ib_srq_event; resp.srqn = srq->srqn; @@ -431,15 +353,13 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, return 0; err_srqc_alloc: - hns_roce_srq_free(hr_dev, srq); - -err_wrid: - if (udata) - destroy_user_srq(hr_dev, srq); - else - destroy_kernel_srq(hr_dev, srq, srq_buf_size); - -err_srq: + free_srqc(hr_dev, srq); +err_wrid_alloc: + free_srq_wrid(hr_dev, srq); +err_idx_alloc: + free_srq_idx(hr_dev, srq); +err_buf_alloc: + free_srq_buf(hr_dev, srq); return ret; } @@ -448,18 +368,10 @@ void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); struct hns_roce_srq *srq = to_hr_srq(ibsrq); - hns_roce_srq_free(hr_dev, srq); - hns_roce_mtt_cleanup(hr_dev, &srq->mtt); - - if (udata) { - hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); - } else { - kvfree(srq->wrid); - hns_roce_buf_free(hr_dev, srq->wqe_cnt << srq->wqe_shift, - &srq->buf); - } - ib_umem_release(srq->idx_que.umem); - ib_umem_release(srq->umem); + free_srqc(hr_dev, srq); + free_srq_idx(hr_dev, srq); + free_srq_wrid(hr_dev, srq); + free_srq_buf(hr_dev, srq); } int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev) diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 02a169f8027b..5f8f8d5c0ce0 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -141,10 +141,11 @@ static int create_iboe_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr) return 0; } -int mlx4_ib_create_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata) - +int mlx4_ib_create_ah(struct ib_ah *ib_ah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) { + struct rdma_ah_attr *ah_attr = init_attr->ah_attr; + if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) return -EINVAL; @@ -167,12 +168,14 @@ int mlx4_ib_create_ah_slave(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, int slave_sgid_index, u8 *s_mac, u16 vlan_tag) { struct rdma_ah_attr slave_attr = *ah_attr; + struct rdma_ah_init_attr init_attr = {}; struct mlx4_ib_ah *mah = to_mah(ah); int ret; slave_attr.grh.sgid_attr = NULL; slave_attr.grh.sgid_index = slave_sgid_index; - ret = mlx4_ib_create_ah(ah, &slave_attr, 0, NULL); + init_attr.ah_attr = &slave_attr; + ret = mlx4_ib_create_ah(ah, &init_attr, NULL); if (ret) return ret; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index d188573187fa..182a237b87f7 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -752,7 +752,7 @@ int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); -int mlx4_ib_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, +int mlx4_ib_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); int mlx4_ib_create_ah_slave(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, int slave_sgid_index, u8 *s_mac, u16 vlan_tag); diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index 228be05fbaf8..8cca61c671f8 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -16,7 +16,8 @@ mlx5_ib-y := ah.o \ qpc.o \ restrack.o \ srq.o \ - srq_cmd.o + srq_cmd.o \ + wr.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index 80642dd359bc..59e5ec39b447 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -32,9 +32,28 @@ #include "mlx5_ib.h" +static __be16 mlx5_ah_get_udp_sport(const struct mlx5_ib_dev *dev, + const struct rdma_ah_attr *ah_attr) +{ + enum ib_gid_type gid_type = ah_attr->grh.sgid_attr->gid_type; + __be16 sport; + + if ((gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) && + (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) && + (ah_attr->grh.flow_label & IB_GRH_FLOWLABEL_MASK)) + sport = cpu_to_be16( + rdma_flow_label_to_udp_sport(ah_attr->grh.flow_label)); + else + sport = mlx5_get_roce_udp_sport_min(dev, + ah_attr->grh.sgid_attr); + + return sport; +} + static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, - struct rdma_ah_attr *ah_attr) + struct rdma_ah_init_attr *init_attr) { + struct rdma_ah_attr *ah_attr = init_attr->ah_attr; enum ib_gid_type gid_type; if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) { @@ -51,12 +70,15 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4); if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { + if (init_attr->xmit_slave) + ah->xmit_port = + mlx5_lag_get_slave_port(dev->mdev, + init_attr->xmit_slave); gid_type = ah_attr->grh.sgid_attr->gid_type; memcpy(ah->av.rmac, ah_attr->roce.dmac, sizeof(ah_attr->roce.dmac)); - ah->av.udp_sport = - mlx5_get_roce_udp_sport(dev, ah_attr->grh.sgid_attr); + ah->av.udp_sport = mlx5_ah_get_udp_sport(dev, ah_attr); ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0x7) << 1; if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) #define MLX5_ECN_ENABLED BIT(1) @@ -68,10 +90,11 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, } } -int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata) +int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) { + struct rdma_ah_attr *ah_attr = init_attr->ah_attr; struct mlx5_ib_ah *ah = to_mah(ibah); struct mlx5_ib_dev *dev = to_mdev(ibah->device); enum rdma_ah_attr_type ah_type = ah_attr->type; @@ -97,7 +120,7 @@ int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, return err; } - create_ib_ah(dev, ah, ah_attr); + create_ib_ah(dev, ah, init_attr); return 0; } diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index a2fcbc49131e..cc24c711e92a 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -1,46 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* - * Copyright (c) 2017, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright (c) 2017-2020, Mellanox Technologies inc. All rights reserved. */ #include "cmd.h" int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey) { - u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {0}; - u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {}; int err; MLX5_SET(query_special_contexts_in, in, opcode, MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS); - err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + err = mlx5_cmd_exec_inout(dev, query_special_contexts, in, out); if (!err) *mkey = MLX5_GET(query_special_contexts_out, out, dump_fill_mkey); @@ -50,12 +23,12 @@ int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey) int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey) { u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {}; - u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {}; + u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {}; int err; MLX5_SET(query_special_contexts_in, in, opcode, MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS); - err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + err = mlx5_cmd_exec_inout(dev, query_special_contexts, in, out); if (!err) *null_mkey = MLX5_GET(query_special_contexts_out, out, null_mkey); @@ -63,23 +36,15 @@ int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey) } int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, - void *out, int out_size) + void *out) { - u32 in[MLX5_ST_SZ_DW(query_cong_params_in)] = { }; + u32 in[MLX5_ST_SZ_DW(query_cong_params_in)] = {}; MLX5_SET(query_cong_params_in, in, opcode, MLX5_CMD_OP_QUERY_CONG_PARAMS); MLX5_SET(query_cong_params_in, in, cong_protocol, cong_point); - return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); -} - -int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *dev, - void *in, int in_size) -{ - u32 out[MLX5_ST_SZ_DW(modify_cong_params_out)] = { }; - - return mlx5_cmd_exec(dev, in, in_size, out, sizeof(out)); + return mlx5_cmd_exec_inout(dev, query_cong_params, in, out); } int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr, @@ -133,7 +98,7 @@ int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr, MLX5_SET64(alloc_memic_in, in, range_start_addr, hw_start_addr + (page_idx * PAGE_SIZE)); - ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + ret = mlx5_cmd_exec_inout(dev, alloc_memic, in, out); if (ret) { spin_lock(&dm->lock); bitmap_clear(dm->memic_alloc_pages, @@ -162,8 +127,7 @@ void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length) struct mlx5_core_dev *dev = dm->dev; u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr); u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE); - u32 out[MLX5_ST_SZ_DW(dealloc_memic_out)] = {0}; - u32 in[MLX5_ST_SZ_DW(dealloc_memic_in)] = {0}; + u32 in[MLX5_ST_SZ_DW(dealloc_memic_in)] = {}; u64 start_page_idx; int err; @@ -174,7 +138,7 @@ void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length) MLX5_SET64(dealloc_memic_in, in, memic_start_addr, addr); MLX5_SET(dealloc_memic_in, in, memic_size, length); - err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + err = mlx5_cmd_exec_in(dev, dealloc_memic, in); if (err) return; @@ -198,49 +162,46 @@ int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out) void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid) { - u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {}; - u32 out[MLX5_ST_SZ_DW(destroy_tir_out)] = {}; + u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {}; MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR); MLX5_SET(destroy_tir_in, in, tirn, tirn); MLX5_SET(destroy_tir_in, in, uid, uid); - mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + mlx5_cmd_exec_in(dev, destroy_tir, in); } void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid) { - u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(destroy_tis_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {}; MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS); MLX5_SET(destroy_tis_in, in, tisn, tisn); MLX5_SET(destroy_tis_in, in, uid, uid); - mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + mlx5_cmd_exec_in(dev, destroy_tis, in); } void mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid) { - u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {}; - u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {}; + u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {}; MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT); MLX5_SET(destroy_rqt_in, in, rqtn, rqtn); MLX5_SET(destroy_rqt_in, in, uid, uid); - mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + mlx5_cmd_exec_in(dev, destroy_rqt, in); } int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn, u16 uid) { - u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {}; + u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {}; int err; MLX5_SET(alloc_transport_domain_in, in, opcode, MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN); MLX5_SET(alloc_transport_domain_in, in, uid, uid); - err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + err = mlx5_cmd_exec_inout(dev, alloc_transport_domain, in, out); if (!err) *tdn = MLX5_GET(alloc_transport_domain_out, out, transport_domain); @@ -251,32 +212,29 @@ int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn, void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn, u16 uid) { - u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)] = {}; MLX5_SET(dealloc_transport_domain_in, in, opcode, MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN); MLX5_SET(dealloc_transport_domain_in, in, uid, uid); MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn); - mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + mlx5_cmd_exec_in(dev, dealloc_transport_domain, in); } void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid) { - u32 out[MLX5_ST_SZ_DW(dealloc_pd_out)] = {}; - u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {}; + u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {}; MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD); MLX5_SET(dealloc_pd_in, in, pd, pdn); MLX5_SET(dealloc_pd_in, in, uid, uid); - mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + mlx5_cmd_exec_in(dev, dealloc_pd, in); } int mlx5_cmd_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn, u16 uid) { - u32 out[MLX5_ST_SZ_DW(attach_to_mcg_out)] = {}; - u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)] = {}; + u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)] = {}; void *gid; MLX5_SET(attach_to_mcg_in, in, opcode, MLX5_CMD_OP_ATTACH_TO_MCG); @@ -284,14 +242,13 @@ int mlx5_cmd_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, MLX5_SET(attach_to_mcg_in, in, uid, uid); gid = MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid); memcpy(gid, mgid, sizeof(*mgid)); - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + return mlx5_cmd_exec_in(dev, attach_to_mcg, in); } int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn, u16 uid) { - u32 out[MLX5_ST_SZ_DW(detach_from_mcg_out)] = {}; - u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)] = {}; + u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)] = {}; void *gid; MLX5_SET(detach_from_mcg_in, in, opcode, MLX5_CMD_OP_DETACH_FROM_MCG); @@ -299,18 +256,18 @@ int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, MLX5_SET(detach_from_mcg_in, in, uid, uid); gid = MLX5_ADDR_OF(detach_from_mcg_in, in, multicast_gid); memcpy(gid, mgid, sizeof(*mgid)); - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + return mlx5_cmd_exec_in(dev, detach_from_mcg, in); } int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid) { u32 out[MLX5_ST_SZ_DW(alloc_xrcd_out)] = {}; - u32 in[MLX5_ST_SZ_DW(alloc_xrcd_in)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_xrcd_in)] = {}; int err; MLX5_SET(alloc_xrcd_in, in, opcode, MLX5_CMD_OP_ALLOC_XRCD); MLX5_SET(alloc_xrcd_in, in, uid, uid); - err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + err = mlx5_cmd_exec_inout(dev, alloc_xrcd, in, out); if (!err) *xrcdn = MLX5_GET(alloc_xrcd_out, out, xrcd); return err; @@ -318,13 +275,12 @@ int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid) int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid) { - u32 out[MLX5_ST_SZ_DW(dealloc_xrcd_out)] = {}; - u32 in[MLX5_ST_SZ_DW(dealloc_xrcd_in)] = {}; + u32 in[MLX5_ST_SZ_DW(dealloc_xrcd_in)] = {}; MLX5_SET(dealloc_xrcd_in, in, opcode, MLX5_CMD_OP_DEALLOC_XRCD); MLX5_SET(dealloc_xrcd_in, in, xrcd, xrcdn); MLX5_SET(dealloc_xrcd_in, in, uid, uid); - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + return mlx5_cmd_exec_in(dev, dealloc_xrcd, in); } int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, @@ -350,7 +306,7 @@ int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, data = MLX5_ADDR_OF(mad_ifc_in, in, mad); memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad)); - err = mlx5_cmd_exec(dev, in, inlen, out, outlen); + err = mlx5_cmd_exec_inout(dev, mad_ifc, in, out); if (err) goto out; diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index 43079b18d9b4..f4d8558db434 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -40,10 +40,8 @@ int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey); int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey); int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, - void *out, int out_size); + void *out); int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out); -int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev, - void *in, int in_size); int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr, u64 length, u32 alignment); void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length); diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c index de4da92b81a6..b9291e482428 100644 --- a/drivers/infiniband/hw/mlx5/cong.c +++ b/drivers/infiniband/hw/mlx5/cong.c @@ -290,7 +290,7 @@ static int mlx5_ib_get_cc_params(struct mlx5_ib_dev *dev, u8 port_num, node = mlx5_ib_param_to_node(offset); - err = mlx5_cmd_query_cong_params(mdev, node, out, outlen); + err = mlx5_cmd_query_cong_params(mdev, node, out); if (err) goto free; @@ -339,7 +339,7 @@ static int mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, u8 port_num, MLX5_SET(field_select_r_roce_rp, field, field_select_r_roce_rp, attr_mask); - err = mlx5_cmd_modify_cong_params(mdev, in, inlen); + err = mlx5_cmd_exec_in(dev->mdev, modify_cong_params, in); kvfree(in); alloc_err: mlx5_ib_put_native_port_mdev(dev, port_num + 1); diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 35b98c2d64d5..1d7feed6d3cb 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -615,7 +615,7 @@ static bool devx_is_valid_obj_id(struct uverbs_attr_bundle *attrs, enum ib_qp_type qp_type = qp->ibqp.qp_type; if (qp_type == IB_QPT_RAW_PACKET || - (qp->flags & MLX5_IB_QP_UNDERLAY)) { + (qp->flags & IB_QP_CREATE_SOURCE_QPN)) { struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; struct mlx5_ib_rq *rq = &raw_packet_qp->rq; diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index 69cb7e6e8955..08fd6a650868 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -142,7 +142,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( return -EINVAL; mqp = to_mqp(qp); - if (mqp->flags & MLX5_IB_QP_RSS) + if (mqp->is_rss) dest_id = mqp->rss_qp.tirn; else dest_id = mqp->raw_packet_qp.rq.tirn; diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index 1ae6fd95acaa..40d418153891 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -119,17 +119,15 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, struct mlx5_ib_gsi_qp *gsi; struct ib_qp_init_attr hw_init_attr = *init_attr; const u8 port_num = init_attr->port_num; - const int num_pkeys = pd->device->attrs.max_pkeys; - const int num_qps = mlx5_ib_deth_sqpn_cap(dev) ? num_pkeys : 0; + int num_qps = 0; int ret; - mlx5_ib_dbg(dev, "creating GSI QP\n"); - - if (port_num > ARRAY_SIZE(dev->devr.ports) || port_num < 1) { - mlx5_ib_warn(dev, - "invalid port number %d during GSI QP creation\n", - port_num); - return ERR_PTR(-EINVAL); + if (mlx5_ib_deth_sqpn_cap(dev)) { + if (MLX5_CAP_GEN(dev->mdev, + port_type) == MLX5_CAP_PORT_TYPE_IB) + num_qps = pd->device->attrs.max_pkeys; + else if (dev->lag_active) + num_qps = MLX5_MAX_PORTS; } gsi = kzalloc(sizeof(*gsi), GFP_KERNEL); @@ -270,7 +268,7 @@ static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi) } static int modify_to_rts(struct mlx5_ib_gsi_qp *gsi, struct ib_qp *qp, - u16 qp_index) + u16 pkey_index) { struct mlx5_ib_dev *dev = to_mdev(qp->device); struct ib_qp_attr attr; @@ -279,7 +277,7 @@ static int modify_to_rts(struct mlx5_ib_gsi_qp *gsi, struct ib_qp *qp, mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY | IB_QP_PORT; attr.qp_state = IB_QPS_INIT; - attr.pkey_index = qp_index; + attr.pkey_index = pkey_index; attr.qkey = IB_QP1_QKEY; attr.port_num = gsi->port_num; ret = ib_modify_qp(qp, &attr, mask); @@ -313,12 +311,17 @@ static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index) { struct ib_device *device = gsi->rx_qp->device; struct mlx5_ib_dev *dev = to_mdev(device); + int pkey_index = qp_index; + struct mlx5_ib_qp *mqp; struct ib_qp *qp; unsigned long flags; u16 pkey; int ret; - ret = ib_query_pkey(device, gsi->port_num, qp_index, &pkey); + if (MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_IB) + pkey_index = 0; + + ret = ib_query_pkey(device, gsi->port_num, pkey_index, &pkey); if (ret) { mlx5_ib_warn(dev, "unable to read P_Key at port %d, index %d\n", gsi->port_num, qp_index); @@ -347,7 +350,10 @@ static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index) return; } - ret = modify_to_rts(gsi, qp, qp_index); + mqp = to_mqp(qp); + if (dev->lag_active) + mqp->gsi_lag_port = qp_index + 1; + ret = modify_to_rts(gsi, qp, pkey_index); if (ret) goto err_destroy_qp; @@ -466,11 +472,15 @@ static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_gsi_qp *gsi, static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr) { struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device); + struct mlx5_ib_ah *ah = to_mah(wr->ah); int qp_index = wr->pkey_index; - if (!mlx5_ib_deth_sqpn_cap(dev)) + if (!gsi->num_qps) return gsi->rx_qp; + if (dev->lag_active && ah->xmit_port) + qp_index = ah->xmit_port - 1; + if (qp_index >= gsi->num_qps) return NULL; diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h index 3b6750cba796..5b30d3fa8f8d 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.h +++ b/drivers/infiniband/hw/mlx5/ib_rep.h @@ -9,9 +9,9 @@ #include <linux/mlx5/eswitch.h> #include "mlx5_ib.h" -#ifdef CONFIG_MLX5_ESWITCH extern const struct mlx5_ib_profile raw_eth_profile; +#ifdef CONFIG_MLX5_ESWITCH u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw); struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw, u16 vport_num); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 566b42f3fb18..3af57dfe7224 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -53,6 +53,7 @@ #include <linux/list.h> #include <rdma/ib_smi.h> #include <rdma/ib_umem.h> +#include <rdma/lag.h> #include <linux/in.h> #include <linux/etherdevice.h> #include "mlx5_ib.h" @@ -60,6 +61,7 @@ #include "cmd.h" #include "srq.h" #include "qp.h" +#include "wr.h" #include <linux/mlx5/fs_helpers.h> #include <linux/mlx5/accel.h> #include <rdma/uverbs_std_types.h> @@ -628,8 +630,8 @@ static int mlx5_ib_del_gid(const struct ib_gid_attr *attr, attr->index, NULL, NULL); } -__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, - const struct ib_gid_attr *attr) +__be16 mlx5_get_roce_udp_sport_min(const struct mlx5_ib_dev *dev, + const struct ib_gid_attr *attr) { if (attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) return 0; @@ -2561,7 +2563,7 @@ static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) struct mlx5_ib_alloc_pd_resp resp; int err; u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {}; - u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {}; u16 uid = 0; struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( udata, struct mlx5_ib_ucontext, ibucontext); @@ -2569,8 +2571,7 @@ static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) uid = context ? context->devx_uid : 0; MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD); MLX5_SET(alloc_pd_in, in, uid, uid); - err = mlx5_cmd_exec(to_mdev(ibdev)->mdev, in, sizeof(in), - out, sizeof(out)); + err = mlx5_cmd_exec_inout(to_mdev(ibdev)->mdev, alloc_pd, in, out); if (err) return err; @@ -3944,7 +3945,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT; } else { dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; - if (mqp->flags & MLX5_IB_QP_RSS) + if (mqp->is_rss) dst->tir_num = mqp->rss_qp.tirn; else dst->tir_num = mqp->raw_packet_qp.rq.tirn; @@ -4420,7 +4421,7 @@ static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) uid = ibqp->pd ? to_mpd(ibqp->pd)->uid : 0; - if (mqp->flags & MLX5_IB_QP_UNDERLAY) { + if (mqp->flags & IB_QP_CREATE_SOURCE_QPN) { mlx5_ib_dbg(dev, "Attaching a multi cast group to underlay QP is not supported\n"); return -EOPNOTSUPP; } @@ -6540,6 +6541,7 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) dev->ib_dev.phys_port_cnt = dev->num_ports; dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_count(mdev); dev->ib_dev.dev.parent = mdev->device; + dev->ib_dev.lag_flags = RDMA_LAG_FLAGS_HASH_ALL_SLAVES; mutex_init(&dev->cap_mask_mutex); INIT_LIST_HEAD(&dev->qp_list); @@ -6629,8 +6631,8 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .modify_qp = mlx5_ib_modify_qp, .modify_srq = mlx5_ib_modify_srq, .poll_cq = mlx5_ib_poll_cq, - .post_recv = mlx5_ib_post_recv, - .post_send = mlx5_ib_post_send, + .post_recv = mlx5_ib_post_recv_nodrain, + .post_send = mlx5_ib_post_send_nodrain, .post_srq_recv = mlx5_ib_post_srq_recv, .process_mad = mlx5_ib_process_mad, .query_ah = mlx5_ib_query_ah, @@ -7131,6 +7133,8 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev, int err; int i; + dev->profile = profile; + for (i = 0; i < MLX5_IB_STAGE_MAX; i++) { if (profile->stage[i].init) { err = profile->stage[i].init(dev); @@ -7139,7 +7143,6 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev, } } - dev->profile = profile; dev->ib_active = true; return dev; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index aaabb8a98eed..482b54eb9764 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -337,7 +337,6 @@ struct mlx5_ib_rwq { struct ib_umem *umem; size_t buf_size; unsigned int page_shift; - int create_type; struct mlx5_db db; u32 user_index; u32 wqe_count; @@ -346,17 +345,6 @@ struct mlx5_ib_rwq { u32 create_flags; /* Use enum mlx5_ib_wq_flags */ }; -enum { - MLX5_QP_USER, - MLX5_QP_KERNEL, - MLX5_QP_EMPTY -}; - -enum { - MLX5_WQ_USER, - MLX5_WQ_KERNEL -}; - struct mlx5_ib_rwq_ind_table { struct ib_rwq_ind_table ib_rwq_ind_tbl; u32 rqtn; @@ -443,34 +431,37 @@ struct mlx5_ib_qp { /* serialize qp state modifications */ struct mutex mutex; + /* cached variant of create_flags from struct ib_qp_init_attr */ u32 flags; u8 port; u8 state; - int wq_sig; - int scat_cqe; int max_inline_data; struct mlx5_bf bf; - int has_rq; + u8 has_rq:1; + u8 is_rss:1; /* only for user space QPs. For kernel * we have it from the bf object */ int bfregn; - int create_type; - struct list_head qps_list; struct list_head cq_recv_list; struct list_head cq_send_list; struct mlx5_rate_limit rl; u32 underlay_qpn; u32 flags_en; - /* storage for qp sub type when core qp type is IB_QPT_DRIVER */ - enum ib_qp_type qp_sub_type; + /* + * IB/core doesn't store low-level QP types, so + * store both MLX and IBTA types in the field below. + * IB_QPT_DRIVER will be break to DCI/DCT subtypes. + */ + enum ib_qp_type type; /* A flag to indicate if there's a new counter is configured * but not take effective */ u32 counter_pending; + u16 gsi_lag_port; }; struct mlx5_ib_cq_buf { @@ -481,24 +472,6 @@ struct mlx5_ib_cq_buf { int nent; }; -enum mlx5_ib_qp_flags { - MLX5_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO, - MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, - MLX5_IB_QP_CROSS_CHANNEL = IB_QP_CREATE_CROSS_CHANNEL, - MLX5_IB_QP_MANAGED_SEND = IB_QP_CREATE_MANAGED_SEND, - MLX5_IB_QP_MANAGED_RECV = IB_QP_CREATE_MANAGED_RECV, - MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 5, - /* QP uses 1 as its source QP number */ - MLX5_IB_QP_SQPN_QP1 = 1 << 6, - MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7, - MLX5_IB_QP_RSS = 1 << 8, - MLX5_IB_QP_CVLAN_STRIPPING = 1 << 9, - MLX5_IB_QP_UNDERLAY = 1 << 10, - MLX5_IB_QP_PCI_WRITE_END_PADDING = 1 << 11, - MLX5_IB_QP_TUNNEL_OFFLOAD = 1 << 12, - MLX5_IB_QP_PACKET_BASED_CREDIT = 1 << 13, -}; - struct mlx5_umr_wr { struct ib_send_wr wr; u64 virt_addr; @@ -1181,7 +1154,7 @@ void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db) void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); -int mlx5_ib_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, +int mlx5_ib_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); void mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags); @@ -1205,10 +1178,6 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata); void mlx5_ib_drain_sq(struct ib_qp *qp); void mlx5_ib_drain_rq(struct ib_qp *qp); -int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, - const struct ib_send_wr **bad_wr); -int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, - const struct ib_recv_wr **bad_wr); int mlx5_ib_read_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer, size_t buflen, size_t *bc); int mlx5_ib_read_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer, @@ -1383,8 +1352,8 @@ int mlx5_ib_get_vf_guid(struct ib_device *device, int vf, u8 port, int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, int type); -__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, - const struct ib_gid_attr *attr); +__be16 mlx5_get_roce_udp_sport_min(const struct mlx5_ib_dev *dev, + const struct ib_gid_attr *attr); void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num); void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 16af1105cfcf..7d2ec9ee5097 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -447,8 +447,7 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev, { int wq_num = pfault->event_subtype == MLX5_PFAULT_SUBTYPE_WQE ? pfault->wqe.wq_num : pfault->token; - u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = { }; - u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = { }; + u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {}; int err; MLX5_SET(page_fault_resume_in, in, opcode, MLX5_CMD_OP_PAGE_FAULT_RESUME); @@ -457,7 +456,7 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev, MLX5_SET(page_fault_resume_in, in, wq_number, wq_num); MLX5_SET(page_fault_resume_in, in, error, !!error); - err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); + err = mlx5_cmd_exec_in(dev->mdev, page_fault_resume, in); if (err) mlx5_ib_err(dev, "Failed to resolve the page fault on WQ 0x%x err %d\n", wq_num, err); @@ -1136,8 +1135,7 @@ static int mlx5_ib_mr_initiator_pfault_handler( if (qp->ibqp.qp_type == IB_QPT_XRC_INI) *wqe += sizeof(struct mlx5_wqe_xrc_seg); - if (qp->ibqp.qp_type == IB_QPT_UD || - qp->qp_sub_type == MLX5_IB_QPT_DCI) { + if (qp->type == IB_QPT_UD || qp->type == MLX5_IB_QPT_DCI) { av = *wqe; if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV)) *wqe += sizeof(struct mlx5_av); @@ -1190,7 +1188,7 @@ static int mlx5_ib_mr_responder_pfault_handler_rq(struct mlx5_ib_dev *dev, struct mlx5_ib_wq *wq = &qp->rq; int wqe_size = 1 << wq->wqe_shift; - if (qp->wq_sig) { + if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE) { mlx5_ib_err(dev, "ODP fault with WQE signatures is not supported\n"); return -EFAULT; } diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index af599c8b88aa..c571b7a97f10 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -40,9 +40,7 @@ #include "ib_rep.h" #include "cmd.h" #include "qp.h" - -/* not supported currently */ -static int wq_signature; +#include "wr.h" enum { MLX5_IB_ACK_REQ_FREQ = 8, @@ -55,32 +53,6 @@ enum { MLX5_IB_LINK_TYPE_ETH = 1 }; -enum { - MLX5_IB_SQ_STRIDE = 6, - MLX5_IB_SQ_UMR_INLINE_THRESHOLD = 64, -}; - -static const u32 mlx5_ib_opcode[] = { - [IB_WR_SEND] = MLX5_OPCODE_SEND, - [IB_WR_LSO] = MLX5_OPCODE_LSO, - [IB_WR_SEND_WITH_IMM] = MLX5_OPCODE_SEND_IMM, - [IB_WR_RDMA_WRITE] = MLX5_OPCODE_RDMA_WRITE, - [IB_WR_RDMA_WRITE_WITH_IMM] = MLX5_OPCODE_RDMA_WRITE_IMM, - [IB_WR_RDMA_READ] = MLX5_OPCODE_RDMA_READ, - [IB_WR_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_CS, - [IB_WR_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_FA, - [IB_WR_SEND_WITH_INV] = MLX5_OPCODE_SEND_INVAL, - [IB_WR_LOCAL_INV] = MLX5_OPCODE_UMR, - [IB_WR_REG_MR] = MLX5_OPCODE_UMR, - [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_MASKED_CS, - [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_MASKED_FA, - [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR, -}; - -struct mlx5_wqe_eth_pad { - u8 rsvd0[16]; -}; - enum raw_qp_set_mask_map { MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID = 1UL << 0, MLX5_RAW_QP_RATE_LIMIT = 1UL << 1, @@ -392,17 +364,26 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap, cap->max_recv_wr = 0; cap->max_recv_sge = 0; } else { + int wq_sig = !!(qp->flags_en & MLX5_QP_FLAG_SIGNATURE); + if (ucmd) { qp->rq.wqe_cnt = ucmd->rq_wqe_count; if (ucmd->rq_wqe_shift > BITS_PER_BYTE * sizeof(ucmd->rq_wqe_shift)) return -EINVAL; qp->rq.wqe_shift = ucmd->rq_wqe_shift; - if ((1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) < qp->wq_sig) + if ((1 << qp->rq.wqe_shift) / + sizeof(struct mlx5_wqe_data_seg) < + wq_sig) return -EINVAL; - qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig; + qp->rq.max_gs = + (1 << qp->rq.wqe_shift) / + sizeof(struct mlx5_wqe_data_seg) - + wq_sig; qp->rq.max_post = qp->rq.wqe_cnt; } else { - wqe_size = qp->wq_sig ? sizeof(struct mlx5_wqe_signature_seg) : 0; + wqe_size = + wq_sig ? sizeof(struct mlx5_wqe_signature_seg) : + 0; wqe_size += cap->max_recv_sge * sizeof(struct mlx5_wqe_data_seg); wqe_size = roundup_pow_of_two(wqe_size); wq_size = roundup_pow_of_two(cap->max_recv_wr) * wqe_size; @@ -416,7 +397,10 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap, return -EINVAL; } qp->rq.wqe_shift = ilog2(wqe_size); - qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig; + qp->rq.max_gs = + (1 << qp->rq.wqe_shift) / + sizeof(struct mlx5_wqe_data_seg) - + wq_sig; qp->rq.max_post = qp->rq.wqe_cnt; } } @@ -596,7 +580,7 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev, } if (attr->qp_type == IB_QPT_RAW_PACKET || - qp->flags & MLX5_IB_QP_UNDERLAY) { + qp->flags & IB_QP_CREATE_SOURCE_QPN) { base->ubuffer.buf_size = qp->rq.wqe_cnt << qp->rq.wqe_shift; qp->raw_packet_qp.sq.ubuffer.buf_size = qp->sq.wqe_cnt << 6; } else { @@ -751,10 +735,7 @@ static int to_mlx5_st(enum ib_qp_type type) case IB_QPT_SMI: return MLX5_QP_ST_QP0; case MLX5_IB_QPT_HW_GSI: return MLX5_QP_ST_QP1; case MLX5_IB_QPT_DCI: return MLX5_QP_ST_DCI; - case IB_QPT_RAW_IPV6: return MLX5_QP_ST_RAW_IPV6; - case IB_QPT_RAW_PACKET: - case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE; - case IB_QPT_MAX: + case IB_QPT_RAW_PACKET: return MLX5_QP_ST_RAW_ETHERTYPE; default: return -EINVAL; } } @@ -891,7 +872,6 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, goto err_umem; } - rwq->create_type = MLX5_WQ_USER; return 0; err_umem: @@ -906,15 +886,14 @@ static int adjust_bfregn(struct mlx5_ib_dev *dev, bfregn % MLX5_NON_FP_BFREGS_PER_UAR; } -static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, - struct mlx5_ib_qp *qp, struct ib_udata *udata, - struct ib_qp_init_attr *attr, - u32 **in, - struct mlx5_ib_create_qp_resp *resp, int *inlen, - struct mlx5_ib_qp_base *base) +static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, + struct mlx5_ib_qp *qp, struct ib_udata *udata, + struct ib_qp_init_attr *attr, u32 **in, + struct mlx5_ib_create_qp_resp *resp, int *inlen, + struct mlx5_ib_qp_base *base, + struct mlx5_ib_create_qp *ucmd) { struct mlx5_ib_ucontext *context; - struct mlx5_ib_create_qp ucmd; struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer; int page_shift = 0; int uar_index = 0; @@ -928,30 +907,24 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, u16 uid; u32 uar_flags; - err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); - if (err) { - mlx5_ib_dbg(dev, "copy failed\n"); - return err; - } - context = rdma_udata_to_drv_context(udata, struct mlx5_ib_ucontext, ibucontext); - uar_flags = ucmd.flags & (MLX5_QP_FLAG_UAR_PAGE_INDEX | - MLX5_QP_FLAG_BFREG_INDEX); + uar_flags = qp->flags_en & + (MLX5_QP_FLAG_UAR_PAGE_INDEX | MLX5_QP_FLAG_BFREG_INDEX); switch (uar_flags) { case MLX5_QP_FLAG_UAR_PAGE_INDEX: - uar_index = ucmd.bfreg_index; + uar_index = ucmd->bfreg_index; bfregn = MLX5_IB_INVALID_BFREG; break; case MLX5_QP_FLAG_BFREG_INDEX: uar_index = bfregn_to_uar_index(dev, &context->bfregi, - ucmd.bfreg_index, true); + ucmd->bfreg_index, true); if (uar_index < 0) return uar_index; bfregn = MLX5_IB_INVALID_BFREG; break; case 0: - if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL) + if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL) return -EINVAL; bfregn = alloc_bfreg(dev, &context->bfregi); if (bfregn < 0) @@ -970,12 +943,12 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB); qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; - err = set_user_buf_size(dev, qp, &ucmd, base, attr); + err = set_user_buf_size(dev, qp, ucmd, base, attr); if (err) goto err_bfreg; - if (ucmd.buf_addr && ubuffer->buf_size) { - ubuffer->buf_addr = ucmd.buf_addr; + if (ucmd->buf_addr && ubuffer->buf_size) { + ubuffer->buf_addr = ucmd->buf_addr; err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr, ubuffer->buf_size, &ubuffer->umem, &npages, &page_shift, &ncont, &offset); @@ -993,8 +966,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, goto err_umem; } - uid = (attr->qp_type != IB_QPT_XRC_TGT && - attr->qp_type != IB_QPT_XRC_INI) ? to_mpd(pd)->uid : 0; + uid = (attr->qp_type != IB_QPT_XRC_INI) ? to_mpd(pd)->uid : 0; MLX5_SET(create_qp_in, *in, uid, uid); pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas); if (ubuffer->umem) @@ -1012,24 +984,14 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, resp->bfreg_index = MLX5_IB_INVALID_BFREG; qp->bfregn = bfregn; - err = mlx5_ib_db_map_user(context, udata, ucmd.db_addr, &qp->db); + err = mlx5_ib_db_map_user(context, udata, ucmd->db_addr, &qp->db); if (err) { mlx5_ib_dbg(dev, "map failed\n"); goto err_free; } - err = ib_copy_to_udata(udata, resp, min(udata->outlen, sizeof(*resp))); - if (err) { - mlx5_ib_dbg(dev, "copy failed\n"); - goto err_unmap; - } - qp->create_type = MLX5_QP_USER; - return 0; -err_unmap: - mlx5_ib_db_unmap_user(context, &qp->db); - err_free: kvfree(*in); @@ -1042,72 +1004,50 @@ err_bfreg: return err; } -static void destroy_qp_user(struct mlx5_ib_dev *dev, struct ib_pd *pd, - struct mlx5_ib_qp *qp, struct mlx5_ib_qp_base *base, - struct ib_udata *udata) +static void destroy_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + struct mlx5_ib_qp_base *base, struct ib_udata *udata) { - struct mlx5_ib_ucontext *context = - rdma_udata_to_drv_context( - udata, - struct mlx5_ib_ucontext, - ibucontext); - - mlx5_ib_db_unmap_user(context, &qp->db); - ib_umem_release(base->ubuffer.umem); - - /* - * Free only the BFREGs which are handled by the kernel. - * BFREGs of UARs allocated dynamically are handled by user. - */ - if (qp->bfregn != MLX5_IB_INVALID_BFREG) - mlx5_ib_free_bfreg(dev, &context->bfregi, qp->bfregn); -} + struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); -/* get_sq_edge - Get the next nearby edge. - * - * An 'edge' is defined as the first following address after the end - * of the fragment or the SQ. Accordingly, during the WQE construction - * which repetitively increases the pointer to write the next data, it - * simply should check if it gets to an edge. - * - * @sq - SQ buffer. - * @idx - Stride index in the SQ buffer. - * - * Return: - * The new edge. - */ -static void *get_sq_edge(struct mlx5_ib_wq *sq, u32 idx) -{ - void *fragment_end; + if (udata) { + /* User QP */ + mlx5_ib_db_unmap_user(context, &qp->db); + ib_umem_release(base->ubuffer.umem); - fragment_end = mlx5_frag_buf_get_wqe - (&sq->fbc, - mlx5_frag_buf_get_idx_last_contig_stride(&sq->fbc, idx)); + /* + * Free only the BFREGs which are handled by the kernel. + * BFREGs of UARs allocated dynamically are handled by user. + */ + if (qp->bfregn != MLX5_IB_INVALID_BFREG) + mlx5_ib_free_bfreg(dev, &context->bfregi, qp->bfregn); + return; + } - return fragment_end + MLX5_SEND_WQE_BB; + /* Kernel QP */ + kvfree(qp->sq.wqe_head); + kvfree(qp->sq.w_list); + kvfree(qp->sq.wrid); + kvfree(qp->sq.wr_data); + kvfree(qp->rq.wrid); + if (qp->db.db) + mlx5_db_free(dev->mdev, &qp->db); + if (qp->buf.frags) + mlx5_frag_buf_free(dev->mdev, &qp->buf); } -static int create_kernel_qp(struct mlx5_ib_dev *dev, - struct ib_qp_init_attr *init_attr, - struct mlx5_ib_qp *qp, - u32 **in, int *inlen, - struct mlx5_ib_qp_base *base) +static int _create_kernel_qp(struct mlx5_ib_dev *dev, + struct ib_qp_init_attr *init_attr, + struct mlx5_ib_qp *qp, u32 **in, int *inlen, + struct mlx5_ib_qp_base *base) { int uar_index; void *qpc; int err; - if (init_attr->create_flags & ~(IB_QP_CREATE_INTEGRITY_EN | - IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK | - IB_QP_CREATE_IPOIB_UD_LSO | - IB_QP_CREATE_NETIF_QP | - MLX5_IB_QP_CREATE_SQPN_QP1 | - MLX5_IB_QP_CREATE_WC_TEST)) - return -EINVAL; - if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) qp->bf.bfreg = &dev->fp_bfreg; - else if (init_attr->create_flags & MLX5_IB_QP_CREATE_WC_TEST) + else if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST) qp->bf.bfreg = &dev->wc_bfreg; else qp->bf.bfreg = &dev->bfreg; @@ -1167,10 +1107,8 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, MLX5_SET(qpc, qpc, fre, 1); MLX5_SET(qpc, qpc, rlky, 1); - if (init_attr->create_flags & MLX5_IB_QP_CREATE_SQPN_QP1) { + if (qp->flags & MLX5_IB_QP_CREATE_SQPN_QP1) MLX5_SET(qpc, qpc, deth_sqpn, 1); - qp->flags |= MLX5_IB_QP_SQPN_QP1; - } mlx5_fill_page_frag_array(&qp->buf, (__be64 *)MLX5_ADDR_OF(create_qp_in, @@ -1198,7 +1136,6 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, err = -ENOMEM; goto err_wrid; } - qp->create_type = MLX5_QP_KERNEL; return 0; @@ -1218,36 +1155,15 @@ err_buf: return err; } -static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) -{ - kvfree(qp->sq.wqe_head); - kvfree(qp->sq.w_list); - kvfree(qp->sq.wrid); - kvfree(qp->sq.wr_data); - kvfree(qp->rq.wrid); - mlx5_db_free(dev->mdev, &qp->db); - mlx5_frag_buf_free(dev->mdev, &qp->buf); -} - static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr) { - if (attr->srq || (attr->qp_type == IB_QPT_XRC_TGT) || - (attr->qp_type == MLX5_IB_QPT_DCI) || - (attr->qp_type == IB_QPT_XRC_INI)) + if (attr->srq || (qp->type == IB_QPT_XRC_TGT) || + (qp->type == MLX5_IB_QPT_DCI) || (qp->type == IB_QPT_XRC_INI)) return MLX5_SRQ_RQ; else if (!qp->has_rq) return MLX5_ZERO_LEN_RQ; - else - return MLX5_NON_ZERO_RQ; -} - -static int is_connected(enum ib_qp_type qp_type) -{ - if (qp_type == IB_QPT_RC || qp_type == IB_QPT_UC || - qp_type == MLX5_IB_QPT_DCI) - return 1; - return 0; + return MLX5_NON_ZERO_RQ; } static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev, @@ -1260,7 +1176,7 @@ static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev, MLX5_SET(create_tis_in, in, uid, to_mpd(pd)->uid); MLX5_SET(tisc, tisc, transport_domain, tdn); - if (qp->flags & MLX5_IB_QP_UNDERLAY) + if (qp->flags & IB_QP_CREATE_SOURCE_QPN) MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn); return mlx5_core_create_tis(dev->mdev, in, &sq->tisn); @@ -1409,7 +1325,7 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index)); MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv)); - if (mqp->flags & MLX5_IB_QP_CAP_SCATTER_FCS) + if (mqp->flags & IB_QP_CREATE_SCATTER_FCS) MLX5_SET(rqc, rqc, scatter_fcs, 1); wq = MLX5_ADDR_OF(rqc, rqc, wq); @@ -1440,13 +1356,6 @@ static void destroy_raw_packet_qp_rq(struct mlx5_ib_dev *dev, mlx5_core_destroy_rq_tracked(dev, &rq->base.mqp); } -static bool tunnel_offload_supported(struct mlx5_core_dev *dev) -{ - return (MLX5_CAP_ETH(dev, tunnel_stateless_vxlan) || - MLX5_CAP_ETH(dev, tunnel_stateless_gre) || - MLX5_CAP_ETH(dev, tunnel_stateless_geneve_rx)); -} - static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, u32 qp_flags_en, @@ -1524,6 +1433,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, u16 uid = to_mpd(pd)->uid; u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {}; + if (!qp->sq.wqe_cnt && !qp->rq.wqe_cnt) + return -EINVAL; if (qp->sq.wqe_cnt) { err = create_raw_packet_qp_tis(dev, qp, sq, tdn, pd); if (err) @@ -1547,9 +1458,9 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (qp->rq.wqe_cnt) { rq->base.container_mibqp = qp; - if (qp->flags & MLX5_IB_QP_CVLAN_STRIPPING) + if (qp->flags & IB_QP_CREATE_CVLAN_STRIPPING) rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING; - if (qp->flags & MLX5_IB_QP_PCI_WRITE_END_PADDING) + if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING; err = create_raw_packet_qp_rq(dev, rq, in, inlen, pd); if (err) @@ -1584,14 +1495,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, qp->trans_qp.base.mqp.qpn = qp->sq.wqe_cnt ? sq->base.mqp.qpn : rq->base.mqp.qpn; - err = ib_copy_to_udata(udata, resp, min(udata->outlen, sizeof(*resp))); - if (err) - goto err_destroy_tir; - return 0; -err_destroy_tir: - destroy_raw_packet_qp_tir(dev, rq, qp->flags_en, pd); err_destroy_rq: destroy_raw_packet_qp_rq(dev, rq); err_destroy_sq: @@ -1643,14 +1548,26 @@ static void destroy_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *q to_mpd(qp->ibqp.pd)->uid); } -static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, - struct ib_pd *pd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata) +struct mlx5_create_qp_params { + struct ib_udata *udata; + size_t inlen; + size_t outlen; + void *ucmd; + u8 is_rss_raw : 1; + struct ib_qp_init_attr *attr; + u32 uidx; + struct mlx5_ib_create_qp_resp resp; +}; + +static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct ib_pd *pd, + struct mlx5_ib_qp *qp, + struct mlx5_create_qp_params *params) { + struct ib_qp_init_attr *init_attr = params->attr; + struct mlx5_ib_create_qp_rss *ucmd = params->ucmd; + struct ib_udata *udata = params->udata; struct mlx5_ib_ucontext *mucontext = rdma_udata_to_drv_context( udata, struct mlx5_ib_ucontext, ibucontext); - struct mlx5_ib_create_qp_resp resp = {}; int inlen; int outlen; int err; @@ -1660,79 +1577,28 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, void *hfso; u32 selected_fields = 0; u32 outer_l4; - size_t min_resp_len; u32 tdn = mucontext->tdn; - struct mlx5_ib_create_qp_rss ucmd = {}; - size_t required_cmd_sz; u8 lb_flag = 0; - if (init_attr->qp_type != IB_QPT_RAW_PACKET) - return -EOPNOTSUPP; - - if (init_attr->create_flags || init_attr->send_cq) - return -EINVAL; - - min_resp_len = offsetof(typeof(resp), bfreg_index) + sizeof(resp.bfreg_index); - if (udata->outlen < min_resp_len) - return -EINVAL; - - required_cmd_sz = offsetof(typeof(ucmd), flags) + sizeof(ucmd.flags); - if (udata->inlen < required_cmd_sz) { - mlx5_ib_dbg(dev, "invalid inlen\n"); - return -EINVAL; - } - - if (udata->inlen > sizeof(ucmd) && - !ib_is_udata_cleared(udata, sizeof(ucmd), - udata->inlen - sizeof(ucmd))) { - mlx5_ib_dbg(dev, "inlen is not supported\n"); - return -EOPNOTSUPP; - } - - if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) { - mlx5_ib_dbg(dev, "copy failed\n"); - return -EFAULT; - } - - if (ucmd.comp_mask) { + if (ucmd->comp_mask) { mlx5_ib_dbg(dev, "invalid comp mask\n"); return -EOPNOTSUPP; } - if (ucmd.flags & ~(MLX5_QP_FLAG_TUNNEL_OFFLOADS | - MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC | - MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)) { - mlx5_ib_dbg(dev, "invalid flags\n"); - return -EOPNOTSUPP; - } - - if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS && - !tunnel_offload_supported(dev->mdev)) { - mlx5_ib_dbg(dev, "tunnel offloads isn't supported\n"); - return -EOPNOTSUPP; - } - - if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_INNER && - !(ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS)) { + if (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_INNER && + !(ucmd->flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS)) { mlx5_ib_dbg(dev, "Tunnel offloads must be set for inner RSS\n"); return -EOPNOTSUPP; } - if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC || dev->is_rep) { - lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST; + if (dev->is_rep) qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC; - } - if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) { - lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST; - qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC; - } + if (qp->flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC) + lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST; - err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))); - if (err) { - mlx5_ib_dbg(dev, "copy failed\n"); - return -EINVAL; - } + if (qp->flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) + lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST; inlen = MLX5_ST_SZ_BYTES(create_tir_in); outlen = MLX5_ST_SZ_BYTES(create_tir_out); @@ -1751,29 +1617,29 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); - if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS) + if (ucmd->flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS) MLX5_SET(tirc, tirc, tunneled_offload_en, 1); MLX5_SET(tirc, tirc, self_lb_block, lb_flag); - if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_INNER) + if (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_INNER) hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner); else hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); - switch (ucmd.rx_hash_function) { + switch (ucmd->rx_hash_function) { case MLX5_RX_HASH_FUNC_TOEPLITZ: { void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key); - if (len != ucmd.rx_key_len) { + if (len != ucmd->rx_key_len) { err = -EINVAL; goto err; } MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ); - memcpy(rss_key, ucmd.rx_hash_key, len); + memcpy(rss_key, ucmd->rx_hash_key, len); break; } default: @@ -1781,7 +1647,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, goto err; } - if (!ucmd.rx_hash_fields_mask) { + if (!ucmd->rx_hash_fields_mask) { /* special case when this TIR serves as steering entry without hashing */ if (!init_attr->rwq_ind_tbl->log_ind_tbl_size) goto create_tir; @@ -1789,29 +1655,31 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, goto err; } - if (((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) || - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4)) && - ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) || - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))) { + if (((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) || + (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4)) && + ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) || + (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))) { err = -EINVAL; goto err; } /* If none of IPV4 & IPV6 SRC/DST was set - this bit field is ignored */ - if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) || - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4)) + if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) || + (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4)) MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); - else if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) || - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6)) + else if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) || + (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6)) MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV6); - outer_l4 = ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) << 0 | - ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) || - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP)) << 1 | - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI) << 2; + outer_l4 = ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || + (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) + << 0 | + ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) || + (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP)) + << 1 | + (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI) << 2; /* Check that only one l4 protocol is set */ if (outer_l4 & (outer_l4 - 1)) { @@ -1820,32 +1688,32 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, } /* If none of TCP & UDP SRC/DST was set - this bit field is ignored */ - if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) + if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || + (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, MLX5_L4_PROT_TYPE_TCP); - else if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) || - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP)) + else if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) || + (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP)) MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, MLX5_L4_PROT_TYPE_UDP); - if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) || - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6)) + if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) || + (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6)) selected_fields |= MLX5_HASH_FIELD_SEL_SRC_IP; - if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4) || - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6)) + if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4) || + (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6)) selected_fields |= MLX5_HASH_FIELD_SEL_DST_IP; - if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP)) + if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) || + (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP)) selected_fields |= MLX5_HASH_FIELD_SEL_L4_SPORT; - if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP) || - (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP)) + if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP) || + (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP)) selected_fields |= MLX5_HASH_FIELD_SEL_L4_DPORT; - if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI) + if (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI) selected_fields |= MLX5_HASH_FIELD_SEL_IPSEC_SPI; MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields); @@ -1867,73 +1735,43 @@ create_tir: goto err; if (mucontext->devx_uid) { - resp.comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN; - resp.tirn = qp->rss_qp.tirn; + params->resp.comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN; + params->resp.tirn = qp->rss_qp.tirn; if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner)) { - resp.tir_icm_addr = + params->resp.tir_icm_addr = MLX5_GET(create_tir_out, out, icm_address_31_0); - resp.tir_icm_addr |= (u64)MLX5_GET(create_tir_out, out, - icm_address_39_32) - << 32; - resp.tir_icm_addr |= (u64)MLX5_GET(create_tir_out, out, - icm_address_63_40) - << 40; - resp.comp_mask |= + params->resp.tir_icm_addr |= + (u64)MLX5_GET(create_tir_out, out, + icm_address_39_32) + << 32; + params->resp.tir_icm_addr |= + (u64)MLX5_GET(create_tir_out, out, + icm_address_63_40) + << 40; + params->resp.comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIR_ICM_ADDR; } } - err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))); - if (err) - goto err_copy; - kvfree(in); /* qpn is reserved for that QP */ qp->trans_qp.base.mqp.qpn = 0; - qp->flags |= MLX5_IB_QP_RSS; + qp->is_rss = true; return 0; -err_copy: - mlx5_cmd_destroy_tir(dev->mdev, qp->rss_qp.tirn, mucontext->devx_uid); err: kvfree(in); return err; } -static void configure_responder_scat_cqe(struct ib_qp_init_attr *init_attr, - void *qpc) -{ - int rcqe_sz; - - if (init_attr->qp_type == MLX5_IB_QPT_DCI) - return; - - rcqe_sz = mlx5_ib_get_cqe_size(init_attr->recv_cq); - - if (init_attr->qp_type == MLX5_IB_QPT_DCT) { - if (rcqe_sz == 128) - MLX5_SET(dctc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE); - - return; - } - - MLX5_SET(qpc, qpc, cs_res, - rcqe_sz == 128 ? MLX5_RES_SCAT_DATA64_CQE : - MLX5_RES_SCAT_DATA32_CQE); -} - static void configure_requester_scat_cqe(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *init_attr, struct mlx5_ib_create_qp *ucmd, void *qpc) { - enum ib_qp_type qpt = init_attr->qp_type; int scqe_sz; bool allow_scat_cqe = false; - if (qpt == IB_QPT_UC || qpt == IB_QPT_UD) - return; - if (ucmd) allow_scat_cqe = ucmd->flags & MLX5_QP_FLAG_ALLOW_SCATTER_CQE; @@ -1998,234 +1836,137 @@ static int get_atomic_mode(struct mlx5_ib_dev *dev, return atomic_mode; } -static inline bool check_flags_mask(uint64_t input, uint64_t supported) -{ - return (input & ~supported) == 0; -} - -static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata, struct mlx5_ib_qp *qp) +static int create_xrc_tgt_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + struct mlx5_create_qp_params *params) { + struct ib_qp_init_attr *attr = params->attr; + u32 uidx = params->uidx; struct mlx5_ib_resources *devr = &dev->devr; int inlen = MLX5_ST_SZ_BYTES(create_qp_in); struct mlx5_core_dev *mdev = dev->mdev; - struct mlx5_ib_create_qp_resp resp = {}; - struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( - udata, struct mlx5_ib_ucontext, ibucontext); - struct mlx5_ib_cq *send_cq; - struct mlx5_ib_cq *recv_cq; - unsigned long flags; - u32 uidx = MLX5_IB_DEFAULT_UIDX; - struct mlx5_ib_create_qp ucmd; struct mlx5_ib_qp_base *base; - int mlx5_st; + unsigned long flags; void *qpc; u32 *in; int err; mutex_init(&qp->mutex); - spin_lock_init(&qp->sq.lock); - spin_lock_init(&qp->rq.lock); - mlx5_st = to_mlx5_st(init_attr->qp_type); - if (mlx5_st < 0) - return -EINVAL; + if (attr->sq_sig_type == IB_SIGNAL_ALL_WR) + qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE; - if (init_attr->rwq_ind_tbl) { - if (!udata) - return -ENOSYS; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; - err = create_rss_raw_qp_tir(dev, qp, pd, init_attr, udata); - return err; - } + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); - if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { - if (!MLX5_CAP_GEN(mdev, block_lb_mc)) { - mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n"); - return -EINVAL; - } else { - qp->flags |= MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK; - } - } + MLX5_SET(qpc, qpc, st, MLX5_QP_ST_XRC); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, pd, to_mpd(devr->p0)->pdn); - if (init_attr->create_flags & - (IB_QP_CREATE_CROSS_CHANNEL | - IB_QP_CREATE_MANAGED_SEND | - IB_QP_CREATE_MANAGED_RECV)) { - if (!MLX5_CAP_GEN(mdev, cd)) { - mlx5_ib_dbg(dev, "cross-channel isn't supported\n"); - return -EINVAL; - } - if (init_attr->create_flags & IB_QP_CREATE_CROSS_CHANNEL) - qp->flags |= MLX5_IB_QP_CROSS_CHANNEL; - if (init_attr->create_flags & IB_QP_CREATE_MANAGED_SEND) - qp->flags |= MLX5_IB_QP_MANAGED_SEND; - if (init_attr->create_flags & IB_QP_CREATE_MANAGED_RECV) - qp->flags |= MLX5_IB_QP_MANAGED_RECV; - } - - if (init_attr->qp_type == IB_QPT_UD && - (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)) - if (!MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) { - mlx5_ib_dbg(dev, "ipoib UD lso qp isn't supported\n"); - return -EOPNOTSUPP; - } + if (qp->flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) + MLX5_SET(qpc, qpc, block_lb_mc, 1); + if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL) + MLX5_SET(qpc, qpc, cd_master, 1); + if (qp->flags & IB_QP_CREATE_MANAGED_SEND) + MLX5_SET(qpc, qpc, cd_slave_send, 1); + if (qp->flags & IB_QP_CREATE_MANAGED_RECV) + MLX5_SET(qpc, qpc, cd_slave_receive, 1); - if (init_attr->create_flags & IB_QP_CREATE_SCATTER_FCS) { - if (init_attr->qp_type != IB_QPT_RAW_PACKET) { - mlx5_ib_dbg(dev, "Scatter FCS is supported only for Raw Packet QPs"); - return -EOPNOTSUPP; - } - if (!MLX5_CAP_GEN(dev->mdev, eth_net_offloads) || - !MLX5_CAP_ETH(dev->mdev, scatter_fcs)) { - mlx5_ib_dbg(dev, "Scatter FCS isn't supported\n"); - return -EOPNOTSUPP; - } - qp->flags |= MLX5_IB_QP_CAP_SCATTER_FCS; - } + MLX5_SET(qpc, qpc, rq_type, MLX5_SRQ_RQ); + MLX5_SET(qpc, qpc, no_sq, 1); + MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn); + MLX5_SET(qpc, qpc, cqn_snd, to_mcq(devr->c0)->mcq.cqn); + MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn); + MLX5_SET(qpc, qpc, xrcd, to_mxrcd(attr->xrcd)->xrcdn); + MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma); - if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) - qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE; + /* 0xffffff means we ask to work with cqe version 0 */ + if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1) + MLX5_SET(qpc, qpc, user_index, uidx); - if (init_attr->create_flags & IB_QP_CREATE_CVLAN_STRIPPING) { - if (!(MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && - MLX5_CAP_ETH(dev->mdev, vlan_cap)) || - (init_attr->qp_type != IB_QPT_RAW_PACKET)) - return -EOPNOTSUPP; - qp->flags |= MLX5_IB_QP_CVLAN_STRIPPING; + if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) { + MLX5_SET(qpc, qpc, end_padding_mode, + MLX5_WQ_END_PAD_MODE_ALIGN); + /* Special case to clean flag */ + qp->flags &= ~IB_QP_CREATE_PCI_WRITE_END_PADDING; } - if (udata) { - if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { - mlx5_ib_dbg(dev, "copy failed\n"); - return -EFAULT; - } + base = &qp->trans_qp.base; + err = mlx5_core_create_qp(dev, &base->mqp, in, inlen); + kvfree(in); + if (err) + return err; - if (!check_flags_mask(ucmd.flags, - MLX5_QP_FLAG_ALLOW_SCATTER_CQE | - MLX5_QP_FLAG_BFREG_INDEX | - MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE | - MLX5_QP_FLAG_SCATTER_CQE | - MLX5_QP_FLAG_SIGNATURE | - MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC | - MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC | - MLX5_QP_FLAG_TUNNEL_OFFLOADS | - MLX5_QP_FLAG_UAR_PAGE_INDEX | - MLX5_QP_FLAG_TYPE_DCI | - MLX5_QP_FLAG_TYPE_DCT)) - return -EINVAL; + base->container_mibqp = qp; + base->mqp.event = mlx5_ib_qp_event; - err = get_qp_user_index(ucontext, &ucmd, udata->inlen, &uidx); - if (err) - return err; + spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); + list_add_tail(&qp->qps_list, &dev->qp_list); + spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); - qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE); - if (MLX5_CAP_GEN(dev->mdev, sctr_data_cqe)) - qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE); - if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS) { - if (init_attr->qp_type != IB_QPT_RAW_PACKET || - !tunnel_offload_supported(mdev)) { - mlx5_ib_dbg(dev, "Tunnel offload isn't supported\n"); - return -EOPNOTSUPP; - } - qp->flags_en |= MLX5_QP_FLAG_TUNNEL_OFFLOADS; - } + qp->trans_qp.xrcdn = to_mxrcd(attr->xrcd)->xrcdn; + return 0; +} - if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC) { - if (init_attr->qp_type != IB_QPT_RAW_PACKET) { - mlx5_ib_dbg(dev, "Self-LB UC isn't supported\n"); - return -EOPNOTSUPP; - } - qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC; - } +static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, + struct mlx5_ib_qp *qp, + struct mlx5_create_qp_params *params) +{ + struct ib_qp_init_attr *init_attr = params->attr; + struct mlx5_ib_create_qp *ucmd = params->ucmd; + struct ib_udata *udata = params->udata; + u32 uidx = params->uidx; + struct mlx5_ib_resources *devr = &dev->devr; + int inlen = MLX5_ST_SZ_BYTES(create_qp_in); + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_ib_cq *send_cq; + struct mlx5_ib_cq *recv_cq; + unsigned long flags; + struct mlx5_ib_qp_base *base; + int mlx5_st; + void *qpc; + u32 *in; + int err; - if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) { - if (init_attr->qp_type != IB_QPT_RAW_PACKET) { - mlx5_ib_dbg(dev, "Self-LB UM isn't supported\n"); - return -EOPNOTSUPP; - } - qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC; - } + mutex_init(&qp->mutex); + spin_lock_init(&qp->sq.lock); + spin_lock_init(&qp->rq.lock); - if (ucmd.flags & MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE) { - if (init_attr->qp_type != IB_QPT_RC || - !MLX5_CAP_GEN(dev->mdev, qp_packet_based)) { - mlx5_ib_dbg(dev, "packet based credit mode isn't supported\n"); - return -EOPNOTSUPP; - } - qp->flags |= MLX5_IB_QP_PACKET_BASED_CREDIT; - } + mlx5_st = to_mlx5_st(qp->type); + if (mlx5_st < 0) + return -EINVAL; - if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) { - if (init_attr->qp_type != IB_QPT_UD || - (MLX5_CAP_GEN(dev->mdev, port_type) != - MLX5_CAP_PORT_TYPE_IB) || - !mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS)) { - mlx5_ib_dbg(dev, "Source QP option isn't supported\n"); - return -EOPNOTSUPP; - } + if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) + qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE; - qp->flags |= MLX5_IB_QP_UNDERLAY; - qp->underlay_qpn = init_attr->source_qpn; - } - } else { - qp->wq_sig = !!wq_signature; - } + if (qp->flags & IB_QP_CREATE_SOURCE_QPN) + qp->underlay_qpn = init_attr->source_qpn; base = (init_attr->qp_type == IB_QPT_RAW_PACKET || - qp->flags & MLX5_IB_QP_UNDERLAY) ? + qp->flags & IB_QP_CREATE_SOURCE_QPN) ? &qp->raw_packet_qp.rq.base : &qp->trans_qp.base; qp->has_rq = qp_has_rq(init_attr); - err = set_rq_size(dev, &init_attr->cap, qp->has_rq, - qp, udata ? &ucmd : NULL); + err = set_rq_size(dev, &init_attr->cap, qp->has_rq, qp, ucmd); if (err) { mlx5_ib_dbg(dev, "err %d\n", err); return err; } - if (pd) { - if (udata) { - __u32 max_wqes = - 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); - mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", ucmd.sq_wqe_count); - if (ucmd.rq_wqe_shift != qp->rq.wqe_shift || - ucmd.rq_wqe_count != qp->rq.wqe_cnt) { - mlx5_ib_dbg(dev, "invalid rq params\n"); - return -EINVAL; - } - if (ucmd.sq_wqe_count > max_wqes) { - mlx5_ib_dbg(dev, "requested sq_wqe_count (%d) > max allowed (%d)\n", - ucmd.sq_wqe_count, max_wqes); - return -EINVAL; - } - if (init_attr->create_flags & - MLX5_IB_QP_CREATE_SQPN_QP1) { - mlx5_ib_dbg(dev, "user-space is not allowed to create UD QPs spoofing as QP1\n"); - return -EINVAL; - } - err = create_user_qp(dev, pd, qp, udata, init_attr, &in, - &resp, &inlen, base); - if (err) - mlx5_ib_dbg(dev, "err %d\n", err); - } else { - err = create_kernel_qp(dev, init_attr, qp, &in, &inlen, - base); - if (err) - mlx5_ib_dbg(dev, "err %d\n", err); - } + if (ucmd->rq_wqe_shift != qp->rq.wqe_shift || + ucmd->rq_wqe_count != qp->rq.wqe_cnt) + return -EINVAL; - if (err) - return err; - } else { - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) - return -ENOMEM; + if (ucmd->sq_wqe_count > (1 << MLX5_CAP_GEN(mdev, log_max_qp_sz))) + return -EINVAL; - qp->create_type = MLX5_QP_EMPTY; - } + err = _create_user_qp(dev, pd, qp, udata, init_attr, &in, ¶ms->resp, + &inlen, base, ucmd); + if (err) + return err; if (is_sqp(init_attr->qp_type)) qp->port = init_attr->port_num; @@ -2234,33 +1975,34 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_SET(qpc, qpc, st, mlx5_st); MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, pd, to_mpd(pd)->pdn); - if (init_attr->qp_type != MLX5_IB_QPT_REG_UMR) - MLX5_SET(qpc, qpc, pd, to_mpd(pd ? pd : devr->p0)->pdn); - else - MLX5_SET(qpc, qpc, latency_sensitive, 1); - - - if (qp->wq_sig) + if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE) MLX5_SET(qpc, qpc, wq_signature, 1); - if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK) + if (qp->flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) MLX5_SET(qpc, qpc, block_lb_mc, 1); - if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL) + if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL) MLX5_SET(qpc, qpc, cd_master, 1); - if (qp->flags & MLX5_IB_QP_MANAGED_SEND) + if (qp->flags & IB_QP_CREATE_MANAGED_SEND) MLX5_SET(qpc, qpc, cd_slave_send, 1); - if (qp->flags & MLX5_IB_QP_MANAGED_RECV) + if (qp->flags & IB_QP_CREATE_MANAGED_RECV) MLX5_SET(qpc, qpc, cd_slave_receive, 1); - if (qp->flags & MLX5_IB_QP_PACKET_BASED_CREDIT) + if (qp->flags_en & MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE) MLX5_SET(qpc, qpc, req_e2e_credit_mode, 1); - if (qp->scat_cqe && is_connected(init_attr->qp_type)) { - configure_responder_scat_cqe(init_attr, qpc); - configure_requester_scat_cqe(dev, init_attr, - udata ? &ucmd : NULL, - qpc); + if ((qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE) && + (init_attr->qp_type == IB_QPT_RC || + init_attr->qp_type == IB_QPT_UC)) { + int rcqe_sz = mlx5_ib_get_cqe_size(init_attr->recv_cq); + + MLX5_SET(qpc, qpc, cs_res, + rcqe_sz == 128 ? MLX5_RES_SCAT_DATA64_CQE : + MLX5_RES_SCAT_DATA32_CQE); } + if ((qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE) && + (qp->type == MLX5_IB_QPT_DCI || qp->type == IB_QPT_RC)) + configure_requester_scat_cqe(dev, init_attr, ucmd, qpc); if (qp->rq.wqe_cnt) { MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4); @@ -2281,12 +2023,6 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, /* Set default resources */ switch (init_attr->qp_type) { - case IB_QPT_XRC_TGT: - MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn); - MLX5_SET(qpc, qpc, cqn_snd, to_mcq(devr->c0)->mcq.cqn); - MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn); - MLX5_SET(qpc, qpc, xrcd, to_mxrcd(init_attr->xrcd)->xrcdn); - break; case IB_QPT_XRC_INI: MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn); MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn); @@ -2314,52 +2050,160 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1) MLX5_SET(qpc, qpc, user_index, uidx); - /* we use IB_QP_CREATE_IPOIB_UD_LSO to indicates ipoib qp */ - if (init_attr->qp_type == IB_QPT_UD && - (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)) { - MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1); - qp->flags |= MLX5_IB_QP_LSO; - } - - if (init_attr->create_flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) { - if (!MLX5_CAP_GEN(dev->mdev, end_pad)) { - mlx5_ib_dbg(dev, "scatter end padding is not supported\n"); - err = -EOPNOTSUPP; - goto err; - } else if (init_attr->qp_type != IB_QPT_RAW_PACKET) { - MLX5_SET(qpc, qpc, end_padding_mode, - MLX5_WQ_END_PAD_MODE_ALIGN); - } else { - qp->flags |= MLX5_IB_QP_PCI_WRITE_END_PADDING; - } - } - - if (inlen < 0) { - err = -EINVAL; - goto err; + if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING && + init_attr->qp_type != IB_QPT_RAW_PACKET) { + MLX5_SET(qpc, qpc, end_padding_mode, + MLX5_WQ_END_PAD_MODE_ALIGN); + /* Special case to clean flag */ + qp->flags &= ~IB_QP_CREATE_PCI_WRITE_END_PADDING; } if (init_attr->qp_type == IB_QPT_RAW_PACKET || - qp->flags & MLX5_IB_QP_UNDERLAY) { - qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr; + qp->flags & IB_QP_CREATE_SOURCE_QPN) { + qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd->sq_buf_addr; raw_packet_qp_copy_info(qp, &qp->raw_packet_qp); err = create_raw_packet_qp(dev, qp, in, inlen, pd, udata, - &resp); - } else { + ¶ms->resp); + } else err = mlx5_core_create_qp(dev, &base->mqp, in, inlen); - } - if (err) { - mlx5_ib_dbg(dev, "create qp failed\n"); + kvfree(in); + if (err) goto err_create; + + base->container_mibqp = qp; + base->mqp.event = mlx5_ib_qp_event; + + get_cqs(qp->type, init_attr->send_cq, init_attr->recv_cq, + &send_cq, &recv_cq); + spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); + mlx5_ib_lock_cqs(send_cq, recv_cq); + /* Maintain device to QPs access, needed for further handling via reset + * flow + */ + list_add_tail(&qp->qps_list, &dev->qp_list); + /* Maintain CQ to QPs access, needed for further handling via reset flow + */ + if (send_cq) + list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp); + if (recv_cq) + list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp); + mlx5_ib_unlock_cqs(send_cq, recv_cq); + spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); + + return 0; + +err_create: + destroy_qp(dev, qp, base, udata); + return err; +} + +static int create_kernel_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, + struct mlx5_ib_qp *qp, + struct mlx5_create_qp_params *params) +{ + struct ib_qp_init_attr *attr = params->attr; + u32 uidx = params->uidx; + struct mlx5_ib_resources *devr = &dev->devr; + int inlen = MLX5_ST_SZ_BYTES(create_qp_in); + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_ib_cq *send_cq; + struct mlx5_ib_cq *recv_cq; + unsigned long flags; + struct mlx5_ib_qp_base *base; + int mlx5_st; + void *qpc; + u32 *in; + int err; + + mutex_init(&qp->mutex); + spin_lock_init(&qp->sq.lock); + spin_lock_init(&qp->rq.lock); + + mlx5_st = to_mlx5_st(qp->type); + if (mlx5_st < 0) + return -EINVAL; + + if (attr->sq_sig_type == IB_SIGNAL_ALL_WR) + qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE; + + base = &qp->trans_qp.base; + + qp->has_rq = qp_has_rq(attr); + err = set_rq_size(dev, &attr->cap, qp->has_rq, qp, NULL); + if (err) { + mlx5_ib_dbg(dev, "err %d\n", err); + return err; } + err = _create_kernel_qp(dev, attr, qp, &in, &inlen, base); + if (err) + return err; + + if (is_sqp(attr->qp_type)) + qp->port = attr->port_num; + + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + + MLX5_SET(qpc, qpc, st, mlx5_st); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + + if (attr->qp_type != MLX5_IB_QPT_REG_UMR) + MLX5_SET(qpc, qpc, pd, to_mpd(pd ? pd : devr->p0)->pdn); + else + MLX5_SET(qpc, qpc, latency_sensitive, 1); + + + if (qp->flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) + MLX5_SET(qpc, qpc, block_lb_mc, 1); + + if (qp->rq.wqe_cnt) { + MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4); + MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt)); + } + + MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, attr)); + + if (qp->sq.wqe_cnt) + MLX5_SET(qpc, qpc, log_sq_size, ilog2(qp->sq.wqe_cnt)); + else + MLX5_SET(qpc, qpc, no_sq, 1); + + if (attr->srq) { + MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x0)->xrcdn); + MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, + to_msrq(attr->srq)->msrq.srqn); + } else { + MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn); + MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, + to_msrq(devr->s1)->msrq.srqn); + } + + if (attr->send_cq) + MLX5_SET(qpc, qpc, cqn_snd, to_mcq(attr->send_cq)->mcq.cqn); + + if (attr->recv_cq) + MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(attr->recv_cq)->mcq.cqn); + + MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma); + + /* 0xffffff means we ask to work with cqe version 0 */ + if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1) + MLX5_SET(qpc, qpc, user_index, uidx); + + /* we use IB_QP_CREATE_IPOIB_UD_LSO to indicates ipoib qp */ + if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) + MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1); + + err = mlx5_core_create_qp(dev, &base->mqp, in, inlen); kvfree(in); + if (err) + goto err_create; base->container_mibqp = qp; base->mqp.event = mlx5_ib_qp_event; - get_cqs(init_attr->qp_type, init_attr->send_cq, init_attr->recv_cq, + get_cqs(qp->type, attr->send_cq, attr->recv_cq, &send_cq, &recv_cq); spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); mlx5_ib_lock_cqs(send_cq, recv_cq); @@ -2379,13 +2223,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, return 0; err_create: - if (qp->create_type == MLX5_QP_USER) - destroy_qp_user(dev, pd, qp, base, udata); - else if (qp->create_type == MLX5_QP_KERNEL) - destroy_qp_kernel(dev, qp); - -err: - kvfree(in); + destroy_qp(dev, qp, base, NULL); return err; } @@ -2447,11 +2285,6 @@ static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *re } } -static struct mlx5_ib_pd *get_pd(struct mlx5_ib_qp *qp) -{ - return to_mpd(qp->ibqp.pd); -} - static void get_cqs(enum ib_qp_type qp_type, struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq, struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq) @@ -2472,14 +2305,10 @@ static void get_cqs(enum ib_qp_type qp_type, case IB_QPT_RC: case IB_QPT_UC: case IB_QPT_UD: - case IB_QPT_RAW_IPV6: - case IB_QPT_RAW_ETHERTYPE: case IB_QPT_RAW_PACKET: *send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL; *recv_cq = ib_recv_cq ? to_mcq(ib_recv_cq) : NULL; break; - - case IB_QPT_MAX: default: *send_cq = NULL; *recv_cq = NULL; @@ -2505,13 +2334,13 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, } base = (qp->ibqp.qp_type == IB_QPT_RAW_PACKET || - qp->flags & MLX5_IB_QP_UNDERLAY) ? + qp->flags & IB_QP_CREATE_SOURCE_QPN) ? &qp->raw_packet_qp.rq.base : &qp->trans_qp.base; if (qp->state != IB_QPS_RESET) { if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET && - !(qp->flags & MLX5_IB_QP_UNDERLAY)) { + !(qp->flags & IB_QP_CREATE_SOURCE_QPN)) { err = mlx5_core_qp_modify(dev, MLX5_CMD_OP_2RST_QP, 0, NULL, &base->mqp); } else { @@ -2539,7 +2368,7 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (recv_cq) list_del(&qp->cq_recv_list); - if (qp->create_type == MLX5_QP_KERNEL) { + if (!udata) { __mlx5_ib_cq_clean(recv_cq, base->mqp.qpn, qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); if (send_cq != recv_cq) @@ -2550,7 +2379,7 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET || - qp->flags & MLX5_IB_QP_UNDERLAY) { + qp->flags & IB_QP_CREATE_SOURCE_QPN) { destroy_raw_packet_qp(dev, qp); } else { err = mlx5_core_destroy_qp(dev, &base->mqp); @@ -2559,254 +2388,446 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, base->mqp.qpn); } - if (qp->create_type == MLX5_QP_KERNEL) - destroy_qp_kernel(dev, qp); - else if (qp->create_type == MLX5_QP_USER) - destroy_qp_user(dev, &get_pd(qp)->ibpd, qp, base, udata); + destroy_qp(dev, qp, base, udata); } -static const char *ib_qp_type_str(enum ib_qp_type type) +static int create_dct(struct ib_pd *pd, struct mlx5_ib_qp *qp, + struct mlx5_create_qp_params *params) { - switch (type) { - case IB_QPT_SMI: - return "IB_QPT_SMI"; - case IB_QPT_GSI: - return "IB_QPT_GSI"; + struct ib_qp_init_attr *attr = params->attr; + struct mlx5_ib_create_qp *ucmd = params->ucmd; + u32 uidx = params->uidx; + void *dctc; + + qp->dct.in = kzalloc(MLX5_ST_SZ_BYTES(create_dct_in), GFP_KERNEL); + if (!qp->dct.in) + return -ENOMEM; + + MLX5_SET(create_dct_in, qp->dct.in, uid, to_mpd(pd)->uid); + dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry); + MLX5_SET(dctc, dctc, pd, to_mpd(pd)->pdn); + MLX5_SET(dctc, dctc, srqn_xrqn, to_msrq(attr->srq)->msrq.srqn); + MLX5_SET(dctc, dctc, cqn, to_mcq(attr->recv_cq)->mcq.cqn); + MLX5_SET64(dctc, dctc, dc_access_key, ucmd->access_key); + MLX5_SET(dctc, dctc, user_index, uidx); + + if (qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE) { + int rcqe_sz = mlx5_ib_get_cqe_size(attr->recv_cq); + + if (rcqe_sz == 128) + MLX5_SET(dctc, dctc, cs_res, MLX5_RES_SCAT_DATA64_CQE); + } + + qp->state = IB_QPS_RESET; + + return 0; +} + +static int check_qp_type(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, + enum ib_qp_type *type) +{ + if (attr->qp_type == IB_QPT_DRIVER && !MLX5_CAP_GEN(dev->mdev, dct)) + goto out; + + switch (attr->qp_type) { + case IB_QPT_XRC_TGT: + case IB_QPT_XRC_INI: + if (!MLX5_CAP_GEN(dev->mdev, xrc)) + goto out; + fallthrough; case IB_QPT_RC: - return "IB_QPT_RC"; case IB_QPT_UC: - return "IB_QPT_UC"; - case IB_QPT_UD: - return "IB_QPT_UD"; - case IB_QPT_RAW_IPV6: - return "IB_QPT_RAW_IPV6"; - case IB_QPT_RAW_ETHERTYPE: - return "IB_QPT_RAW_ETHERTYPE"; - case IB_QPT_XRC_INI: - return "IB_QPT_XRC_INI"; - case IB_QPT_XRC_TGT: - return "IB_QPT_XRC_TGT"; + case IB_QPT_SMI: + case MLX5_IB_QPT_HW_GSI: + case IB_QPT_DRIVER: + case IB_QPT_GSI: + if (dev->profile == &raw_eth_profile) + goto out; case IB_QPT_RAW_PACKET: - return "IB_QPT_RAW_PACKET"; + case IB_QPT_UD: case MLX5_IB_QPT_REG_UMR: - return "MLX5_IB_QPT_REG_UMR"; - case IB_QPT_DRIVER: - return "IB_QPT_DRIVER"; - case IB_QPT_MAX: + break; default: - return "Invalid QP type"; + goto out; } + + *type = attr->qp_type; + return 0; + +out: + mlx5_ib_dbg(dev, "Unsupported QP type %d\n", attr->qp_type); + return -EOPNOTSUPP; } -static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd, - struct ib_qp_init_attr *attr, - struct mlx5_ib_create_qp *ucmd, - struct ib_udata *udata) +static int check_valid_flow(struct mlx5_ib_dev *dev, struct ib_pd *pd, + struct ib_qp_init_attr *attr, + struct ib_udata *udata) { struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( udata, struct mlx5_ib_ucontext, ibucontext); - struct mlx5_ib_qp *qp; - int err = 0; - u32 uidx = MLX5_IB_DEFAULT_UIDX; - void *dctc; - if (!attr->srq || !attr->recv_cq) - return ERR_PTR(-EINVAL); + if (!udata) { + /* Kernel create_qp callers */ + if (attr->rwq_ind_tbl) + return -EOPNOTSUPP; - err = get_qp_user_index(ucontext, ucmd, sizeof(*ucmd), &uidx); - if (err) - return ERR_PTR(err); + switch (attr->qp_type) { + case IB_QPT_RAW_PACKET: + case IB_QPT_DRIVER: + return -EOPNOTSUPP; + default: + return 0; + } + } - qp = kzalloc(sizeof(*qp), GFP_KERNEL); - if (!qp) - return ERR_PTR(-ENOMEM); + /* Userspace create_qp callers */ + if (attr->qp_type == IB_QPT_RAW_PACKET && !ucontext->cqe_version) { + mlx5_ib_dbg(dev, + "Raw Packet QP is only supported for CQE version > 0\n"); + return -EINVAL; + } - qp->dct.in = kzalloc(MLX5_ST_SZ_BYTES(create_dct_in), GFP_KERNEL); - if (!qp->dct.in) { - err = -ENOMEM; - goto err_free; + if (attr->qp_type != IB_QPT_RAW_PACKET && attr->rwq_ind_tbl) { + mlx5_ib_dbg(dev, + "Wrong QP type %d for the RWQ indirect table\n", + attr->qp_type); + return -EINVAL; } - MLX5_SET(create_dct_in, qp->dct.in, uid, to_mpd(pd)->uid); - dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry); - qp->qp_sub_type = MLX5_IB_QPT_DCT; - MLX5_SET(dctc, dctc, pd, to_mpd(pd)->pdn); - MLX5_SET(dctc, dctc, srqn_xrqn, to_msrq(attr->srq)->msrq.srqn); - MLX5_SET(dctc, dctc, cqn, to_mcq(attr->recv_cq)->mcq.cqn); - MLX5_SET64(dctc, dctc, dc_access_key, ucmd->access_key); - MLX5_SET(dctc, dctc, user_index, uidx); + switch (attr->qp_type) { + case IB_QPT_SMI: + case MLX5_IB_QPT_HW_GSI: + case MLX5_IB_QPT_REG_UMR: + case IB_QPT_GSI: + mlx5_ib_dbg(dev, "Kernel doesn't support QP type %d\n", + attr->qp_type); + return -EINVAL; + default: + break; + } - if (ucmd->flags & MLX5_QP_FLAG_SCATTER_CQE) - configure_responder_scat_cqe(attr, dctc); + /* + * We don't need to see this warning, it means that kernel code + * missing ib_pd. Placed here to catch developer's mistakes. + */ + WARN_ONCE(!pd && attr->qp_type != IB_QPT_XRC_TGT, + "There is a missing PD pointer assignment\n"); + return 0; +} - qp->state = IB_QPS_RESET; +static void process_vendor_flag(struct mlx5_ib_dev *dev, int *flags, int flag, + bool cond, struct mlx5_ib_qp *qp) +{ + if (!(*flags & flag)) + return; - return &qp->ibqp; -err_free: - kfree(qp); - return ERR_PTR(err); + if (cond) { + qp->flags_en |= flag; + *flags &= ~flag; + return; + } + + if (flag == MLX5_QP_FLAG_SCATTER_CQE) { + /* + * We don't return error if this flag was provided, + * and mlx5 doesn't have right capability. + */ + *flags &= ~MLX5_QP_FLAG_SCATTER_CQE; + return; + } + mlx5_ib_dbg(dev, "Vendor create QP flag 0x%X is not supported\n", flag); } -static int set_mlx_qp_type(struct mlx5_ib_dev *dev, - struct ib_qp_init_attr *init_attr, - struct mlx5_ib_create_qp *ucmd, - struct ib_udata *udata) +static int process_vendor_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + void *ucmd, struct ib_qp_init_attr *attr) { - enum { MLX_QP_FLAGS = MLX5_QP_FLAG_TYPE_DCT | MLX5_QP_FLAG_TYPE_DCI }; - int err; + struct mlx5_core_dev *mdev = dev->mdev; + bool cond; + int flags; - if (!udata) + if (attr->rwq_ind_tbl) + flags = ((struct mlx5_ib_create_qp_rss *)ucmd)->flags; + else + flags = ((struct mlx5_ib_create_qp *)ucmd)->flags; + + switch (flags & (MLX5_QP_FLAG_TYPE_DCT | MLX5_QP_FLAG_TYPE_DCI)) { + case MLX5_QP_FLAG_TYPE_DCI: + qp->type = MLX5_IB_QPT_DCI; + break; + case MLX5_QP_FLAG_TYPE_DCT: + qp->type = MLX5_IB_QPT_DCT; + break; + default: + if (qp->type != IB_QPT_DRIVER) + break; + /* + * It is IB_QPT_DRIVER and or no subtype or + * wrong subtype were provided. + */ return -EINVAL; + } - if (udata->inlen < sizeof(*ucmd)) { - mlx5_ib_dbg(dev, "create_qp user command is smaller than expected\n"); + process_vendor_flag(dev, &flags, MLX5_QP_FLAG_TYPE_DCI, true, qp); + process_vendor_flag(dev, &flags, MLX5_QP_FLAG_TYPE_DCT, true, qp); + + process_vendor_flag(dev, &flags, MLX5_QP_FLAG_SIGNATURE, true, qp); + process_vendor_flag(dev, &flags, MLX5_QP_FLAG_SCATTER_CQE, + MLX5_CAP_GEN(mdev, sctr_data_cqe), qp); + + if (qp->type == IB_QPT_RAW_PACKET) { + cond = MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan) || + MLX5_CAP_ETH(mdev, tunnel_stateless_gre) || + MLX5_CAP_ETH(mdev, tunnel_stateless_geneve_rx); + process_vendor_flag(dev, &flags, MLX5_QP_FLAG_TUNNEL_OFFLOADS, + cond, qp); + process_vendor_flag(dev, &flags, + MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC, true, + qp); + process_vendor_flag(dev, &flags, + MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC, true, + qp); + } + + if (qp->type == IB_QPT_RC) + process_vendor_flag(dev, &flags, + MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE, + MLX5_CAP_GEN(mdev, qp_packet_based), qp); + + process_vendor_flag(dev, &flags, MLX5_QP_FLAG_BFREG_INDEX, true, qp); + process_vendor_flag(dev, &flags, MLX5_QP_FLAG_UAR_PAGE_INDEX, true, qp); + + cond = qp->flags_en & ~(MLX5_QP_FLAG_TUNNEL_OFFLOADS | + MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC | + MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC); + if (attr->rwq_ind_tbl && cond) { + mlx5_ib_dbg(dev, "RSS RAW QP has unsupported flags 0x%X\n", + cond); return -EINVAL; } - err = ib_copy_from_udata(ucmd, udata, sizeof(*ucmd)); - if (err) - return err; - if ((ucmd->flags & MLX_QP_FLAGS) == MLX5_QP_FLAG_TYPE_DCI) { - init_attr->qp_type = MLX5_IB_QPT_DCI; - } else { - if ((ucmd->flags & MLX_QP_FLAGS) == MLX5_QP_FLAG_TYPE_DCT) { - init_attr->qp_type = MLX5_IB_QPT_DCT; - } else { - mlx5_ib_dbg(dev, "Invalid QP flags\n"); - return -EINVAL; - } + if (flags) + mlx5_ib_dbg(dev, "udata has unsupported flags 0x%X\n", flags); + + return (flags) ? -EINVAL : 0; } - if (!MLX5_CAP_GEN(dev->mdev, dct)) { - mlx5_ib_dbg(dev, "DC transport is not supported\n"); - return -EOPNOTSUPP; +static void process_create_flag(struct mlx5_ib_dev *dev, int *flags, int flag, + bool cond, struct mlx5_ib_qp *qp) +{ + if (!(*flags & flag)) + return; + + if (cond) { + qp->flags |= flag; + *flags &= ~flag; + return; } - return 0; + if (flag == MLX5_IB_QP_CREATE_WC_TEST) { + /* + * Special case, if condition didn't meet, it won't be error, + * just different in-kernel flow. + */ + *flags &= ~MLX5_IB_QP_CREATE_WC_TEST; + return; + } + mlx5_ib_dbg(dev, "Verbs create QP flag 0x%X is not supported\n", flag); } -struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *verbs_init_attr, - struct ib_udata *udata) +static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + struct ib_qp_init_attr *attr) { - struct mlx5_ib_dev *dev; - struct mlx5_ib_qp *qp; - u16 xrcdn = 0; - int err; - struct ib_qp_init_attr mlx_init_attr; - struct ib_qp_init_attr *init_attr = verbs_init_attr; - struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( - udata, struct mlx5_ib_ucontext, ibucontext); + enum ib_qp_type qp_type = qp->type; + struct mlx5_core_dev *mdev = dev->mdev; + int create_flags = attr->create_flags; + bool cond; - if (pd) { - dev = to_mdev(pd->device); + if (qp->type == IB_QPT_UD && dev->profile == &raw_eth_profile) + if (create_flags & ~MLX5_IB_QP_CREATE_WC_TEST) + return -EINVAL; - if (init_attr->qp_type == IB_QPT_RAW_PACKET) { - if (!ucontext) { - mlx5_ib_dbg(dev, "Raw Packet QP is not supported for kernel consumers\n"); - return ERR_PTR(-EINVAL); - } else if (!ucontext->cqe_version) { - mlx5_ib_dbg(dev, "Raw Packet QP is only supported for CQE version > 0\n"); - return ERR_PTR(-EINVAL); - } - } - } else { - /* being cautious here */ - if (init_attr->qp_type != IB_QPT_XRC_TGT && - init_attr->qp_type != MLX5_IB_QPT_REG_UMR) { - pr_warn("%s: no PD for transport %s\n", __func__, - ib_qp_type_str(init_attr->qp_type)); - return ERR_PTR(-EINVAL); - } - dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device); + if (qp_type == MLX5_IB_QPT_DCT) + return (create_flags) ? -EINVAL : 0; + + if (qp_type == IB_QPT_RAW_PACKET && attr->rwq_ind_tbl) + return (create_flags) ? -EINVAL : 0; + + process_create_flag(dev, &create_flags, + IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, + MLX5_CAP_GEN(mdev, block_lb_mc), qp); + process_create_flag(dev, &create_flags, IB_QP_CREATE_CROSS_CHANNEL, + MLX5_CAP_GEN(mdev, cd), qp); + process_create_flag(dev, &create_flags, IB_QP_CREATE_MANAGED_SEND, + MLX5_CAP_GEN(mdev, cd), qp); + process_create_flag(dev, &create_flags, IB_QP_CREATE_MANAGED_RECV, + MLX5_CAP_GEN(mdev, cd), qp); + + if (qp_type == IB_QPT_UD) { + process_create_flag(dev, &create_flags, + IB_QP_CREATE_IPOIB_UD_LSO, + MLX5_CAP_GEN(mdev, ipoib_basic_offloads), + qp); + cond = MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_IB; + process_create_flag(dev, &create_flags, IB_QP_CREATE_SOURCE_QPN, + cond, qp); + } + + if (qp_type == IB_QPT_RAW_PACKET) { + cond = MLX5_CAP_GEN(mdev, eth_net_offloads) && + MLX5_CAP_ETH(mdev, scatter_fcs); + process_create_flag(dev, &create_flags, + IB_QP_CREATE_SCATTER_FCS, cond, qp); + + cond = MLX5_CAP_GEN(mdev, eth_net_offloads) && + MLX5_CAP_ETH(mdev, vlan_cap); + process_create_flag(dev, &create_flags, + IB_QP_CREATE_CVLAN_STRIPPING, cond, qp); + } + + process_create_flag(dev, &create_flags, + IB_QP_CREATE_PCI_WRITE_END_PADDING, + MLX5_CAP_GEN(mdev, end_pad), qp); + + process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_WC_TEST, + qp_type != MLX5_IB_QPT_REG_UMR, qp); + process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_SQPN_QP1, + true, qp); + + if (create_flags) + mlx5_ib_dbg(dev, "Create QP has unsupported flags 0x%X\n", + create_flags); + + return (create_flags) ? -EINVAL : 0; +} + +static int process_udata_size(struct mlx5_ib_dev *dev, + struct mlx5_create_qp_params *params) +{ + size_t ucmd = sizeof(struct mlx5_ib_create_qp); + struct ib_qp_init_attr *attr = params->attr; + struct ib_udata *udata = params->udata; + size_t outlen = udata->outlen; + size_t inlen = udata->inlen; + + params->outlen = min(outlen, sizeof(struct mlx5_ib_create_qp_resp)); + if (attr->qp_type == IB_QPT_DRIVER) { + params->inlen = (inlen < ucmd) ? 0 : ucmd; + goto out; } - if (init_attr->qp_type == IB_QPT_DRIVER) { - struct mlx5_ib_create_qp ucmd; + if (!params->is_rss_raw) { + params->inlen = ucmd; + goto out; + } - init_attr = &mlx_init_attr; - memcpy(init_attr, verbs_init_attr, sizeof(*verbs_init_attr)); - err = set_mlx_qp_type(dev, init_attr, &ucmd, udata); - if (err) - return ERR_PTR(err); + /* RSS RAW QP */ + if (inlen < offsetofend(struct mlx5_ib_create_qp_rss, flags)) + return -EINVAL; - if (init_attr->qp_type == MLX5_IB_QPT_DCI) { - if (init_attr->cap.max_recv_wr || - init_attr->cap.max_recv_sge) { - mlx5_ib_dbg(dev, "DCI QP requires zero size receive queue\n"); - return ERR_PTR(-EINVAL); - } - } else { - return mlx5_ib_create_dct(pd, init_attr, &ucmd, udata); - } + if (outlen < offsetofend(struct mlx5_ib_create_qp_resp, bfreg_index)) + return -EINVAL; + + ucmd = sizeof(struct mlx5_ib_create_qp_rss); + if (inlen > ucmd && !ib_is_udata_cleared(udata, ucmd, inlen - ucmd)) + return -EINVAL; + + params->inlen = min(ucmd, inlen); +out: + if (!params->inlen) + mlx5_ib_dbg(dev, "udata is too small or not cleared\n"); + + return (params->inlen) ? 0 : -EINVAL; +} + +static int create_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, + struct mlx5_ib_qp *qp, + struct mlx5_create_qp_params *params) +{ + int err; + + if (params->is_rss_raw) { + err = create_rss_raw_qp_tir(dev, pd, qp, params); + goto out; } - switch (init_attr->qp_type) { - case IB_QPT_XRC_TGT: - case IB_QPT_XRC_INI: - if (!MLX5_CAP_GEN(dev->mdev, xrc)) { - mlx5_ib_dbg(dev, "XRC not supported\n"); - return ERR_PTR(-ENOSYS); - } - init_attr->recv_cq = NULL; - if (init_attr->qp_type == IB_QPT_XRC_TGT) { - xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn; - init_attr->send_cq = NULL; - } + if (qp->type == MLX5_IB_QPT_DCT) { + err = create_dct(pd, qp, params); + goto out; + } - /* fall through */ - case IB_QPT_RAW_PACKET: - case IB_QPT_RC: - case IB_QPT_UC: - case IB_QPT_UD: - case IB_QPT_SMI: - case MLX5_IB_QPT_HW_GSI: - case MLX5_IB_QPT_REG_UMR: - case MLX5_IB_QPT_DCI: - qp = kzalloc(sizeof(*qp), GFP_KERNEL); - if (!qp) - return ERR_PTR(-ENOMEM); + if (qp->type == IB_QPT_XRC_TGT) { + err = create_xrc_tgt_qp(dev, qp, params); + goto out; + } - err = create_qp_common(dev, pd, init_attr, udata, qp); - if (err) { - mlx5_ib_dbg(dev, "create_qp_common failed\n"); - kfree(qp); - return ERR_PTR(err); - } + if (params->udata) + err = create_user_qp(dev, pd, qp, params); + else + err = create_kernel_qp(dev, pd, qp, params); - if (is_qp0(init_attr->qp_type)) - qp->ibqp.qp_num = 0; - else if (is_qp1(init_attr->qp_type)) - qp->ibqp.qp_num = 1; - else - qp->ibqp.qp_num = qp->trans_qp.base.mqp.qpn; +out: + if (err) { + mlx5_ib_err(dev, "Create QP type %d failed\n", qp->type); + return err; + } - mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n", - qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn, - init_attr->recv_cq ? to_mcq(init_attr->recv_cq)->mcq.cqn : -1, - init_attr->send_cq ? to_mcq(init_attr->send_cq)->mcq.cqn : -1); + if (is_qp0(qp->type)) + qp->ibqp.qp_num = 0; + else if (is_qp1(qp->type)) + qp->ibqp.qp_num = 1; + else + qp->ibqp.qp_num = qp->trans_qp.base.mqp.qpn; - qp->trans_qp.xrcdn = xrcdn; + mlx5_ib_dbg(dev, + "QP type %d, ib qpn 0x%X, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n", + qp->type, qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn, + params->attr->recv_cq ? to_mcq(params->attr->recv_cq)->mcq.cqn : + -1, + params->attr->send_cq ? to_mcq(params->attr->send_cq)->mcq.cqn : + -1); - break; + return 0; +} - case IB_QPT_GSI: - return mlx5_ib_gsi_create_qp(pd, init_attr); +static int check_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + struct ib_qp_init_attr *attr) +{ + int ret = 0; - case IB_QPT_RAW_IPV6: - case IB_QPT_RAW_ETHERTYPE: - case IB_QPT_MAX: + switch (qp->type) { + case MLX5_IB_QPT_DCT: + ret = (!attr->srq || !attr->recv_cq) ? -EINVAL : 0; + break; + case MLX5_IB_QPT_DCI: + ret = (attr->cap.max_recv_wr || attr->cap.max_recv_sge) ? + -EINVAL : + 0; + break; + case IB_QPT_RAW_PACKET: + ret = (attr->rwq_ind_tbl && attr->send_cq) ? -EINVAL : 0; + break; default: - mlx5_ib_dbg(dev, "unsupported qp type %d\n", - init_attr->qp_type); - /* Don't support raw QPs */ - return ERR_PTR(-EOPNOTSUPP); + break; } - if (verbs_init_attr->qp_type == IB_QPT_DRIVER) - qp->qp_sub_type = init_attr->qp_type; + if (ret) + mlx5_ib_dbg(dev, "QP type %d has wrong attributes\n", qp->type); - return &qp->ibqp; + return ret; +} + +static int get_qp_uidx(struct mlx5_ib_qp *qp, + struct mlx5_create_qp_params *params) +{ + struct mlx5_ib_create_qp *ucmd = params->ucmd; + struct ib_udata *udata = params->udata; + struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); + + if (params->is_rss_raw) + return 0; + + return get_qp_user_index(ucontext, ucmd, sizeof(*ucmd), ¶ms->uidx); } static int mlx5_ib_destroy_dct(struct mlx5_ib_qp *mqp) @@ -2828,6 +2849,104 @@ static int mlx5_ib_destroy_dct(struct mlx5_ib_qp *mqp) return 0; } +struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr, + struct ib_udata *udata) +{ + struct mlx5_create_qp_params params = {}; + struct mlx5_ib_dev *dev; + struct mlx5_ib_qp *qp; + enum ib_qp_type type; + int err; + + dev = pd ? to_mdev(pd->device) : + to_mdev(to_mxrcd(attr->xrcd)->ibxrcd.device); + + err = check_qp_type(dev, attr, &type); + if (err) + return ERR_PTR(err); + + err = check_valid_flow(dev, pd, attr, udata); + if (err) + return ERR_PTR(err); + + if (attr->qp_type == IB_QPT_GSI) + return mlx5_ib_gsi_create_qp(pd, attr); + + params.udata = udata; + params.uidx = MLX5_IB_DEFAULT_UIDX; + params.attr = attr; + params.is_rss_raw = !!attr->rwq_ind_tbl; + + if (udata) { + err = process_udata_size(dev, ¶ms); + if (err) + return ERR_PTR(err); + + params.ucmd = kzalloc(params.inlen, GFP_KERNEL); + if (!params.ucmd) + return ERR_PTR(-ENOMEM); + + err = ib_copy_from_udata(params.ucmd, udata, params.inlen); + if (err) + goto free_ucmd; + } + + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) { + err = -ENOMEM; + goto free_ucmd; + } + + qp->type = type; + if (udata) { + err = process_vendor_flags(dev, qp, params.ucmd, attr); + if (err) + goto free_qp; + + err = get_qp_uidx(qp, ¶ms); + if (err) + goto free_qp; + } + err = process_create_flags(dev, qp, attr); + if (err) + goto free_qp; + + err = check_qp_attr(dev, qp, attr); + if (err) + goto free_qp; + + err = create_qp(dev, pd, qp, ¶ms); + if (err) + goto free_qp; + + kfree(params.ucmd); + params.ucmd = NULL; + + if (udata) + /* + * It is safe to copy response for all user create QP flows, + * including MLX5_IB_QPT_DCT, which doesn't need it. + * In that case, resp will be filled with zeros. + */ + err = ib_copy_to_udata(udata, ¶ms.resp, params.outlen); + if (err) + goto destroy_qp; + + return &qp->ibqp; + +destroy_qp: + if (qp->type == MLX5_IB_QPT_DCT) + mlx5_ib_destroy_dct(qp); + else + destroy_qp_common(dev, qp, udata); + qp = NULL; +free_qp: + kfree(qp); +free_ucmd: + kfree(params.ucmd); + return ERR_PTR(err); +} + int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(qp->device); @@ -2836,7 +2955,7 @@ int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) if (unlikely(qp->qp_type == IB_QPT_GSI)) return mlx5_ib_gsi_destroy_qp(qp); - if (mqp->qp_sub_type == MLX5_IB_QPT_DCT) + if (mqp->type == MLX5_IB_QPT_DCT) return mlx5_ib_destroy_dct(mqp); destroy_qp_common(dev, mqp, udata); @@ -2965,6 +3084,21 @@ static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev, return err; } +static void mlx5_set_path_udp_sport(struct mlx5_qp_path *path, + const struct rdma_ah_attr *ah, + u32 lqpn, u32 rqpn) + +{ + u32 fl = ah->grh.flow_label; + u16 sport; + + if (!fl) + fl = rdma_calc_flow_label(lqpn, rqpn); + + sport = rdma_flow_label_to_udp_sport(fl); + path->udp_sport = cpu_to_be16(sport); +} + static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, const struct rdma_ah_attr *ah, struct mlx5_qp_path *path, u8 port, int attr_mask, @@ -2996,12 +3130,15 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return -EINVAL; memcpy(path->rmac, ah->roce.dmac, sizeof(ah->roce.dmac)); - if (qp->ibqp.qp_type == IB_QPT_RC || - qp->ibqp.qp_type == IB_QPT_UC || - qp->ibqp.qp_type == IB_QPT_XRC_INI || - qp->ibqp.qp_type == IB_QPT_XRC_TGT) - path->udp_sport = - mlx5_get_roce_udp_sport(dev, ah->grh.sgid_attr); + if ((qp->ibqp.qp_type == IB_QPT_RC || + qp->ibqp.qp_type == IB_QPT_UC || + qp->ibqp.qp_type == IB_QPT_XRC_INI || + qp->ibqp.qp_type == IB_QPT_XRC_TGT) && + (grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) && + (attr_mask & IB_QP_DEST_QPN)) + mlx5_set_path_udp_sport(path, ah, + qp->ibqp.qp_num, + attr->dest_qp_num); path->dci_cfi_prio_sl = (sl & 0x7) << 4; gid_type = ah->grh.sgid_attr->gid_type; if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) @@ -3050,10 +3187,12 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q MLX5_QP_OPTPAR_RAE | MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_PKEY_INDEX | - MLX5_QP_OPTPAR_PRI_PORT, + MLX5_QP_OPTPAR_PRI_PORT | + MLX5_QP_OPTPAR_LAG_TX_AFF, [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_PKEY_INDEX | - MLX5_QP_OPTPAR_PRI_PORT, + MLX5_QP_OPTPAR_PRI_PORT | + MLX5_QP_OPTPAR_LAG_TX_AFF, [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX | MLX5_QP_OPTPAR_Q_KEY | MLX5_QP_OPTPAR_PRI_PORT, @@ -3061,17 +3200,20 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q MLX5_QP_OPTPAR_RAE | MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_PKEY_INDEX | - MLX5_QP_OPTPAR_PRI_PORT, + MLX5_QP_OPTPAR_PRI_PORT | + MLX5_QP_OPTPAR_LAG_TX_AFF, }, [MLX5_QP_STATE_RTR] = { [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | MLX5_QP_OPTPAR_RRE | MLX5_QP_OPTPAR_RAE | MLX5_QP_OPTPAR_RWE | - MLX5_QP_OPTPAR_PKEY_INDEX, + MLX5_QP_OPTPAR_PKEY_INDEX | + MLX5_QP_OPTPAR_LAG_TX_AFF, [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | MLX5_QP_OPTPAR_RWE | - MLX5_QP_OPTPAR_PKEY_INDEX, + MLX5_QP_OPTPAR_PKEY_INDEX | + MLX5_QP_OPTPAR_LAG_TX_AFF, [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX | MLX5_QP_OPTPAR_Q_KEY, [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_PKEY_INDEX | @@ -3080,7 +3222,8 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q MLX5_QP_OPTPAR_RRE | MLX5_QP_OPTPAR_RAE | MLX5_QP_OPTPAR_RWE | - MLX5_QP_OPTPAR_PKEY_INDEX, + MLX5_QP_OPTPAR_PKEY_INDEX | + MLX5_QP_OPTPAR_LAG_TX_AFF, }, }, [MLX5_QP_STATE_RTR] = { @@ -3414,33 +3557,68 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return 0; } -static unsigned int get_tx_affinity(struct mlx5_ib_dev *dev, - struct mlx5_ib_pd *pd, - struct mlx5_ib_qp_base *qp_base, - u8 port_num, struct ib_udata *udata) +static unsigned int get_tx_affinity_rr(struct mlx5_ib_dev *dev, + struct ib_udata *udata) +{ + struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( + udata, struct mlx5_ib_ucontext, ibucontext); + u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1; + atomic_t *tx_port_affinity; + + if (ucontext) + tx_port_affinity = &ucontext->tx_port_affinity; + else + tx_port_affinity = &dev->port[port_num].roce.tx_port_affinity; + + return (unsigned int)atomic_add_return(1, tx_port_affinity) % + MLX5_MAX_PORTS + 1; +} + +static bool qp_supports_affinity(struct ib_qp *qp) +{ + if ((qp->qp_type == IB_QPT_RC) || + (qp->qp_type == IB_QPT_UD) || + (qp->qp_type == IB_QPT_UC) || + (qp->qp_type == IB_QPT_RAW_PACKET) || + (qp->qp_type == IB_QPT_XRC_INI) || + (qp->qp_type == IB_QPT_XRC_TGT)) + return true; + return false; +} + +static unsigned int get_tx_affinity(struct ib_qp *qp, + const struct ib_qp_attr *attr, + int attr_mask, u8 init, + struct ib_udata *udata) { struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context( udata, struct mlx5_ib_ucontext, ibucontext); - unsigned int tx_port_affinity; + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_ib_qp *mqp = to_mqp(qp); + struct mlx5_ib_qp_base *qp_base; + unsigned int tx_affinity; + + if (!(dev->lag_active && qp_supports_affinity(qp))) + return 0; - if (ucontext) { - tx_port_affinity = (unsigned int)atomic_add_return( - 1, &ucontext->tx_port_affinity) % - MLX5_MAX_PORTS + - 1; + if (mqp->flags & MLX5_IB_QP_CREATE_SQPN_QP1) + tx_affinity = mqp->gsi_lag_port; + else if (init) + tx_affinity = get_tx_affinity_rr(dev, udata); + else if ((attr_mask & IB_QP_AV) && attr->xmit_slave) + tx_affinity = + mlx5_lag_get_slave_port(dev->mdev, attr->xmit_slave); + else + return 0; + + qp_base = &mqp->trans_qp.base; + if (ucontext) mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x ucontext %p\n", - tx_port_affinity, qp_base->mqp.qpn, ucontext); - } else { - tx_port_affinity = - (unsigned int)atomic_add_return( - 1, &dev->port[port_num].roce.tx_port_affinity) % - MLX5_MAX_PORTS + - 1; + tx_affinity, qp_base->mqp.qpn, ucontext); + else mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x\n", - tx_port_affinity, qp_base->mqp.qpn); - } - - return tx_port_affinity; + tx_affinity, qp_base->mqp.qpn); + return tx_affinity; } static int __mlx5_ib_qp_set_counter(struct ib_qp *qp, @@ -3516,15 +3694,14 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, struct mlx5_qp_context *context; struct mlx5_ib_pd *pd; enum mlx5_qp_state mlx5_cur, mlx5_new; - enum mlx5_qp_optpar optpar; + enum mlx5_qp_optpar optpar = 0; u32 set_id = 0; int mlx5_st; int err; u16 op; u8 tx_affinity = 0; - mlx5_st = to_mlx5_st(ibqp->qp_type == IB_QPT_DRIVER ? - qp->qp_sub_type : ibqp->qp_type); + mlx5_st = to_mlx5_st(qp->type); if (mlx5_st < 0) return -EINVAL; @@ -3532,7 +3709,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (!context) return -ENOMEM; - pd = get_pd(qp); + pd = to_mpd(qp->ibqp.pd); context->flags = cpu_to_be32(mlx5_st << 16); if (!(attr_mask & IB_QP_PATH_MIG_STATE)) { @@ -3551,27 +3728,20 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, } } - if ((cur_state == IB_QPS_RESET) && (new_state == IB_QPS_INIT)) { - if ((ibqp->qp_type == IB_QPT_RC) || - (ibqp->qp_type == IB_QPT_UD && - !(qp->flags & MLX5_IB_QP_SQPN_QP1)) || - (ibqp->qp_type == IB_QPT_UC) || - (ibqp->qp_type == IB_QPT_RAW_PACKET) || - (ibqp->qp_type == IB_QPT_XRC_INI) || - (ibqp->qp_type == IB_QPT_XRC_TGT)) { - if (dev->lag_active) { - u8 p = mlx5_core_native_port_num(dev->mdev) - 1; - tx_affinity = get_tx_affinity(dev, pd, base, p, - udata); - context->flags |= cpu_to_be32(tx_affinity << 24); - } - } + tx_affinity = get_tx_affinity(ibqp, attr, attr_mask, + cur_state == IB_QPS_RESET && + new_state == IB_QPS_INIT, udata); + if (tx_affinity) { + context->flags |= cpu_to_be32(tx_affinity << 24); + if (new_state == IB_QPS_RTR && + MLX5_CAP_GEN(dev->mdev, init2_lag_tx_port_affinity)) + optpar |= MLX5_QP_OPTPAR_LAG_TX_AFF; } if (is_sqp(ibqp->qp_type)) { context->mtu_msgmax = (IB_MTU_256 << 5) | 8; } else if ((ibqp->qp_type == IB_QPT_UD && - !(qp->flags & MLX5_IB_QP_UNDERLAY)) || + !(qp->flags & IB_QP_CREATE_SOURCE_QPN)) || ibqp->qp_type == MLX5_IB_QPT_REG_UMR) { context->mtu_msgmax = (IB_MTU_4096 << 5) | 12; } else if (attr_mask & IB_QP_PATH_MTU) { @@ -3676,7 +3846,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, qp->port) - 1; /* Underlay port should be used - index 0 function per port */ - if (qp->flags & MLX5_IB_QP_UNDERLAY) + if (qp->flags & IB_QP_CREATE_SOURCE_QPN) port_num = 0; if (ibqp->counter) @@ -3690,7 +3860,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) context->sq_crq_size |= cpu_to_be16(1 << 4); - if (qp->flags & MLX5_IB_QP_SQPN_QP1) + if (qp->flags & MLX5_IB_QP_CREATE_SQPN_QP1) context->deth_sqpn = cpu_to_be32(1); mlx5_cur = to_mlx5_state(cur_state); @@ -3703,11 +3873,11 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, } op = optab[mlx5_cur][mlx5_new]; - optpar = ib_mask_to_mlx5_opt(attr_mask); + optpar |= ib_mask_to_mlx5_opt(attr_mask); optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st]; if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET || - qp->flags & MLX5_IB_QP_UNDERLAY) { + qp->flags & IB_QP_CREATE_SOURCE_QPN) { struct mlx5_modify_raw_qp_param raw_qp_param = {}; raw_qp_param.operation = op; @@ -3985,11 +4155,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (unlikely(ibqp->qp_type == IB_QPT_GSI)) return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask); - if (ibqp->qp_type == IB_QPT_DRIVER) - qp_type = qp->qp_sub_type; - else - qp_type = (unlikely(ibqp->qp_type == MLX5_IB_QPT_HW_GSI)) ? - IB_QPT_GSI : ibqp->qp_type; + qp_type = (unlikely(ibqp->qp_type == MLX5_IB_QPT_HW_GSI)) ? IB_QPT_GSI : + qp->type; if (qp_type == MLX5_IB_QPT_DCT) return mlx5_ib_modify_dct(ibqp, attr, attr_mask, udata); @@ -4003,7 +4170,7 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; } - if (qp->flags & MLX5_IB_QP_UNDERLAY) { + if (qp->flags & IB_QP_CREATE_SOURCE_QPN) { if (attr_mask & ~(IB_QP_STATE | IB_QP_CUR_STATE)) { mlx5_ib_dbg(dev, "invalid attr_mask 0x%x when underlay QP is used\n", attr_mask); @@ -4070,1432 +4237,6 @@ out: return err; } -static void _handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg, - u32 wqe_sz, void **cur_edge) -{ - u32 idx; - - idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1); - *cur_edge = get_sq_edge(sq, idx); - - *seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx); -} - -/* handle_post_send_edge - Check if we get to SQ edge. If yes, update to the - * next nearby edge and get new address translation for current WQE position. - * @sq - SQ buffer. - * @seg: Current WQE position (16B aligned). - * @wqe_sz: Total current WQE size [16B]. - * @cur_edge: Updated current edge. - */ -static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg, - u32 wqe_sz, void **cur_edge) -{ - if (likely(*seg != *cur_edge)) - return; - - _handle_post_send_edge(sq, seg, wqe_sz, cur_edge); -} - -/* memcpy_send_wqe - copy data from src to WQE and update the relevant WQ's - * pointers. At the end @seg is aligned to 16B regardless the copied size. - * @sq - SQ buffer. - * @cur_edge: Updated current edge. - * @seg: Current WQE position (16B aligned). - * @wqe_sz: Total current WQE size [16B]. - * @src: Pointer to copy from. - * @n: Number of bytes to copy. - */ -static inline void memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge, - void **seg, u32 *wqe_sz, const void *src, - size_t n) -{ - while (likely(n)) { - size_t leftlen = *cur_edge - *seg; - size_t copysz = min_t(size_t, leftlen, n); - size_t stride; - - memcpy(*seg, src, copysz); - - n -= copysz; - src += copysz; - stride = !n ? ALIGN(copysz, 16) : copysz; - *seg += stride; - *wqe_sz += stride >> 4; - handle_post_send_edge(sq, seg, *wqe_sz, cur_edge); - } -} - -static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq) -{ - struct mlx5_ib_cq *cq; - unsigned cur; - - cur = wq->head - wq->tail; - if (likely(cur + nreq < wq->max_post)) - return 0; - - cq = to_mcq(ib_cq); - spin_lock(&cq->lock); - cur = wq->head - wq->tail; - spin_unlock(&cq->lock); - - return cur + nreq >= wq->max_post; -} - -static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg, - u64 remote_addr, u32 rkey) -{ - rseg->raddr = cpu_to_be64(remote_addr); - rseg->rkey = cpu_to_be32(rkey); - rseg->reserved = 0; -} - -static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp, - void **seg, int *size, void **cur_edge) -{ - struct mlx5_wqe_eth_seg *eseg = *seg; - - memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg)); - - if (wr->send_flags & IB_SEND_IP_CSUM) - eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM | - MLX5_ETH_WQE_L4_CSUM; - - if (wr->opcode == IB_WR_LSO) { - struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr); - size_t left, copysz; - void *pdata = ud_wr->header; - size_t stride; - - left = ud_wr->hlen; - eseg->mss = cpu_to_be16(ud_wr->mss); - eseg->inline_hdr.sz = cpu_to_be16(left); - - /* memcpy_send_wqe should get a 16B align address. Hence, we - * first copy up to the current edge and then, if needed, - * fall-through to memcpy_send_wqe. - */ - copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start, - left); - memcpy(eseg->inline_hdr.start, pdata, copysz); - stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) - - sizeof(eseg->inline_hdr.start) + copysz, 16); - *size += stride / 16; - *seg += stride; - - if (copysz < left) { - handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - left -= copysz; - pdata += copysz; - memcpy_send_wqe(&qp->sq, cur_edge, seg, size, pdata, - left); - } - - return; - } - - *seg += sizeof(struct mlx5_wqe_eth_seg); - *size += sizeof(struct mlx5_wqe_eth_seg) / 16; -} - -static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg, - const struct ib_send_wr *wr) -{ - memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av)); - dseg->av.dqp_dct = cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV); - dseg->av.key.qkey.qkey = cpu_to_be32(ud_wr(wr)->remote_qkey); -} - -static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg) -{ - dseg->byte_count = cpu_to_be32(sg->length); - dseg->lkey = cpu_to_be32(sg->lkey); - dseg->addr = cpu_to_be64(sg->addr); -} - -static u64 get_xlt_octo(u64 bytes) -{ - return ALIGN(bytes, MLX5_IB_UMR_XLT_ALIGNMENT) / - MLX5_IB_UMR_OCTOWORD; -} - -static __be64 frwr_mkey_mask(bool atomic) -{ - u64 result; - - result = MLX5_MKEY_MASK_LEN | - MLX5_MKEY_MASK_PAGE_SIZE | - MLX5_MKEY_MASK_START_ADDR | - MLX5_MKEY_MASK_EN_RINVAL | - MLX5_MKEY_MASK_KEY | - MLX5_MKEY_MASK_LR | - MLX5_MKEY_MASK_LW | - MLX5_MKEY_MASK_RR | - MLX5_MKEY_MASK_RW | - MLX5_MKEY_MASK_SMALL_FENCE | - MLX5_MKEY_MASK_FREE; - - if (atomic) - result |= MLX5_MKEY_MASK_A; - - return cpu_to_be64(result); -} - -static __be64 sig_mkey_mask(void) -{ - u64 result; - - result = MLX5_MKEY_MASK_LEN | - MLX5_MKEY_MASK_PAGE_SIZE | - MLX5_MKEY_MASK_START_ADDR | - MLX5_MKEY_MASK_EN_SIGERR | - MLX5_MKEY_MASK_EN_RINVAL | - MLX5_MKEY_MASK_KEY | - MLX5_MKEY_MASK_LR | - MLX5_MKEY_MASK_LW | - MLX5_MKEY_MASK_RR | - MLX5_MKEY_MASK_RW | - MLX5_MKEY_MASK_SMALL_FENCE | - MLX5_MKEY_MASK_FREE | - MLX5_MKEY_MASK_BSF_EN; - - return cpu_to_be64(result); -} - -static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, - struct mlx5_ib_mr *mr, u8 flags, bool atomic) -{ - int size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size; - - memset(umr, 0, sizeof(*umr)); - - umr->flags = flags; - umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size)); - umr->mkey_mask = frwr_mkey_mask(atomic); -} - -static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr) -{ - memset(umr, 0, sizeof(*umr)); - umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); - umr->flags = MLX5_UMR_INLINE; -} - -static __be64 get_umr_enable_mr_mask(void) -{ - u64 result; - - result = MLX5_MKEY_MASK_KEY | - MLX5_MKEY_MASK_FREE; - - return cpu_to_be64(result); -} - -static __be64 get_umr_disable_mr_mask(void) -{ - u64 result; - - result = MLX5_MKEY_MASK_FREE; - - return cpu_to_be64(result); -} - -static __be64 get_umr_update_translation_mask(void) -{ - u64 result; - - result = MLX5_MKEY_MASK_LEN | - MLX5_MKEY_MASK_PAGE_SIZE | - MLX5_MKEY_MASK_START_ADDR; - - return cpu_to_be64(result); -} - -static __be64 get_umr_update_access_mask(int atomic) -{ - u64 result; - - result = MLX5_MKEY_MASK_LR | - MLX5_MKEY_MASK_LW | - MLX5_MKEY_MASK_RR | - MLX5_MKEY_MASK_RW; - - if (atomic) - result |= MLX5_MKEY_MASK_A; - - return cpu_to_be64(result); -} - -static __be64 get_umr_update_pd_mask(void) -{ - u64 result; - - result = MLX5_MKEY_MASK_PD; - - return cpu_to_be64(result); -} - -static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask) -{ - if ((mask & MLX5_MKEY_MASK_PAGE_SIZE && - MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) || - (mask & MLX5_MKEY_MASK_A && - MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))) - return -EPERM; - return 0; -} - -static int set_reg_umr_segment(struct mlx5_ib_dev *dev, - struct mlx5_wqe_umr_ctrl_seg *umr, - const struct ib_send_wr *wr, int atomic) -{ - const struct mlx5_umr_wr *umrwr = umr_wr(wr); - - memset(umr, 0, sizeof(*umr)); - - if (!umrwr->ignore_free_state) { - if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE) - /* fail if free */ - umr->flags = MLX5_UMR_CHECK_FREE; - else - /* fail if not free */ - umr->flags = MLX5_UMR_CHECK_NOT_FREE; - } - - umr->xlt_octowords = cpu_to_be16(get_xlt_octo(umrwr->xlt_size)); - if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_XLT) { - u64 offset = get_xlt_octo(umrwr->offset); - - umr->xlt_offset = cpu_to_be16(offset & 0xffff); - umr->xlt_offset_47_16 = cpu_to_be32(offset >> 16); - umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN; - } - if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION) - umr->mkey_mask |= get_umr_update_translation_mask(); - if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS) { - umr->mkey_mask |= get_umr_update_access_mask(atomic); - umr->mkey_mask |= get_umr_update_pd_mask(); - } - if (wr->send_flags & MLX5_IB_SEND_UMR_ENABLE_MR) - umr->mkey_mask |= get_umr_enable_mr_mask(); - if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR) - umr->mkey_mask |= get_umr_disable_mr_mask(); - - if (!wr->num_sge) - umr->flags |= MLX5_UMR_INLINE; - - return umr_check_mkey_mask(dev, be64_to_cpu(umr->mkey_mask)); -} - -static u8 get_umr_flags(int acc) -{ - return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) | - (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) | - (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) | - (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) | - MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN; -} - -static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg, - struct mlx5_ib_mr *mr, - u32 key, int access) -{ - int ndescs = ALIGN(mr->ndescs + mr->meta_ndescs, 8) >> 1; - - memset(seg, 0, sizeof(*seg)); - - if (mr->access_mode == MLX5_MKC_ACCESS_MODE_MTT) - seg->log2_page_size = ilog2(mr->ibmr.page_size); - else if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS) - /* KLMs take twice the size of MTTs */ - ndescs *= 2; - - seg->flags = get_umr_flags(access) | mr->access_mode; - seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00); - seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); - seg->start_addr = cpu_to_be64(mr->ibmr.iova); - seg->len = cpu_to_be64(mr->ibmr.length); - seg->xlt_oct_size = cpu_to_be32(ndescs); -} - -static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg) -{ - memset(seg, 0, sizeof(*seg)); - seg->status = MLX5_MKEY_STATUS_FREE; -} - -static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, - const struct ib_send_wr *wr) -{ - const struct mlx5_umr_wr *umrwr = umr_wr(wr); - - memset(seg, 0, sizeof(*seg)); - if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR) - seg->status = MLX5_MKEY_STATUS_FREE; - - seg->flags = convert_access(umrwr->access_flags); - if (umrwr->pd) - seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn); - if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION && - !umrwr->length) - seg->flags_pd |= cpu_to_be32(MLX5_MKEY_LEN64); - - seg->start_addr = cpu_to_be64(umrwr->virt_addr); - seg->len = cpu_to_be64(umrwr->length); - seg->log2_page_size = umrwr->page_shift; - seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 | - mlx5_mkey_variant(umrwr->mkey)); -} - -static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg, - struct mlx5_ib_mr *mr, - struct mlx5_ib_pd *pd) -{ - int bcount = mr->desc_size * (mr->ndescs + mr->meta_ndescs); - - dseg->addr = cpu_to_be64(mr->desc_map); - dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64)); - dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey); -} - -static __be32 send_ieth(const struct ib_send_wr *wr) -{ - switch (wr->opcode) { - case IB_WR_SEND_WITH_IMM: - case IB_WR_RDMA_WRITE_WITH_IMM: - return wr->ex.imm_data; - - case IB_WR_SEND_WITH_INV: - return cpu_to_be32(wr->ex.invalidate_rkey); - - default: - return 0; - } -} - -static u8 calc_sig(void *wqe, int size) -{ - u8 *p = wqe; - u8 res = 0; - int i; - - for (i = 0; i < size; i++) - res ^= p[i]; - - return ~res; -} - -static u8 wq_sig(void *wqe) -{ - return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4); -} - -static int set_data_inl_seg(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr, - void **wqe, int *wqe_sz, void **cur_edge) -{ - struct mlx5_wqe_inline_seg *seg; - size_t offset; - int inl = 0; - int i; - - seg = *wqe; - *wqe += sizeof(*seg); - offset = sizeof(*seg); - - for (i = 0; i < wr->num_sge; i++) { - size_t len = wr->sg_list[i].length; - void *addr = (void *)(unsigned long)(wr->sg_list[i].addr); - - inl += len; - - if (unlikely(inl > qp->max_inline_data)) - return -ENOMEM; - - while (likely(len)) { - size_t leftlen; - size_t copysz; - - handle_post_send_edge(&qp->sq, wqe, - *wqe_sz + (offset >> 4), - cur_edge); - - leftlen = *cur_edge - *wqe; - copysz = min_t(size_t, leftlen, len); - - memcpy(*wqe, addr, copysz); - len -= copysz; - addr += copysz; - *wqe += copysz; - offset += copysz; - } - } - - seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG); - - *wqe_sz += ALIGN(inl + sizeof(seg->byte_count), 16) / 16; - - return 0; -} - -static u16 prot_field_size(enum ib_signature_type type) -{ - switch (type) { - case IB_SIG_TYPE_T10_DIF: - return MLX5_DIF_SIZE; - default: - return 0; - } -} - -static u8 bs_selector(int block_size) -{ - switch (block_size) { - case 512: return 0x1; - case 520: return 0x2; - case 4096: return 0x3; - case 4160: return 0x4; - case 1073741824: return 0x5; - default: return 0; - } -} - -static void mlx5_fill_inl_bsf(struct ib_sig_domain *domain, - struct mlx5_bsf_inl *inl) -{ - /* Valid inline section and allow BSF refresh */ - inl->vld_refresh = cpu_to_be16(MLX5_BSF_INL_VALID | - MLX5_BSF_REFRESH_DIF); - inl->dif_apptag = cpu_to_be16(domain->sig.dif.app_tag); - inl->dif_reftag = cpu_to_be32(domain->sig.dif.ref_tag); - /* repeating block */ - inl->rp_inv_seed = MLX5_BSF_REPEAT_BLOCK; - inl->sig_type = domain->sig.dif.bg_type == IB_T10DIF_CRC ? - MLX5_DIF_CRC : MLX5_DIF_IPCS; - - if (domain->sig.dif.ref_remap) - inl->dif_inc_ref_guard_check |= MLX5_BSF_INC_REFTAG; - - if (domain->sig.dif.app_escape) { - if (domain->sig.dif.ref_escape) - inl->dif_inc_ref_guard_check |= MLX5_BSF_APPREF_ESCAPE; - else - inl->dif_inc_ref_guard_check |= MLX5_BSF_APPTAG_ESCAPE; - } - - inl->dif_app_bitmask_check = - cpu_to_be16(domain->sig.dif.apptag_check_mask); -} - -static int mlx5_set_bsf(struct ib_mr *sig_mr, - struct ib_sig_attrs *sig_attrs, - struct mlx5_bsf *bsf, u32 data_size) -{ - struct mlx5_core_sig_ctx *msig = to_mmr(sig_mr)->sig; - struct mlx5_bsf_basic *basic = &bsf->basic; - struct ib_sig_domain *mem = &sig_attrs->mem; - struct ib_sig_domain *wire = &sig_attrs->wire; - - memset(bsf, 0, sizeof(*bsf)); - - /* Basic + Extended + Inline */ - basic->bsf_size_sbs = 1 << 7; - /* Input domain check byte mask */ - basic->check_byte_mask = sig_attrs->check_mask; - basic->raw_data_size = cpu_to_be32(data_size); - - /* Memory domain */ - switch (sig_attrs->mem.sig_type) { - case IB_SIG_TYPE_NONE: - break; - case IB_SIG_TYPE_T10_DIF: - basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval); - basic->m_bfs_psv = cpu_to_be32(msig->psv_memory.psv_idx); - mlx5_fill_inl_bsf(mem, &bsf->m_inl); - break; - default: - return -EINVAL; - } - - /* Wire domain */ - switch (sig_attrs->wire.sig_type) { - case IB_SIG_TYPE_NONE: - break; - case IB_SIG_TYPE_T10_DIF: - if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval && - mem->sig_type == wire->sig_type) { - /* Same block structure */ - basic->bsf_size_sbs |= 1 << 4; - if (mem->sig.dif.bg_type == wire->sig.dif.bg_type) - basic->wire.copy_byte_mask |= MLX5_CPY_GRD_MASK; - if (mem->sig.dif.app_tag == wire->sig.dif.app_tag) - basic->wire.copy_byte_mask |= MLX5_CPY_APP_MASK; - if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag) - basic->wire.copy_byte_mask |= MLX5_CPY_REF_MASK; - } else - basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval); - - basic->w_bfs_psv = cpu_to_be32(msig->psv_wire.psv_idx); - mlx5_fill_inl_bsf(wire, &bsf->w_inl); - break; - default: - return -EINVAL; - } - - return 0; -} - -static int set_sig_data_segment(const struct ib_send_wr *send_wr, - struct ib_mr *sig_mr, - struct ib_sig_attrs *sig_attrs, - struct mlx5_ib_qp *qp, void **seg, int *size, - void **cur_edge) -{ - struct mlx5_bsf *bsf; - u32 data_len; - u32 data_key; - u64 data_va; - u32 prot_len = 0; - u32 prot_key = 0; - u64 prot_va = 0; - bool prot = false; - int ret; - int wqe_size; - struct mlx5_ib_mr *mr = to_mmr(sig_mr); - struct mlx5_ib_mr *pi_mr = mr->pi_mr; - - data_len = pi_mr->data_length; - data_key = pi_mr->ibmr.lkey; - data_va = pi_mr->data_iova; - if (pi_mr->meta_ndescs) { - prot_len = pi_mr->meta_length; - prot_key = pi_mr->ibmr.lkey; - prot_va = pi_mr->pi_iova; - prot = true; - } - - if (!prot || (data_key == prot_key && data_va == prot_va && - data_len == prot_len)) { - /** - * Source domain doesn't contain signature information - * or data and protection are interleaved in memory. - * So need construct: - * ------------------ - * | data_klm | - * ------------------ - * | BSF | - * ------------------ - **/ - struct mlx5_klm *data_klm = *seg; - - data_klm->bcount = cpu_to_be32(data_len); - data_klm->key = cpu_to_be32(data_key); - data_klm->va = cpu_to_be64(data_va); - wqe_size = ALIGN(sizeof(*data_klm), 64); - } else { - /** - * Source domain contains signature information - * So need construct a strided block format: - * --------------------------- - * | stride_block_ctrl | - * --------------------------- - * | data_klm | - * --------------------------- - * | prot_klm | - * --------------------------- - * | BSF | - * --------------------------- - **/ - struct mlx5_stride_block_ctrl_seg *sblock_ctrl; - struct mlx5_stride_block_entry *data_sentry; - struct mlx5_stride_block_entry *prot_sentry; - u16 block_size = sig_attrs->mem.sig.dif.pi_interval; - int prot_size; - - sblock_ctrl = *seg; - data_sentry = (void *)sblock_ctrl + sizeof(*sblock_ctrl); - prot_sentry = (void *)data_sentry + sizeof(*data_sentry); - - prot_size = prot_field_size(sig_attrs->mem.sig_type); - if (!prot_size) { - pr_err("Bad block size given: %u\n", block_size); - return -EINVAL; - } - sblock_ctrl->bcount_per_cycle = cpu_to_be32(block_size + - prot_size); - sblock_ctrl->op = cpu_to_be32(MLX5_STRIDE_BLOCK_OP); - sblock_ctrl->repeat_count = cpu_to_be32(data_len / block_size); - sblock_ctrl->num_entries = cpu_to_be16(2); - - data_sentry->bcount = cpu_to_be16(block_size); - data_sentry->key = cpu_to_be32(data_key); - data_sentry->va = cpu_to_be64(data_va); - data_sentry->stride = cpu_to_be16(block_size); - - prot_sentry->bcount = cpu_to_be16(prot_size); - prot_sentry->key = cpu_to_be32(prot_key); - prot_sentry->va = cpu_to_be64(prot_va); - prot_sentry->stride = cpu_to_be16(prot_size); - - wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) + - sizeof(*prot_sentry), 64); - } - - *seg += wqe_size; - *size += wqe_size / 16; - handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - - bsf = *seg; - ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len); - if (ret) - return -EINVAL; - - *seg += sizeof(*bsf); - *size += sizeof(*bsf) / 16; - handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - - return 0; -} - -static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, - struct ib_mr *sig_mr, int access_flags, - u32 size, u32 length, u32 pdn) -{ - u32 sig_key = sig_mr->rkey; - u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1; - - memset(seg, 0, sizeof(*seg)); - - seg->flags = get_umr_flags(access_flags) | MLX5_MKC_ACCESS_MODE_KLMS; - seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00); - seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 | - MLX5_MKEY_BSF_EN | pdn); - seg->len = cpu_to_be64(length); - seg->xlt_oct_size = cpu_to_be32(get_xlt_octo(size)); - seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE); -} - -static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, - u32 size) -{ - memset(umr, 0, sizeof(*umr)); - - umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE; - umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size)); - umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE); - umr->mkey_mask = sig_mkey_mask(); -} - -static int set_pi_umr_wr(const struct ib_send_wr *send_wr, - struct mlx5_ib_qp *qp, void **seg, int *size, - void **cur_edge) -{ - const struct ib_reg_wr *wr = reg_wr(send_wr); - struct mlx5_ib_mr *sig_mr = to_mmr(wr->mr); - struct mlx5_ib_mr *pi_mr = sig_mr->pi_mr; - struct ib_sig_attrs *sig_attrs = sig_mr->ibmr.sig_attrs; - u32 pdn = get_pd(qp)->pdn; - u32 xlt_size; - int region_len, ret; - - if (unlikely(send_wr->num_sge != 0) || - unlikely(wr->access & IB_ACCESS_REMOTE_ATOMIC) || - unlikely(!sig_mr->sig) || unlikely(!qp->ibqp.integrity_en) || - unlikely(!sig_mr->sig->sig_status_checked)) - return -EINVAL; - - /* length of the protected region, data + protection */ - region_len = pi_mr->ibmr.length; - - /** - * KLM octoword size - if protection was provided - * then we use strided block format (3 octowords), - * else we use single KLM (1 octoword) - **/ - if (sig_attrs->mem.sig_type != IB_SIG_TYPE_NONE) - xlt_size = 0x30; - else - xlt_size = sizeof(struct mlx5_klm); - - set_sig_umr_segment(*seg, xlt_size); - *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); - *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; - handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - - set_sig_mkey_segment(*seg, wr->mr, wr->access, xlt_size, region_len, - pdn); - *seg += sizeof(struct mlx5_mkey_seg); - *size += sizeof(struct mlx5_mkey_seg) / 16; - handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - - ret = set_sig_data_segment(send_wr, wr->mr, sig_attrs, qp, seg, size, - cur_edge); - if (ret) - return ret; - - sig_mr->sig->sig_status_checked = false; - return 0; -} - -static int set_psv_wr(struct ib_sig_domain *domain, - u32 psv_idx, void **seg, int *size) -{ - struct mlx5_seg_set_psv *psv_seg = *seg; - - memset(psv_seg, 0, sizeof(*psv_seg)); - psv_seg->psv_num = cpu_to_be32(psv_idx); - switch (domain->sig_type) { - case IB_SIG_TYPE_NONE: - break; - case IB_SIG_TYPE_T10_DIF: - psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 | - domain->sig.dif.app_tag); - psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag); - break; - default: - pr_err("Bad signature type (%d) is given.\n", - domain->sig_type); - return -EINVAL; - } - - *seg += sizeof(*psv_seg); - *size += sizeof(*psv_seg) / 16; - - return 0; -} - -static int set_reg_wr(struct mlx5_ib_qp *qp, - const struct ib_reg_wr *wr, - void **seg, int *size, void **cur_edge, - bool check_not_free) -{ - struct mlx5_ib_mr *mr = to_mmr(wr->mr); - struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd); - struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device); - int mr_list_size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size; - bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD; - bool atomic = wr->access & IB_ACCESS_REMOTE_ATOMIC; - u8 flags = 0; - - if (!mlx5_ib_can_use_umr(dev, atomic, wr->access)) { - mlx5_ib_warn(to_mdev(qp->ibqp.device), - "Fast update of %s for MR is disabled\n", - (MLX5_CAP_GEN(dev->mdev, - umr_modify_entity_size_disabled)) ? - "entity size" : - "atomic access"); - return -EINVAL; - } - - if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) { - mlx5_ib_warn(to_mdev(qp->ibqp.device), - "Invalid IB_SEND_INLINE send flag\n"); - return -EINVAL; - } - - if (check_not_free) - flags |= MLX5_UMR_CHECK_NOT_FREE; - if (umr_inline) - flags |= MLX5_UMR_INLINE; - - set_reg_umr_seg(*seg, mr, flags, atomic); - *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); - *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; - handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - - set_reg_mkey_seg(*seg, mr, wr->key, wr->access); - *seg += sizeof(struct mlx5_mkey_seg); - *size += sizeof(struct mlx5_mkey_seg) / 16; - handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - - if (umr_inline) { - memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs, - mr_list_size); - *size = ALIGN(*size, MLX5_SEND_WQE_BB >> 4); - } else { - set_reg_data_seg(*seg, mr, pd); - *seg += sizeof(struct mlx5_wqe_data_seg); - *size += (sizeof(struct mlx5_wqe_data_seg) / 16); - } - return 0; -} - -static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size, - void **cur_edge) -{ - set_linv_umr_seg(*seg); - *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); - *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; - handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - set_linv_mkey_seg(*seg); - *seg += sizeof(struct mlx5_mkey_seg); - *size += sizeof(struct mlx5_mkey_seg) / 16; - handle_post_send_edge(&qp->sq, seg, *size, cur_edge); -} - -static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16) -{ - __be32 *p = NULL; - int i, j; - - pr_debug("dump WQE index %u:\n", idx); - for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) { - if ((i & 0xf) == 0) { - p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx); - pr_debug("WQBB at %p:\n", (void *)p); - j = 0; - idx = (idx + 1) & (qp->sq.wqe_cnt - 1); - } - pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]), - be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]), - be32_to_cpu(p[j + 3])); - } -} - -static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg, - struct mlx5_wqe_ctrl_seg **ctrl, - const struct ib_send_wr *wr, unsigned int *idx, - int *size, void **cur_edge, int nreq, - bool send_signaled, bool solicited) -{ - if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) - return -ENOMEM; - - *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); - *seg = mlx5_frag_buf_get_wqe(&qp->sq.fbc, *idx); - *ctrl = *seg; - *(uint32_t *)(*seg + 8) = 0; - (*ctrl)->imm = send_ieth(wr); - (*ctrl)->fm_ce_se = qp->sq_signal_bits | - (send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) | - (solicited ? MLX5_WQE_CTRL_SOLICITED : 0); - - *seg += sizeof(**ctrl); - *size = sizeof(**ctrl) / 16; - *cur_edge = qp->sq.cur_edge; - - return 0; -} - -static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, - struct mlx5_wqe_ctrl_seg **ctrl, - const struct ib_send_wr *wr, unsigned *idx, - int *size, void **cur_edge, int nreq) -{ - return __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq, - wr->send_flags & IB_SEND_SIGNALED, - wr->send_flags & IB_SEND_SOLICITED); -} - -static void finish_wqe(struct mlx5_ib_qp *qp, - struct mlx5_wqe_ctrl_seg *ctrl, - void *seg, u8 size, void *cur_edge, - unsigned int idx, u64 wr_id, int nreq, u8 fence, - u32 mlx5_opcode) -{ - u8 opmod = 0; - - ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | - mlx5_opcode | ((u32)opmod << 24)); - ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8)); - ctrl->fm_ce_se |= fence; - if (unlikely(qp->wq_sig)) - ctrl->signature = wq_sig(ctrl); - - qp->sq.wrid[idx] = wr_id; - qp->sq.w_list[idx].opcode = mlx5_opcode; - qp->sq.wqe_head[idx] = qp->sq.head + nreq; - qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB); - qp->sq.w_list[idx].next = qp->sq.cur_post; - - /* We save the edge which was possibly updated during the WQE - * construction, into SQ's cache. - */ - seg = PTR_ALIGN(seg, MLX5_SEND_WQE_BB); - qp->sq.cur_edge = (unlikely(seg == cur_edge)) ? - get_sq_edge(&qp->sq, qp->sq.cur_post & - (qp->sq.wqe_cnt - 1)) : - cur_edge; -} - -static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, - const struct ib_send_wr **bad_wr, bool drain) -{ - struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ - struct mlx5_ib_dev *dev = to_mdev(ibqp->device); - struct mlx5_core_dev *mdev = dev->mdev; - struct ib_reg_wr reg_pi_wr; - struct mlx5_ib_qp *qp; - struct mlx5_ib_mr *mr; - struct mlx5_ib_mr *pi_mr; - struct mlx5_ib_mr pa_pi_mr; - struct ib_sig_attrs *sig_attrs; - struct mlx5_wqe_xrc_seg *xrc; - struct mlx5_bf *bf; - void *cur_edge; - int uninitialized_var(size); - unsigned long flags; - unsigned idx; - int err = 0; - int num_sge; - void *seg; - int nreq; - int i; - u8 next_fence = 0; - u8 fence; - - if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && - !drain)) { - *bad_wr = wr; - return -EIO; - } - - if (unlikely(ibqp->qp_type == IB_QPT_GSI)) - return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr); - - qp = to_mqp(ibqp); - bf = &qp->bf; - - spin_lock_irqsave(&qp->sq.lock, flags); - - for (nreq = 0; wr; nreq++, wr = wr->next) { - if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) { - mlx5_ib_warn(dev, "\n"); - err = -EINVAL; - *bad_wr = wr; - goto out; - } - - num_sge = wr->num_sge; - if (unlikely(num_sge > qp->sq.max_gs)) { - mlx5_ib_warn(dev, "\n"); - err = -EINVAL; - *bad_wr = wr; - goto out; - } - - err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, &cur_edge, - nreq); - if (err) { - mlx5_ib_warn(dev, "\n"); - err = -ENOMEM; - *bad_wr = wr; - goto out; - } - - if (wr->opcode == IB_WR_REG_MR || - wr->opcode == IB_WR_REG_MR_INTEGRITY) { - fence = dev->umr_fence; - next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; - } else { - if (wr->send_flags & IB_SEND_FENCE) { - if (qp->next_fence) - fence = MLX5_FENCE_MODE_SMALL_AND_FENCE; - else - fence = MLX5_FENCE_MODE_FENCE; - } else { - fence = qp->next_fence; - } - } - - switch (ibqp->qp_type) { - case IB_QPT_XRC_INI: - xrc = seg; - seg += sizeof(*xrc); - size += sizeof(*xrc) / 16; - /* fall through */ - case IB_QPT_RC: - switch (wr->opcode) { - case IB_WR_RDMA_READ: - case IB_WR_RDMA_WRITE: - case IB_WR_RDMA_WRITE_WITH_IMM: - set_raddr_seg(seg, rdma_wr(wr)->remote_addr, - rdma_wr(wr)->rkey); - seg += sizeof(struct mlx5_wqe_raddr_seg); - size += sizeof(struct mlx5_wqe_raddr_seg) / 16; - break; - - case IB_WR_ATOMIC_CMP_AND_SWP: - case IB_WR_ATOMIC_FETCH_AND_ADD: - case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: - mlx5_ib_warn(dev, "Atomic operations are not supported yet\n"); - err = -ENOSYS; - *bad_wr = wr; - goto out; - - case IB_WR_LOCAL_INV: - qp->sq.wr_data[idx] = IB_WR_LOCAL_INV; - ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey); - set_linv_wr(qp, &seg, &size, &cur_edge); - num_sge = 0; - break; - - case IB_WR_REG_MR: - qp->sq.wr_data[idx] = IB_WR_REG_MR; - ctrl->imm = cpu_to_be32(reg_wr(wr)->key); - err = set_reg_wr(qp, reg_wr(wr), &seg, &size, - &cur_edge, true); - if (err) { - *bad_wr = wr; - goto out; - } - num_sge = 0; - break; - - case IB_WR_REG_MR_INTEGRITY: - qp->sq.wr_data[idx] = IB_WR_REG_MR_INTEGRITY; - - mr = to_mmr(reg_wr(wr)->mr); - pi_mr = mr->pi_mr; - - if (pi_mr) { - memset(®_pi_wr, 0, - sizeof(struct ib_reg_wr)); - - reg_pi_wr.mr = &pi_mr->ibmr; - reg_pi_wr.access = reg_wr(wr)->access; - reg_pi_wr.key = pi_mr->ibmr.rkey; - - ctrl->imm = cpu_to_be32(reg_pi_wr.key); - /* UMR for data + prot registration */ - err = set_reg_wr(qp, ®_pi_wr, &seg, - &size, &cur_edge, - false); - if (err) { - *bad_wr = wr; - goto out; - } - finish_wqe(qp, ctrl, seg, size, - cur_edge, idx, wr->wr_id, - nreq, fence, - MLX5_OPCODE_UMR); - - err = begin_wqe(qp, &seg, &ctrl, wr, - &idx, &size, &cur_edge, - nreq); - if (err) { - mlx5_ib_warn(dev, "\n"); - err = -ENOMEM; - *bad_wr = wr; - goto out; - } - } else { - memset(&pa_pi_mr, 0, - sizeof(struct mlx5_ib_mr)); - /* No UMR, use local_dma_lkey */ - pa_pi_mr.ibmr.lkey = - mr->ibmr.pd->local_dma_lkey; - - pa_pi_mr.ndescs = mr->ndescs; - pa_pi_mr.data_length = mr->data_length; - pa_pi_mr.data_iova = mr->data_iova; - if (mr->meta_ndescs) { - pa_pi_mr.meta_ndescs = - mr->meta_ndescs; - pa_pi_mr.meta_length = - mr->meta_length; - pa_pi_mr.pi_iova = mr->pi_iova; - } - - pa_pi_mr.ibmr.length = mr->ibmr.length; - mr->pi_mr = &pa_pi_mr; - } - ctrl->imm = cpu_to_be32(mr->ibmr.rkey); - /* UMR for sig MR */ - err = set_pi_umr_wr(wr, qp, &seg, &size, - &cur_edge); - if (err) { - mlx5_ib_warn(dev, "\n"); - *bad_wr = wr; - goto out; - } - finish_wqe(qp, ctrl, seg, size, cur_edge, idx, - wr->wr_id, nreq, fence, - MLX5_OPCODE_UMR); - - /* - * SET_PSV WQEs are not signaled and solicited - * on error - */ - sig_attrs = mr->ibmr.sig_attrs; - err = __begin_wqe(qp, &seg, &ctrl, wr, &idx, - &size, &cur_edge, nreq, false, - true); - if (err) { - mlx5_ib_warn(dev, "\n"); - err = -ENOMEM; - *bad_wr = wr; - goto out; - } - err = set_psv_wr(&sig_attrs->mem, - mr->sig->psv_memory.psv_idx, - &seg, &size); - if (err) { - mlx5_ib_warn(dev, "\n"); - *bad_wr = wr; - goto out; - } - finish_wqe(qp, ctrl, seg, size, cur_edge, idx, - wr->wr_id, nreq, next_fence, - MLX5_OPCODE_SET_PSV); - - err = __begin_wqe(qp, &seg, &ctrl, wr, &idx, - &size, &cur_edge, nreq, false, - true); - if (err) { - mlx5_ib_warn(dev, "\n"); - err = -ENOMEM; - *bad_wr = wr; - goto out; - } - err = set_psv_wr(&sig_attrs->wire, - mr->sig->psv_wire.psv_idx, - &seg, &size); - if (err) { - mlx5_ib_warn(dev, "\n"); - *bad_wr = wr; - goto out; - } - finish_wqe(qp, ctrl, seg, size, cur_edge, idx, - wr->wr_id, nreq, next_fence, - MLX5_OPCODE_SET_PSV); - - qp->next_fence = - MLX5_FENCE_MODE_INITIATOR_SMALL; - num_sge = 0; - goto skip_psv; - - default: - break; - } - break; - - case IB_QPT_UC: - switch (wr->opcode) { - case IB_WR_RDMA_WRITE: - case IB_WR_RDMA_WRITE_WITH_IMM: - set_raddr_seg(seg, rdma_wr(wr)->remote_addr, - rdma_wr(wr)->rkey); - seg += sizeof(struct mlx5_wqe_raddr_seg); - size += sizeof(struct mlx5_wqe_raddr_seg) / 16; - break; - - default: - break; - } - break; - - case IB_QPT_SMI: - if (unlikely(!mdev->port_caps[qp->port - 1].has_smi)) { - mlx5_ib_warn(dev, "Send SMP MADs is not allowed\n"); - err = -EPERM; - *bad_wr = wr; - goto out; - } - /* fall through */ - case MLX5_IB_QPT_HW_GSI: - set_datagram_seg(seg, wr); - seg += sizeof(struct mlx5_wqe_datagram_seg); - size += sizeof(struct mlx5_wqe_datagram_seg) / 16; - handle_post_send_edge(&qp->sq, &seg, size, &cur_edge); - - break; - case IB_QPT_UD: - set_datagram_seg(seg, wr); - seg += sizeof(struct mlx5_wqe_datagram_seg); - size += sizeof(struct mlx5_wqe_datagram_seg) / 16; - handle_post_send_edge(&qp->sq, &seg, size, &cur_edge); - - /* handle qp that supports ud offload */ - if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) { - struct mlx5_wqe_eth_pad *pad; - - pad = seg; - memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad)); - seg += sizeof(struct mlx5_wqe_eth_pad); - size += sizeof(struct mlx5_wqe_eth_pad) / 16; - set_eth_seg(wr, qp, &seg, &size, &cur_edge); - handle_post_send_edge(&qp->sq, &seg, size, - &cur_edge); - } - break; - case MLX5_IB_QPT_REG_UMR: - if (wr->opcode != MLX5_IB_WR_UMR) { - err = -EINVAL; - mlx5_ib_warn(dev, "bad opcode\n"); - goto out; - } - qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; - ctrl->imm = cpu_to_be32(umr_wr(wr)->mkey); - err = set_reg_umr_segment(dev, seg, wr, !!(MLX5_CAP_GEN(mdev, atomic))); - if (unlikely(err)) - goto out; - seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); - size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; - handle_post_send_edge(&qp->sq, &seg, size, &cur_edge); - set_reg_mkey_segment(seg, wr); - seg += sizeof(struct mlx5_mkey_seg); - size += sizeof(struct mlx5_mkey_seg) / 16; - handle_post_send_edge(&qp->sq, &seg, size, &cur_edge); - break; - - default: - break; - } - - if (wr->send_flags & IB_SEND_INLINE && num_sge) { - err = set_data_inl_seg(qp, wr, &seg, &size, &cur_edge); - if (unlikely(err)) { - mlx5_ib_warn(dev, "\n"); - *bad_wr = wr; - goto out; - } - } else { - for (i = 0; i < num_sge; i++) { - handle_post_send_edge(&qp->sq, &seg, size, - &cur_edge); - if (likely(wr->sg_list[i].length)) { - set_data_ptr_seg - ((struct mlx5_wqe_data_seg *)seg, - wr->sg_list + i); - size += sizeof(struct mlx5_wqe_data_seg) / 16; - seg += sizeof(struct mlx5_wqe_data_seg); - } - } - } - - qp->next_fence = next_fence; - finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, nreq, - fence, mlx5_ib_opcode[wr->opcode]); -skip_psv: - if (0) - dump_wqe(qp, idx, size); - } - -out: - if (likely(nreq)) { - qp->sq.head += nreq; - - /* Make sure that descriptors are written before - * updating doorbell record and ringing the doorbell - */ - wmb(); - - qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post); - - /* Make sure doorbell record is visible to the HCA before - * we hit doorbell */ - wmb(); - - mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset); - /* Make sure doorbells don't leak out of SQ spinlock - * and reach the HCA out of order. - */ - bf->offset ^= bf->buf_size; - } - - spin_unlock_irqrestore(&qp->sq.lock, flags); - - return err; -} - -int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, - const struct ib_send_wr **bad_wr) -{ - return _mlx5_ib_post_send(ibqp, wr, bad_wr, false); -} - -static void set_sig_seg(struct mlx5_rwqe_sig *sig, int size) -{ - sig->signature = calc_sig(sig, size); -} - -static int _mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, - const struct ib_recv_wr **bad_wr, bool drain) -{ - struct mlx5_ib_qp *qp = to_mqp(ibqp); - struct mlx5_wqe_data_seg *scat; - struct mlx5_rwqe_sig *sig; - struct mlx5_ib_dev *dev = to_mdev(ibqp->device); - struct mlx5_core_dev *mdev = dev->mdev; - unsigned long flags; - int err = 0; - int nreq; - int ind; - int i; - - if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && - !drain)) { - *bad_wr = wr; - return -EIO; - } - - if (unlikely(ibqp->qp_type == IB_QPT_GSI)) - return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr); - - spin_lock_irqsave(&qp->rq.lock, flags); - - ind = qp->rq.head & (qp->rq.wqe_cnt - 1); - - for (nreq = 0; wr; nreq++, wr = wr->next) { - if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) { - err = -ENOMEM; - *bad_wr = wr; - goto out; - } - - if (unlikely(wr->num_sge > qp->rq.max_gs)) { - err = -EINVAL; - *bad_wr = wr; - goto out; - } - - scat = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ind); - if (qp->wq_sig) - scat++; - - for (i = 0; i < wr->num_sge; i++) - set_data_ptr_seg(scat + i, wr->sg_list + i); - - if (i < qp->rq.max_gs) { - scat[i].byte_count = 0; - scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY); - scat[i].addr = 0; - } - - if (qp->wq_sig) { - sig = (struct mlx5_rwqe_sig *)scat; - set_sig_seg(sig, (qp->rq.max_gs + 1) << 2); - } - - qp->rq.wrid[ind] = wr->wr_id; - - ind = (ind + 1) & (qp->rq.wqe_cnt - 1); - } - -out: - if (likely(nreq)) { - qp->rq.head += nreq; - - /* Make sure that descriptors are written before - * doorbell record. - */ - wmb(); - - *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff); - } - - spin_unlock_irqrestore(&qp->rq.lock, flags); - - return err; -} - -int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, - const struct ib_recv_wr **bad_wr) -{ - return _mlx5_ib_post_recv(ibqp, wr, bad_wr, false); -} - static inline enum ib_qp_state to_ib_qp_state(enum mlx5_qp_state mlx5_state) { switch (mlx5_state) { @@ -5828,14 +4569,14 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, memset(qp_init_attr, 0, sizeof(*qp_init_attr)); memset(qp_attr, 0, sizeof(*qp_attr)); - if (unlikely(qp->qp_sub_type == MLX5_IB_QPT_DCT)) + if (unlikely(qp->type == MLX5_IB_QPT_DCT)) return mlx5_ib_dct_query_qp(dev, qp, qp_attr, qp_attr_mask, qp_init_attr); mutex_lock(&qp->mutex); if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET || - qp->flags & MLX5_IB_QP_UNDERLAY) { + qp->flags & IB_QP_CREATE_SOURCE_QPN) { err = query_raw_packet_qp_state(dev, qp, &raw_packet_qp_state); if (err) goto out; @@ -5869,18 +4610,7 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_init_attr->cap = qp_attr->cap; - qp_init_attr->create_flags = 0; - if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK) - qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; - - if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL) - qp_init_attr->create_flags |= IB_QP_CREATE_CROSS_CHANNEL; - if (qp->flags & MLX5_IB_QP_MANAGED_SEND) - qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_SEND; - if (qp->flags & MLX5_IB_QP_MANAGED_RECV) - qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV; - if (qp->flags & MLX5_IB_QP_SQPN_QP1) - qp_init_attr->create_flags |= MLX5_IB_QP_CREATE_SQPN_QP1; + qp_init_attr->create_flags = qp->flags; qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; @@ -6541,7 +5271,7 @@ void mlx5_ib_drain_sq(struct ib_qp *qp) sdrain.cqe.done = mlx5_ib_drain_qp_done; init_completion(&sdrain.done); - ret = _mlx5_ib_post_send(qp, &swr.wr, &bad_swr, true); + ret = mlx5_ib_post_send_drain(qp, &swr.wr, &bad_swr); if (ret) { WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); return; @@ -6571,7 +5301,7 @@ void mlx5_ib_drain_rq(struct ib_qp *qp) rdrain.cqe.done = mlx5_ib_drain_qp_done; init_completion(&rdrain.done); - ret = _mlx5_ib_post_recv(qp, &rwr, &bad_rwr, true); + ret = mlx5_ib_post_recv_drain(qp, &rwr, &bad_rwr); if (ret) { WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); return; diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index b1a8a9175040..6d1ff13d2283 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -310,12 +310,18 @@ int mlx5_ib_create_srq(struct ib_srq *ib_srq, srq->msrq.event = mlx5_ib_srq_event; srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn; - if (udata) - if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof(__u32))) { + if (udata) { + struct mlx5_ib_create_srq_resp resp = { + .srqn = srq->msrq.srqn, + }; + + if (ib_copy_to_udata(udata, &resp, min(udata->outlen, + sizeof(resp)))) { mlx5_ib_dbg(dev, "copy to user failed\n"); err = -EFAULT; goto err_core; } + } init_attr->attr.max_wr = srq->msrq.max - 1; diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c index c851570791af..bc50a712bf2e 100644 --- a/drivers/infiniband/hw/mlx5/srq_cmd.c +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -132,38 +132,33 @@ static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, static int destroy_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) { - u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0}; - u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_srq_in)] = {}; - MLX5_SET(destroy_srq_in, srq_in, opcode, - MLX5_CMD_OP_DESTROY_SRQ); - MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn); - MLX5_SET(destroy_srq_in, srq_in, uid, srq->uid); + MLX5_SET(destroy_srq_in, in, opcode, MLX5_CMD_OP_DESTROY_SRQ); + MLX5_SET(destroy_srq_in, in, srqn, srq->srqn); + MLX5_SET(destroy_srq_in, in, uid, srq->uid); - return mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out, - sizeof(srq_out)); + return mlx5_cmd_exec_in(dev->mdev, destroy_srq, in); } static int arm_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, u16 lwm, int is_srq) { - u32 srq_in[MLX5_ST_SZ_DW(arm_rq_in)] = {0}; - u32 srq_out[MLX5_ST_SZ_DW(arm_rq_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {}; - MLX5_SET(arm_rq_in, srq_in, opcode, MLX5_CMD_OP_ARM_RQ); - MLX5_SET(arm_rq_in, srq_in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ); - MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn); - MLX5_SET(arm_rq_in, srq_in, lwm, lwm); - MLX5_SET(arm_rq_in, srq_in, uid, srq->uid); + MLX5_SET(arm_rq_in, in, opcode, MLX5_CMD_OP_ARM_RQ); + MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ); + MLX5_SET(arm_rq_in, in, srq_number, srq->srqn); + MLX5_SET(arm_rq_in, in, lwm, lwm); + MLX5_SET(arm_rq_in, in, uid, srq->uid); - return mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out, - sizeof(srq_out)); + return mlx5_cmd_exec_in(dev->mdev, arm_rq, in); } static int query_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *out) { - u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0}; + u32 in[MLX5_ST_SZ_DW(query_srq_in)] = {}; u32 *srq_out; void *srqc; int err; @@ -172,20 +167,18 @@ static int query_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, if (!srq_out) return -ENOMEM; - MLX5_SET(query_srq_in, srq_in, opcode, - MLX5_CMD_OP_QUERY_SRQ); - MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn); - err = mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out, - MLX5_ST_SZ_BYTES(query_srq_out)); + MLX5_SET(query_srq_in, in, opcode, MLX5_CMD_OP_QUERY_SRQ); + MLX5_SET(query_srq_in, in, srqn, srq->srqn); + err = mlx5_cmd_exec_inout(dev->mdev, query_srq, in, out); if (err) goto out; - srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry); + srqc = MLX5_ADDR_OF(query_srq_out, out, srq_context_entry); get_srqc(srqc, out); if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD) out->flags |= MLX5_SRQ_FLAG_ERR; out: - kvfree(srq_out); + kvfree(out); return err; } @@ -234,39 +227,35 @@ out: static int destroy_xrc_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) { - u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0}; - u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {}; - MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode, - MLX5_CMD_OP_DESTROY_XRC_SRQ); - MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); - MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, uid, srq->uid); + MLX5_SET(destroy_xrc_srq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ); + MLX5_SET(destroy_xrc_srq_in, in, xrc_srqn, srq->srqn); + MLX5_SET(destroy_xrc_srq_in, in, uid, srq->uid); - return mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in), - xrcsrq_out, sizeof(xrcsrq_out)); + return mlx5_cmd_exec_in(dev->mdev, destroy_xrc_srq, in); } static int arm_xrc_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, u16 lwm) { - u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0}; - u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {}; - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm); - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, uid, srq->uid); + MLX5_SET(arm_xrc_srq_in, in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); + MLX5_SET(arm_xrc_srq_in, in, op_mod, + MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); + MLX5_SET(arm_xrc_srq_in, in, xrc_srqn, srq->srqn); + MLX5_SET(arm_xrc_srq_in, in, lwm, lwm); + MLX5_SET(arm_xrc_srq_in, in, uid, srq->uid); - return mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in), - xrcsrq_out, sizeof(xrcsrq_out)); + return mlx5_cmd_exec_in(dev->mdev, arm_xrc_srq, in); } static int query_xrc_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *out) { - u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)]; + u32 in[MLX5_ST_SZ_DW(query_xrc_srq_in)] = {}; u32 *xrcsrq_out; void *xrc_srqc; int err; @@ -274,14 +263,11 @@ static int query_xrc_srq_cmd(struct mlx5_ib_dev *dev, xrcsrq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out), GFP_KERNEL); if (!xrcsrq_out) return -ENOMEM; - memset(xrcsrq_in, 0, sizeof(xrcsrq_in)); - MLX5_SET(query_xrc_srq_in, xrcsrq_in, opcode, - MLX5_CMD_OP_QUERY_XRC_SRQ); - MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + MLX5_SET(query_xrc_srq_in, in, opcode, MLX5_CMD_OP_QUERY_XRC_SRQ); + MLX5_SET(query_xrc_srq_in, in, xrc_srqn, srq->srqn); - err = mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in), - xrcsrq_out, MLX5_ST_SZ_BYTES(query_xrc_srq_out)); + err = mlx5_cmd_exec_inout(dev->mdev, query_xrc_srq, in, xrcsrq_out); if (err) goto out; @@ -341,13 +327,12 @@ out: static int destroy_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) { - u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {}; - u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {}; + u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {}; MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP); MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn); MLX5_SET(destroy_rmp_in, in, uid, srq->uid); - return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); + return mlx5_cmd_exec_in(dev->mdev, destroy_rmp, in); } static int arm_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, @@ -384,7 +369,7 @@ static int arm_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP); - err = mlx5_cmd_exec(dev->mdev, in, inlen, out, outlen); + err = mlx5_cmd_exec_inout(dev->mdev, modify_rmp, in, out); out: kvfree(in); @@ -414,7 +399,7 @@ static int query_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(query_rmp_in, rmp_in, opcode, MLX5_CMD_OP_QUERY_RMP); MLX5_SET(query_rmp_in, rmp_in, rmpn, srq->srqn); - err = mlx5_cmd_exec(dev->mdev, rmp_in, inlen, rmp_out, outlen); + err = mlx5_cmd_exec_inout(dev->mdev, query_rmp, rmp_in, rmp_out); if (err) goto out; @@ -477,36 +462,34 @@ static int create_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, static int destroy_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) { - u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(destroy_xrq_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {}; MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ); - MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn); + MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn); MLX5_SET(destroy_xrq_in, in, uid, srq->uid); - return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); + return mlx5_cmd_exec_in(dev->mdev, destroy_xrq, in); } static int arm_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, u16 lwm) { - u32 out[MLX5_ST_SZ_DW(arm_rq_out)] = {0}; - u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {0}; + u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {}; - MLX5_SET(arm_rq_in, in, opcode, MLX5_CMD_OP_ARM_RQ); - MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ); + MLX5_SET(arm_rq_in, in, opcode, MLX5_CMD_OP_ARM_RQ); + MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ); MLX5_SET(arm_rq_in, in, srq_number, srq->srqn); - MLX5_SET(arm_rq_in, in, lwm, lwm); + MLX5_SET(arm_rq_in, in, lwm, lwm); MLX5_SET(arm_rq_in, in, uid, srq->uid); - return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); + return mlx5_cmd_exec_in(dev->mdev, arm_rq, in); } static int query_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *out) { - u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {0}; + u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {}; u32 *xrq_out; int outlen = MLX5_ST_SZ_BYTES(query_xrq_out); void *xrqc; @@ -519,7 +502,7 @@ static int query_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(query_xrq_in, in, opcode, MLX5_CMD_OP_QUERY_XRQ); MLX5_SET(query_xrq_in, in, xrqn, srq->srqn); - err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), xrq_out, outlen); + err = mlx5_cmd_exec_inout(dev->mdev, query_xrq, in, xrq_out); if (err) goto out; diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c new file mode 100644 index 000000000000..2c6df1c43b55 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/wr.c @@ -0,0 +1,1504 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. + */ + +#include <linux/gfp.h> +#include <linux/mlx5/qp.h> +#include <linux/mlx5/driver.h> +#include "wr.h" + +static const u32 mlx5_ib_opcode[] = { + [IB_WR_SEND] = MLX5_OPCODE_SEND, + [IB_WR_LSO] = MLX5_OPCODE_LSO, + [IB_WR_SEND_WITH_IMM] = MLX5_OPCODE_SEND_IMM, + [IB_WR_RDMA_WRITE] = MLX5_OPCODE_RDMA_WRITE, + [IB_WR_RDMA_WRITE_WITH_IMM] = MLX5_OPCODE_RDMA_WRITE_IMM, + [IB_WR_RDMA_READ] = MLX5_OPCODE_RDMA_READ, + [IB_WR_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_CS, + [IB_WR_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_FA, + [IB_WR_SEND_WITH_INV] = MLX5_OPCODE_SEND_INVAL, + [IB_WR_LOCAL_INV] = MLX5_OPCODE_UMR, + [IB_WR_REG_MR] = MLX5_OPCODE_UMR, + [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_MASKED_CS, + [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_MASKED_FA, + [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR, +}; + +/* handle_post_send_edge - Check if we get to SQ edge. If yes, update to the + * next nearby edge and get new address translation for current WQE position. + * @sq - SQ buffer. + * @seg: Current WQE position (16B aligned). + * @wqe_sz: Total current WQE size [16B]. + * @cur_edge: Updated current edge. + */ +static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg, + u32 wqe_sz, void **cur_edge) +{ + u32 idx; + + if (likely(*seg != *cur_edge)) + return; + + idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1); + *cur_edge = get_sq_edge(sq, idx); + + *seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx); +} + +/* memcpy_send_wqe - copy data from src to WQE and update the relevant WQ's + * pointers. At the end @seg is aligned to 16B regardless the copied size. + * @sq - SQ buffer. + * @cur_edge: Updated current edge. + * @seg: Current WQE position (16B aligned). + * @wqe_sz: Total current WQE size [16B]. + * @src: Pointer to copy from. + * @n: Number of bytes to copy. + */ +static inline void memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge, + void **seg, u32 *wqe_sz, const void *src, + size_t n) +{ + while (likely(n)) { + size_t leftlen = *cur_edge - *seg; + size_t copysz = min_t(size_t, leftlen, n); + size_t stride; + + memcpy(*seg, src, copysz); + + n -= copysz; + src += copysz; + stride = !n ? ALIGN(copysz, 16) : copysz; + *seg += stride; + *wqe_sz += stride >> 4; + handle_post_send_edge(sq, seg, *wqe_sz, cur_edge); + } +} + +static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, + struct ib_cq *ib_cq) +{ + struct mlx5_ib_cq *cq; + unsigned int cur; + + cur = wq->head - wq->tail; + if (likely(cur + nreq < wq->max_post)) + return 0; + + cq = to_mcq(ib_cq); + spin_lock(&cq->lock); + cur = wq->head - wq->tail; + spin_unlock(&cq->lock); + + return cur + nreq >= wq->max_post; +} + +static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg, + u64 remote_addr, u32 rkey) +{ + rseg->raddr = cpu_to_be64(remote_addr); + rseg->rkey = cpu_to_be32(rkey); + rseg->reserved = 0; +} + +static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp, + void **seg, int *size, void **cur_edge) +{ + struct mlx5_wqe_eth_seg *eseg = *seg; + + memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg)); + + if (wr->send_flags & IB_SEND_IP_CSUM) + eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM | + MLX5_ETH_WQE_L4_CSUM; + + if (wr->opcode == IB_WR_LSO) { + struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr); + size_t left, copysz; + void *pdata = ud_wr->header; + size_t stride; + + left = ud_wr->hlen; + eseg->mss = cpu_to_be16(ud_wr->mss); + eseg->inline_hdr.sz = cpu_to_be16(left); + + /* memcpy_send_wqe should get a 16B align address. Hence, we + * first copy up to the current edge and then, if needed, + * continue to memcpy_send_wqe. + */ + copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start, + left); + memcpy(eseg->inline_hdr.start, pdata, copysz); + stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) - + sizeof(eseg->inline_hdr.start) + copysz, 16); + *size += stride / 16; + *seg += stride; + + if (copysz < left) { + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); + left -= copysz; + pdata += copysz; + memcpy_send_wqe(&qp->sq, cur_edge, seg, size, pdata, + left); + } + + return; + } + + *seg += sizeof(struct mlx5_wqe_eth_seg); + *size += sizeof(struct mlx5_wqe_eth_seg) / 16; +} + +static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg, + const struct ib_send_wr *wr) +{ + memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av)); + dseg->av.dqp_dct = + cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV); + dseg->av.key.qkey.qkey = cpu_to_be32(ud_wr(wr)->remote_qkey); +} + +static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg) +{ + dseg->byte_count = cpu_to_be32(sg->length); + dseg->lkey = cpu_to_be32(sg->lkey); + dseg->addr = cpu_to_be64(sg->addr); +} + +static u64 get_xlt_octo(u64 bytes) +{ + return ALIGN(bytes, MLX5_IB_UMR_XLT_ALIGNMENT) / + MLX5_IB_UMR_OCTOWORD; +} + +static __be64 frwr_mkey_mask(bool atomic) +{ + u64 result; + + result = MLX5_MKEY_MASK_LEN | + MLX5_MKEY_MASK_PAGE_SIZE | + MLX5_MKEY_MASK_START_ADDR | + MLX5_MKEY_MASK_EN_RINVAL | + MLX5_MKEY_MASK_KEY | + MLX5_MKEY_MASK_LR | + MLX5_MKEY_MASK_LW | + MLX5_MKEY_MASK_RR | + MLX5_MKEY_MASK_RW | + MLX5_MKEY_MASK_SMALL_FENCE | + MLX5_MKEY_MASK_FREE; + + if (atomic) + result |= MLX5_MKEY_MASK_A; + + return cpu_to_be64(result); +} + +static __be64 sig_mkey_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_LEN | + MLX5_MKEY_MASK_PAGE_SIZE | + MLX5_MKEY_MASK_START_ADDR | + MLX5_MKEY_MASK_EN_SIGERR | + MLX5_MKEY_MASK_EN_RINVAL | + MLX5_MKEY_MASK_KEY | + MLX5_MKEY_MASK_LR | + MLX5_MKEY_MASK_LW | + MLX5_MKEY_MASK_RR | + MLX5_MKEY_MASK_RW | + MLX5_MKEY_MASK_SMALL_FENCE | + MLX5_MKEY_MASK_FREE | + MLX5_MKEY_MASK_BSF_EN; + + return cpu_to_be64(result); +} + +static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, + struct mlx5_ib_mr *mr, u8 flags, bool atomic) +{ + int size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size; + + memset(umr, 0, sizeof(*umr)); + + umr->flags = flags; + umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size)); + umr->mkey_mask = frwr_mkey_mask(atomic); +} + +static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr) +{ + memset(umr, 0, sizeof(*umr)); + umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); + umr->flags = MLX5_UMR_INLINE; +} + +static __be64 get_umr_enable_mr_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_KEY | + MLX5_MKEY_MASK_FREE; + + return cpu_to_be64(result); +} + +static __be64 get_umr_disable_mr_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_FREE; + + return cpu_to_be64(result); +} + +static __be64 get_umr_update_translation_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_LEN | + MLX5_MKEY_MASK_PAGE_SIZE | + MLX5_MKEY_MASK_START_ADDR; + + return cpu_to_be64(result); +} + +static __be64 get_umr_update_access_mask(int atomic) +{ + u64 result; + + result = MLX5_MKEY_MASK_LR | + MLX5_MKEY_MASK_LW | + MLX5_MKEY_MASK_RR | + MLX5_MKEY_MASK_RW; + + if (atomic) + result |= MLX5_MKEY_MASK_A; + + return cpu_to_be64(result); +} + +static __be64 get_umr_update_pd_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_PD; + + return cpu_to_be64(result); +} + +static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask) +{ + if ((mask & MLX5_MKEY_MASK_PAGE_SIZE && + MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) || + (mask & MLX5_MKEY_MASK_A && + MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))) + return -EPERM; + return 0; +} + +static int set_reg_umr_segment(struct mlx5_ib_dev *dev, + struct mlx5_wqe_umr_ctrl_seg *umr, + const struct ib_send_wr *wr, int atomic) +{ + const struct mlx5_umr_wr *umrwr = umr_wr(wr); + + memset(umr, 0, sizeof(*umr)); + + if (!umrwr->ignore_free_state) { + if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE) + /* fail if free */ + umr->flags = MLX5_UMR_CHECK_FREE; + else + /* fail if not free */ + umr->flags = MLX5_UMR_CHECK_NOT_FREE; + } + + umr->xlt_octowords = cpu_to_be16(get_xlt_octo(umrwr->xlt_size)); + if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_XLT) { + u64 offset = get_xlt_octo(umrwr->offset); + + umr->xlt_offset = cpu_to_be16(offset & 0xffff); + umr->xlt_offset_47_16 = cpu_to_be32(offset >> 16); + umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN; + } + if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION) + umr->mkey_mask |= get_umr_update_translation_mask(); + if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS) { + umr->mkey_mask |= get_umr_update_access_mask(atomic); + umr->mkey_mask |= get_umr_update_pd_mask(); + } + if (wr->send_flags & MLX5_IB_SEND_UMR_ENABLE_MR) + umr->mkey_mask |= get_umr_enable_mr_mask(); + if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR) + umr->mkey_mask |= get_umr_disable_mr_mask(); + + if (!wr->num_sge) + umr->flags |= MLX5_UMR_INLINE; + + return umr_check_mkey_mask(dev, be64_to_cpu(umr->mkey_mask)); +} + +static u8 get_umr_flags(int acc) +{ + return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) | + (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) | + (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) | + (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) | + MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN; +} + +static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg, + struct mlx5_ib_mr *mr, + u32 key, int access) +{ + int ndescs = ALIGN(mr->ndescs + mr->meta_ndescs, 8) >> 1; + + memset(seg, 0, sizeof(*seg)); + + if (mr->access_mode == MLX5_MKC_ACCESS_MODE_MTT) + seg->log2_page_size = ilog2(mr->ibmr.page_size); + else if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS) + /* KLMs take twice the size of MTTs */ + ndescs *= 2; + + seg->flags = get_umr_flags(access) | mr->access_mode; + seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00); + seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); + seg->start_addr = cpu_to_be64(mr->ibmr.iova); + seg->len = cpu_to_be64(mr->ibmr.length); + seg->xlt_oct_size = cpu_to_be32(ndescs); +} + +static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg) +{ + memset(seg, 0, sizeof(*seg)); + seg->status = MLX5_MKEY_STATUS_FREE; +} + +static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, + const struct ib_send_wr *wr) +{ + const struct mlx5_umr_wr *umrwr = umr_wr(wr); + + memset(seg, 0, sizeof(*seg)); + if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR) + seg->status = MLX5_MKEY_STATUS_FREE; + + seg->flags = convert_access(umrwr->access_flags); + if (umrwr->pd) + seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn); + if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION && + !umrwr->length) + seg->flags_pd |= cpu_to_be32(MLX5_MKEY_LEN64); + + seg->start_addr = cpu_to_be64(umrwr->virt_addr); + seg->len = cpu_to_be64(umrwr->length); + seg->log2_page_size = umrwr->page_shift; + seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 | + mlx5_mkey_variant(umrwr->mkey)); +} + +static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg, + struct mlx5_ib_mr *mr, + struct mlx5_ib_pd *pd) +{ + int bcount = mr->desc_size * (mr->ndescs + mr->meta_ndescs); + + dseg->addr = cpu_to_be64(mr->desc_map); + dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64)); + dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey); +} + +static __be32 send_ieth(const struct ib_send_wr *wr) +{ + switch (wr->opcode) { + case IB_WR_SEND_WITH_IMM: + case IB_WR_RDMA_WRITE_WITH_IMM: + return wr->ex.imm_data; + + case IB_WR_SEND_WITH_INV: + return cpu_to_be32(wr->ex.invalidate_rkey); + + default: + return 0; + } +} + +static u8 calc_sig(void *wqe, int size) +{ + u8 *p = wqe; + u8 res = 0; + int i; + + for (i = 0; i < size; i++) + res ^= p[i]; + + return ~res; +} + +static u8 wq_sig(void *wqe) +{ + return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4); +} + +static int set_data_inl_seg(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr, + void **wqe, int *wqe_sz, void **cur_edge) +{ + struct mlx5_wqe_inline_seg *seg; + size_t offset; + int inl = 0; + int i; + + seg = *wqe; + *wqe += sizeof(*seg); + offset = sizeof(*seg); + + for (i = 0; i < wr->num_sge; i++) { + size_t len = wr->sg_list[i].length; + void *addr = (void *)(unsigned long)(wr->sg_list[i].addr); + + inl += len; + + if (unlikely(inl > qp->max_inline_data)) + return -ENOMEM; + + while (likely(len)) { + size_t leftlen; + size_t copysz; + + handle_post_send_edge(&qp->sq, wqe, + *wqe_sz + (offset >> 4), + cur_edge); + + leftlen = *cur_edge - *wqe; + copysz = min_t(size_t, leftlen, len); + + memcpy(*wqe, addr, copysz); + len -= copysz; + addr += copysz; + *wqe += copysz; + offset += copysz; + } + } + + seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG); + + *wqe_sz += ALIGN(inl + sizeof(seg->byte_count), 16) / 16; + + return 0; +} + +static u16 prot_field_size(enum ib_signature_type type) +{ + switch (type) { + case IB_SIG_TYPE_T10_DIF: + return MLX5_DIF_SIZE; + default: + return 0; + } +} + +static u8 bs_selector(int block_size) +{ + switch (block_size) { + case 512: return 0x1; + case 520: return 0x2; + case 4096: return 0x3; + case 4160: return 0x4; + case 1073741824: return 0x5; + default: return 0; + } +} + +static void mlx5_fill_inl_bsf(struct ib_sig_domain *domain, + struct mlx5_bsf_inl *inl) +{ + /* Valid inline section and allow BSF refresh */ + inl->vld_refresh = cpu_to_be16(MLX5_BSF_INL_VALID | + MLX5_BSF_REFRESH_DIF); + inl->dif_apptag = cpu_to_be16(domain->sig.dif.app_tag); + inl->dif_reftag = cpu_to_be32(domain->sig.dif.ref_tag); + /* repeating block */ + inl->rp_inv_seed = MLX5_BSF_REPEAT_BLOCK; + inl->sig_type = domain->sig.dif.bg_type == IB_T10DIF_CRC ? + MLX5_DIF_CRC : MLX5_DIF_IPCS; + + if (domain->sig.dif.ref_remap) + inl->dif_inc_ref_guard_check |= MLX5_BSF_INC_REFTAG; + + if (domain->sig.dif.app_escape) { + if (domain->sig.dif.ref_escape) + inl->dif_inc_ref_guard_check |= MLX5_BSF_APPREF_ESCAPE; + else + inl->dif_inc_ref_guard_check |= MLX5_BSF_APPTAG_ESCAPE; + } + + inl->dif_app_bitmask_check = + cpu_to_be16(domain->sig.dif.apptag_check_mask); +} + +static int mlx5_set_bsf(struct ib_mr *sig_mr, + struct ib_sig_attrs *sig_attrs, + struct mlx5_bsf *bsf, u32 data_size) +{ + struct mlx5_core_sig_ctx *msig = to_mmr(sig_mr)->sig; + struct mlx5_bsf_basic *basic = &bsf->basic; + struct ib_sig_domain *mem = &sig_attrs->mem; + struct ib_sig_domain *wire = &sig_attrs->wire; + + memset(bsf, 0, sizeof(*bsf)); + + /* Basic + Extended + Inline */ + basic->bsf_size_sbs = 1 << 7; + /* Input domain check byte mask */ + basic->check_byte_mask = sig_attrs->check_mask; + basic->raw_data_size = cpu_to_be32(data_size); + + /* Memory domain */ + switch (sig_attrs->mem.sig_type) { + case IB_SIG_TYPE_NONE: + break; + case IB_SIG_TYPE_T10_DIF: + basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval); + basic->m_bfs_psv = cpu_to_be32(msig->psv_memory.psv_idx); + mlx5_fill_inl_bsf(mem, &bsf->m_inl); + break; + default: + return -EINVAL; + } + + /* Wire domain */ + switch (sig_attrs->wire.sig_type) { + case IB_SIG_TYPE_NONE: + break; + case IB_SIG_TYPE_T10_DIF: + if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval && + mem->sig_type == wire->sig_type) { + /* Same block structure */ + basic->bsf_size_sbs |= 1 << 4; + if (mem->sig.dif.bg_type == wire->sig.dif.bg_type) + basic->wire.copy_byte_mask |= MLX5_CPY_GRD_MASK; + if (mem->sig.dif.app_tag == wire->sig.dif.app_tag) + basic->wire.copy_byte_mask |= MLX5_CPY_APP_MASK; + if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag) + basic->wire.copy_byte_mask |= MLX5_CPY_REF_MASK; + } else + basic->wire.bs_selector = + bs_selector(wire->sig.dif.pi_interval); + + basic->w_bfs_psv = cpu_to_be32(msig->psv_wire.psv_idx); + mlx5_fill_inl_bsf(wire, &bsf->w_inl); + break; + default: + return -EINVAL; + } + + return 0; +} + + +static int set_sig_data_segment(const struct ib_send_wr *send_wr, + struct ib_mr *sig_mr, + struct ib_sig_attrs *sig_attrs, + struct mlx5_ib_qp *qp, void **seg, int *size, + void **cur_edge) +{ + struct mlx5_bsf *bsf; + u32 data_len; + u32 data_key; + u64 data_va; + u32 prot_len = 0; + u32 prot_key = 0; + u64 prot_va = 0; + bool prot = false; + int ret; + int wqe_size; + struct mlx5_ib_mr *mr = to_mmr(sig_mr); + struct mlx5_ib_mr *pi_mr = mr->pi_mr; + + data_len = pi_mr->data_length; + data_key = pi_mr->ibmr.lkey; + data_va = pi_mr->data_iova; + if (pi_mr->meta_ndescs) { + prot_len = pi_mr->meta_length; + prot_key = pi_mr->ibmr.lkey; + prot_va = pi_mr->pi_iova; + prot = true; + } + + if (!prot || (data_key == prot_key && data_va == prot_va && + data_len == prot_len)) { + /** + * Source domain doesn't contain signature information + * or data and protection are interleaved in memory. + * So need construct: + * ------------------ + * | data_klm | + * ------------------ + * | BSF | + * ------------------ + **/ + struct mlx5_klm *data_klm = *seg; + + data_klm->bcount = cpu_to_be32(data_len); + data_klm->key = cpu_to_be32(data_key); + data_klm->va = cpu_to_be64(data_va); + wqe_size = ALIGN(sizeof(*data_klm), 64); + } else { + /** + * Source domain contains signature information + * So need construct a strided block format: + * --------------------------- + * | stride_block_ctrl | + * --------------------------- + * | data_klm | + * --------------------------- + * | prot_klm | + * --------------------------- + * | BSF | + * --------------------------- + **/ + struct mlx5_stride_block_ctrl_seg *sblock_ctrl; + struct mlx5_stride_block_entry *data_sentry; + struct mlx5_stride_block_entry *prot_sentry; + u16 block_size = sig_attrs->mem.sig.dif.pi_interval; + int prot_size; + + sblock_ctrl = *seg; + data_sentry = (void *)sblock_ctrl + sizeof(*sblock_ctrl); + prot_sentry = (void *)data_sentry + sizeof(*data_sentry); + + prot_size = prot_field_size(sig_attrs->mem.sig_type); + if (!prot_size) { + pr_err("Bad block size given: %u\n", block_size); + return -EINVAL; + } + sblock_ctrl->bcount_per_cycle = cpu_to_be32(block_size + + prot_size); + sblock_ctrl->op = cpu_to_be32(MLX5_STRIDE_BLOCK_OP); + sblock_ctrl->repeat_count = cpu_to_be32(data_len / block_size); + sblock_ctrl->num_entries = cpu_to_be16(2); + + data_sentry->bcount = cpu_to_be16(block_size); + data_sentry->key = cpu_to_be32(data_key); + data_sentry->va = cpu_to_be64(data_va); + data_sentry->stride = cpu_to_be16(block_size); + + prot_sentry->bcount = cpu_to_be16(prot_size); + prot_sentry->key = cpu_to_be32(prot_key); + prot_sentry->va = cpu_to_be64(prot_va); + prot_sentry->stride = cpu_to_be16(prot_size); + + wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) + + sizeof(*prot_sentry), 64); + } + + *seg += wqe_size; + *size += wqe_size / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); + + bsf = *seg; + ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len); + if (ret) + return -EINVAL; + + *seg += sizeof(*bsf); + *size += sizeof(*bsf) / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); + + return 0; +} + +static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, + struct ib_mr *sig_mr, int access_flags, + u32 size, u32 length, u32 pdn) +{ + u32 sig_key = sig_mr->rkey; + u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1; + + memset(seg, 0, sizeof(*seg)); + + seg->flags = get_umr_flags(access_flags) | MLX5_MKC_ACCESS_MODE_KLMS; + seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00); + seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 | + MLX5_MKEY_BSF_EN | pdn); + seg->len = cpu_to_be64(length); + seg->xlt_oct_size = cpu_to_be32(get_xlt_octo(size)); + seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE); +} + +static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, + u32 size) +{ + memset(umr, 0, sizeof(*umr)); + + umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE; + umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size)); + umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE); + umr->mkey_mask = sig_mkey_mask(); +} + +static int set_pi_umr_wr(const struct ib_send_wr *send_wr, + struct mlx5_ib_qp *qp, void **seg, int *size, + void **cur_edge) +{ + const struct ib_reg_wr *wr = reg_wr(send_wr); + struct mlx5_ib_mr *sig_mr = to_mmr(wr->mr); + struct mlx5_ib_mr *pi_mr = sig_mr->pi_mr; + struct ib_sig_attrs *sig_attrs = sig_mr->ibmr.sig_attrs; + u32 pdn = to_mpd(qp->ibqp.pd)->pdn; + u32 xlt_size; + int region_len, ret; + + if (unlikely(send_wr->num_sge != 0) || + unlikely(wr->access & IB_ACCESS_REMOTE_ATOMIC) || + unlikely(!sig_mr->sig) || unlikely(!qp->ibqp.integrity_en) || + unlikely(!sig_mr->sig->sig_status_checked)) + return -EINVAL; + + /* length of the protected region, data + protection */ + region_len = pi_mr->ibmr.length; + + /** + * KLM octoword size - if protection was provided + * then we use strided block format (3 octowords), + * else we use single KLM (1 octoword) + **/ + if (sig_attrs->mem.sig_type != IB_SIG_TYPE_NONE) + xlt_size = 0x30; + else + xlt_size = sizeof(struct mlx5_klm); + + set_sig_umr_segment(*seg, xlt_size); + *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); + *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); + + set_sig_mkey_segment(*seg, wr->mr, wr->access, xlt_size, region_len, + pdn); + *seg += sizeof(struct mlx5_mkey_seg); + *size += sizeof(struct mlx5_mkey_seg) / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); + + ret = set_sig_data_segment(send_wr, wr->mr, sig_attrs, qp, seg, size, + cur_edge); + if (ret) + return ret; + + sig_mr->sig->sig_status_checked = false; + return 0; +} + +static int set_psv_wr(struct ib_sig_domain *domain, + u32 psv_idx, void **seg, int *size) +{ + struct mlx5_seg_set_psv *psv_seg = *seg; + + memset(psv_seg, 0, sizeof(*psv_seg)); + psv_seg->psv_num = cpu_to_be32(psv_idx); + switch (domain->sig_type) { + case IB_SIG_TYPE_NONE: + break; + case IB_SIG_TYPE_T10_DIF: + psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 | + domain->sig.dif.app_tag); + psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag); + break; + default: + pr_err("Bad signature type (%d) is given.\n", + domain->sig_type); + return -EINVAL; + } + + *seg += sizeof(*psv_seg); + *size += sizeof(*psv_seg) / 16; + + return 0; +} + +static int set_reg_wr(struct mlx5_ib_qp *qp, + const struct ib_reg_wr *wr, + void **seg, int *size, void **cur_edge, + bool check_not_free) +{ + struct mlx5_ib_mr *mr = to_mmr(wr->mr); + struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd); + struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device); + int mr_list_size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size; + bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD; + bool atomic = wr->access & IB_ACCESS_REMOTE_ATOMIC; + u8 flags = 0; + + if (!mlx5_ib_can_use_umr(dev, atomic, wr->access)) { + mlx5_ib_warn(to_mdev(qp->ibqp.device), + "Fast update of %s for MR is disabled\n", + (MLX5_CAP_GEN(dev->mdev, + umr_modify_entity_size_disabled)) ? + "entity size" : + "atomic access"); + return -EINVAL; + } + + if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) { + mlx5_ib_warn(to_mdev(qp->ibqp.device), + "Invalid IB_SEND_INLINE send flag\n"); + return -EINVAL; + } + + if (check_not_free) + flags |= MLX5_UMR_CHECK_NOT_FREE; + if (umr_inline) + flags |= MLX5_UMR_INLINE; + + set_reg_umr_seg(*seg, mr, flags, atomic); + *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); + *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); + + set_reg_mkey_seg(*seg, mr, wr->key, wr->access); + *seg += sizeof(struct mlx5_mkey_seg); + *size += sizeof(struct mlx5_mkey_seg) / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); + + if (umr_inline) { + memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs, + mr_list_size); + *size = ALIGN(*size, MLX5_SEND_WQE_BB >> 4); + } else { + set_reg_data_seg(*seg, mr, pd); + *seg += sizeof(struct mlx5_wqe_data_seg); + *size += (sizeof(struct mlx5_wqe_data_seg) / 16); + } + return 0; +} + +static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size, + void **cur_edge) +{ + set_linv_umr_seg(*seg); + *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); + *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); + set_linv_mkey_seg(*seg); + *seg += sizeof(struct mlx5_mkey_seg); + *size += sizeof(struct mlx5_mkey_seg) / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); +} + +static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16) +{ + __be32 *p = NULL; + int i, j; + + pr_debug("dump WQE index %u:\n", idx); + for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) { + if ((i & 0xf) == 0) { + p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx); + pr_debug("WQBB at %p:\n", (void *)p); + j = 0; + idx = (idx + 1) & (qp->sq.wqe_cnt - 1); + } + pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]), + be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]), + be32_to_cpu(p[j + 3])); + } +} + +static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg, + struct mlx5_wqe_ctrl_seg **ctrl, + const struct ib_send_wr *wr, unsigned int *idx, + int *size, void **cur_edge, int nreq, + bool send_signaled, bool solicited) +{ + if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) + return -ENOMEM; + + *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); + *seg = mlx5_frag_buf_get_wqe(&qp->sq.fbc, *idx); + *ctrl = *seg; + *(uint32_t *)(*seg + 8) = 0; + (*ctrl)->imm = send_ieth(wr); + (*ctrl)->fm_ce_se = qp->sq_signal_bits | + (send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) | + (solicited ? MLX5_WQE_CTRL_SOLICITED : 0); + + *seg += sizeof(**ctrl); + *size = sizeof(**ctrl) / 16; + *cur_edge = qp->sq.cur_edge; + + return 0; +} + +static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, + struct mlx5_wqe_ctrl_seg **ctrl, + const struct ib_send_wr *wr, unsigned int *idx, int *size, + void **cur_edge, int nreq) +{ + return __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq, + wr->send_flags & IB_SEND_SIGNALED, + wr->send_flags & IB_SEND_SOLICITED); +} + +static void finish_wqe(struct mlx5_ib_qp *qp, + struct mlx5_wqe_ctrl_seg *ctrl, + void *seg, u8 size, void *cur_edge, + unsigned int idx, u64 wr_id, int nreq, u8 fence, + u32 mlx5_opcode) +{ + u8 opmod = 0; + + ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | + mlx5_opcode | ((u32)opmod << 24)); + ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8)); + ctrl->fm_ce_se |= fence; + if (unlikely(qp->flags_en & MLX5_QP_FLAG_SIGNATURE)) + ctrl->signature = wq_sig(ctrl); + + qp->sq.wrid[idx] = wr_id; + qp->sq.w_list[idx].opcode = mlx5_opcode; + qp->sq.wqe_head[idx] = qp->sq.head + nreq; + qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB); + qp->sq.w_list[idx].next = qp->sq.cur_post; + + /* We save the edge which was possibly updated during the WQE + * construction, into SQ's cache. + */ + seg = PTR_ALIGN(seg, MLX5_SEND_WQE_BB); + qp->sq.cur_edge = (unlikely(seg == cur_edge)) ? + get_sq_edge(&qp->sq, qp->sq.cur_post & + (qp->sq.wqe_cnt - 1)) : + cur_edge; +} + +static void handle_rdma_op(const struct ib_send_wr *wr, void **seg, int *size) +{ + set_raddr_seg(*seg, rdma_wr(wr)->remote_addr, rdma_wr(wr)->rkey); + *seg += sizeof(struct mlx5_wqe_raddr_seg); + *size += sizeof(struct mlx5_wqe_raddr_seg) / 16; +} + +static void handle_local_inv(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr, + struct mlx5_wqe_ctrl_seg **ctrl, void **seg, + int *size, void **cur_edge, unsigned int idx) +{ + qp->sq.wr_data[idx] = IB_WR_LOCAL_INV; + (*ctrl)->imm = cpu_to_be32(wr->ex.invalidate_rkey); + set_linv_wr(qp, seg, size, cur_edge); +} + +static int handle_reg_mr(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr, + struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size, + void **cur_edge, unsigned int idx) +{ + qp->sq.wr_data[idx] = IB_WR_REG_MR; + (*ctrl)->imm = cpu_to_be32(reg_wr(wr)->key); + return set_reg_wr(qp, reg_wr(wr), seg, size, cur_edge, true); +} + +static int handle_psv(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + const struct ib_send_wr *wr, + struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size, + void **cur_edge, unsigned int *idx, int nreq, + struct ib_sig_domain *domain, u32 psv_index, + u8 next_fence) +{ + int err; + + /* + * SET_PSV WQEs are not signaled and solicited on error. + */ + err = __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq, + false, true); + if (unlikely(err)) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + goto out; + } + err = set_psv_wr(domain, psv_index, seg, size); + if (unlikely(err)) { + mlx5_ib_warn(dev, "\n"); + goto out; + } + finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq, + next_fence, MLX5_OPCODE_SET_PSV); + +out: + return err; +} + +static int handle_reg_mr_integrity(struct mlx5_ib_dev *dev, + struct mlx5_ib_qp *qp, + const struct ib_send_wr *wr, + struct mlx5_wqe_ctrl_seg **ctrl, void **seg, + int *size, void **cur_edge, + unsigned int *idx, int nreq, u8 fence, + u8 next_fence) +{ + struct mlx5_ib_mr *mr; + struct mlx5_ib_mr *pi_mr; + struct mlx5_ib_mr pa_pi_mr; + struct ib_sig_attrs *sig_attrs; + struct ib_reg_wr reg_pi_wr; + int err; + + qp->sq.wr_data[*idx] = IB_WR_REG_MR_INTEGRITY; + + mr = to_mmr(reg_wr(wr)->mr); + pi_mr = mr->pi_mr; + + if (pi_mr) { + memset(®_pi_wr, 0, + sizeof(struct ib_reg_wr)); + + reg_pi_wr.mr = &pi_mr->ibmr; + reg_pi_wr.access = reg_wr(wr)->access; + reg_pi_wr.key = pi_mr->ibmr.rkey; + + (*ctrl)->imm = cpu_to_be32(reg_pi_wr.key); + /* UMR for data + prot registration */ + err = set_reg_wr(qp, ®_pi_wr, seg, size, cur_edge, false); + if (unlikely(err)) + goto out; + + finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, + nreq, fence, MLX5_OPCODE_UMR); + + err = begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq); + if (unlikely(err)) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + goto out; + } + } else { + memset(&pa_pi_mr, 0, sizeof(struct mlx5_ib_mr)); + /* No UMR, use local_dma_lkey */ + pa_pi_mr.ibmr.lkey = mr->ibmr.pd->local_dma_lkey; + pa_pi_mr.ndescs = mr->ndescs; + pa_pi_mr.data_length = mr->data_length; + pa_pi_mr.data_iova = mr->data_iova; + if (mr->meta_ndescs) { + pa_pi_mr.meta_ndescs = mr->meta_ndescs; + pa_pi_mr.meta_length = mr->meta_length; + pa_pi_mr.pi_iova = mr->pi_iova; + } + + pa_pi_mr.ibmr.length = mr->ibmr.length; + mr->pi_mr = &pa_pi_mr; + } + (*ctrl)->imm = cpu_to_be32(mr->ibmr.rkey); + /* UMR for sig MR */ + err = set_pi_umr_wr(wr, qp, seg, size, cur_edge); + if (unlikely(err)) { + mlx5_ib_warn(dev, "\n"); + goto out; + } + finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq, + fence, MLX5_OPCODE_UMR); + + sig_attrs = mr->ibmr.sig_attrs; + err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq, + &sig_attrs->mem, mr->sig->psv_memory.psv_idx, + next_fence); + if (unlikely(err)) + goto out; + + err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq, + &sig_attrs->wire, mr->sig->psv_wire.psv_idx, + next_fence); + if (unlikely(err)) + goto out; + + qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; + +out: + return err; +} + +static int handle_qpt_rc(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + const struct ib_send_wr *wr, + struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size, + void **cur_edge, unsigned int *idx, int nreq, u8 fence, + u8 next_fence, int *num_sge) +{ + int err = 0; + + switch (wr->opcode) { + case IB_WR_RDMA_READ: + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + handle_rdma_op(wr, seg, size); + break; + + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: + mlx5_ib_warn(dev, "Atomic operations are not supported yet\n"); + err = -EOPNOTSUPP; + goto out; + + case IB_WR_LOCAL_INV: + handle_local_inv(qp, wr, ctrl, seg, size, cur_edge, *idx); + *num_sge = 0; + break; + + case IB_WR_REG_MR: + err = handle_reg_mr(qp, wr, ctrl, seg, size, cur_edge, *idx); + if (unlikely(err)) + goto out; + *num_sge = 0; + break; + + case IB_WR_REG_MR_INTEGRITY: + err = handle_reg_mr_integrity(dev, qp, wr, ctrl, seg, size, + cur_edge, idx, nreq, fence, + next_fence); + if (unlikely(err)) + goto out; + *num_sge = 0; + break; + + default: + break; + } + +out: + return err; +} + +static void handle_qpt_uc(const struct ib_send_wr *wr, void **seg, int *size) +{ + switch (wr->opcode) { + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + handle_rdma_op(wr, seg, size); + break; + default: + break; + } +} + +static void handle_qpt_hw_gsi(struct mlx5_ib_qp *qp, + const struct ib_send_wr *wr, void **seg, + int *size, void **cur_edge) +{ + set_datagram_seg(*seg, wr); + *seg += sizeof(struct mlx5_wqe_datagram_seg); + *size += sizeof(struct mlx5_wqe_datagram_seg) / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); +} + +static void handle_qpt_ud(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr, + void **seg, int *size, void **cur_edge) +{ + set_datagram_seg(*seg, wr); + *seg += sizeof(struct mlx5_wqe_datagram_seg); + *size += sizeof(struct mlx5_wqe_datagram_seg) / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); + + /* handle qp that supports ud offload */ + if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) { + struct mlx5_wqe_eth_pad *pad; + + pad = *seg; + memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad)); + *seg += sizeof(struct mlx5_wqe_eth_pad); + *size += sizeof(struct mlx5_wqe_eth_pad) / 16; + set_eth_seg(wr, qp, seg, size, cur_edge); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); + } +} + +static int handle_qpt_reg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, + const struct ib_send_wr *wr, + struct mlx5_wqe_ctrl_seg **ctrl, void **seg, + int *size, void **cur_edge, unsigned int idx) +{ + int err = 0; + + if (unlikely(wr->opcode != MLX5_IB_WR_UMR)) { + err = -EINVAL; + mlx5_ib_warn(dev, "bad opcode %d\n", wr->opcode); + goto out; + } + + qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; + (*ctrl)->imm = cpu_to_be32(umr_wr(wr)->mkey); + err = set_reg_umr_segment(dev, *seg, wr, + !!(MLX5_CAP_GEN(dev->mdev, atomic))); + if (unlikely(err)) + goto out; + *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); + *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); + set_reg_mkey_segment(*seg, wr); + *seg += sizeof(struct mlx5_mkey_seg); + *size += sizeof(struct mlx5_mkey_seg) / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); +out: + return err; +} + +int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr, bool drain) +{ + struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_ib_qp *qp; + struct mlx5_wqe_xrc_seg *xrc; + struct mlx5_bf *bf; + void *cur_edge; + int uninitialized_var(size); + unsigned long flags; + unsigned int idx; + int err = 0; + int num_sge; + void *seg; + int nreq; + int i; + u8 next_fence = 0; + u8 fence; + + if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && + !drain)) { + *bad_wr = wr; + return -EIO; + } + + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) + return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr); + + qp = to_mqp(ibqp); + bf = &qp->bf; + + spin_lock_irqsave(&qp->sq.lock, flags); + + for (nreq = 0; wr; nreq++, wr = wr->next) { + if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) { + mlx5_ib_warn(dev, "\n"); + err = -EINVAL; + *bad_wr = wr; + goto out; + } + + num_sge = wr->num_sge; + if (unlikely(num_sge > qp->sq.max_gs)) { + mlx5_ib_warn(dev, "\n"); + err = -EINVAL; + *bad_wr = wr; + goto out; + } + + err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, &cur_edge, + nreq); + if (err) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + if (wr->opcode == IB_WR_REG_MR || + wr->opcode == IB_WR_REG_MR_INTEGRITY) { + fence = dev->umr_fence; + next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; + } else { + if (wr->send_flags & IB_SEND_FENCE) { + if (qp->next_fence) + fence = MLX5_FENCE_MODE_SMALL_AND_FENCE; + else + fence = MLX5_FENCE_MODE_FENCE; + } else { + fence = qp->next_fence; + } + } + + switch (ibqp->qp_type) { + case IB_QPT_XRC_INI: + xrc = seg; + seg += sizeof(*xrc); + size += sizeof(*xrc) / 16; + fallthrough; + case IB_QPT_RC: + err = handle_qpt_rc(dev, qp, wr, &ctrl, &seg, &size, + &cur_edge, &idx, nreq, fence, + next_fence, &num_sge); + if (unlikely(err)) { + *bad_wr = wr; + goto out; + } else if (wr->opcode == IB_WR_REG_MR_INTEGRITY) { + goto skip_psv; + } + break; + + case IB_QPT_UC: + handle_qpt_uc(wr, &seg, &size); + break; + case IB_QPT_SMI: + if (unlikely(!mdev->port_caps[qp->port - 1].has_smi)) { + mlx5_ib_warn(dev, "Send SMP MADs is not allowed\n"); + err = -EPERM; + *bad_wr = wr; + goto out; + } + fallthrough; + case MLX5_IB_QPT_HW_GSI: + handle_qpt_hw_gsi(qp, wr, &seg, &size, &cur_edge); + break; + case IB_QPT_UD: + handle_qpt_ud(qp, wr, &seg, &size, &cur_edge); + break; + case MLX5_IB_QPT_REG_UMR: + err = handle_qpt_reg_umr(dev, qp, wr, &ctrl, &seg, + &size, &cur_edge, idx); + if (unlikely(err)) + goto out; + break; + + default: + break; + } + + if (wr->send_flags & IB_SEND_INLINE && num_sge) { + err = set_data_inl_seg(qp, wr, &seg, &size, &cur_edge); + if (unlikely(err)) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + } else { + for (i = 0; i < num_sge; i++) { + handle_post_send_edge(&qp->sq, &seg, size, + &cur_edge); + if (unlikely(!wr->sg_list[i].length)) + continue; + + set_data_ptr_seg( + (struct mlx5_wqe_data_seg *)seg, + wr->sg_list + i); + size += sizeof(struct mlx5_wqe_data_seg) / 16; + seg += sizeof(struct mlx5_wqe_data_seg); + } + } + + qp->next_fence = next_fence; + finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, nreq, + fence, mlx5_ib_opcode[wr->opcode]); +skip_psv: + if (0) + dump_wqe(qp, idx, size); + } + +out: + if (likely(nreq)) { + qp->sq.head += nreq; + + /* Make sure that descriptors are written before + * updating doorbell record and ringing the doorbell + */ + wmb(); + + qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post); + + /* Make sure doorbell record is visible to the HCA before + * we hit doorbell. + */ + wmb(); + + mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset); + /* Make sure doorbells don't leak out of SQ spinlock + * and reach the HCA out of order. + */ + bf->offset ^= bf->buf_size; + } + + spin_unlock_irqrestore(&qp->sq.lock, flags); + + return err; +} + +static void set_sig_seg(struct mlx5_rwqe_sig *sig, int max_gs) +{ + sig->signature = calc_sig(sig, (max_gs + 1) << 2); +} + +int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr, bool drain) +{ + struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_wqe_data_seg *scat; + struct mlx5_rwqe_sig *sig; + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + struct mlx5_core_dev *mdev = dev->mdev; + unsigned long flags; + int err = 0; + int nreq; + int ind; + int i; + + if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && + !drain)) { + *bad_wr = wr; + return -EIO; + } + + if (unlikely(ibqp->qp_type == IB_QPT_GSI)) + return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr); + + spin_lock_irqsave(&qp->rq.lock, flags); + + ind = qp->rq.head & (qp->rq.wqe_cnt - 1); + + for (nreq = 0; wr; nreq++, wr = wr->next) { + if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) { + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + if (unlikely(wr->num_sge > qp->rq.max_gs)) { + err = -EINVAL; + *bad_wr = wr; + goto out; + } + + scat = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ind); + if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE) + scat++; + + for (i = 0; i < wr->num_sge; i++) + set_data_ptr_seg(scat + i, wr->sg_list + i); + + if (i < qp->rq.max_gs) { + scat[i].byte_count = 0; + scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY); + scat[i].addr = 0; + } + + if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE) { + sig = (struct mlx5_rwqe_sig *)scat; + set_sig_seg(sig, qp->rq.max_gs); + } + + qp->rq.wrid[ind] = wr->wr_id; + + ind = (ind + 1) & (qp->rq.wqe_cnt - 1); + } + +out: + if (likely(nreq)) { + qp->rq.head += nreq; + + /* Make sure that descriptors are written before + * doorbell record. + */ + wmb(); + + *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff); + } + + spin_unlock_irqrestore(&qp->rq.lock, flags); + + return err; +} diff --git a/drivers/infiniband/hw/mlx5/wr.h b/drivers/infiniband/hw/mlx5/wr.h new file mode 100644 index 000000000000..4f0057516402 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/wr.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. + */ + +#ifndef _MLX5_IB_WR_H +#define _MLX5_IB_WR_H + +#include "mlx5_ib.h" + +enum { + MLX5_IB_SQ_UMR_INLINE_THRESHOLD = 64, +}; + +struct mlx5_wqe_eth_pad { + u8 rsvd0[16]; +}; + + +/* get_sq_edge - Get the next nearby edge. + * + * An 'edge' is defined as the first following address after the end + * of the fragment or the SQ. Accordingly, during the WQE construction + * which repetitively increases the pointer to write the next data, it + * simply should check if it gets to an edge. + * + * @sq - SQ buffer. + * @idx - Stride index in the SQ buffer. + * + * Return: + * The new edge. + */ +static inline void *get_sq_edge(struct mlx5_ib_wq *sq, u32 idx) +{ + void *fragment_end; + + fragment_end = mlx5_frag_buf_get_wqe + (&sq->fbc, + mlx5_frag_buf_get_idx_last_contig_stride(&sq->fbc, idx)); + + return fragment_end + MLX5_SEND_WQE_BB; +} + +int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr, bool drain); +int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr, bool drain); + +static inline int mlx5_ib_post_send_nodrain(struct ib_qp *ibqp, + const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) +{ + return mlx5_ib_post_send(ibqp, wr, bad_wr, false); +} + +static inline int mlx5_ib_post_send_drain(struct ib_qp *ibqp, + const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) +{ + return mlx5_ib_post_send(ibqp, wr, bad_wr, true); +} + +static inline int mlx5_ib_post_recv_nodrain(struct ib_qp *ibqp, + const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) +{ + return mlx5_ib_post_recv(ibqp, wr, bad_wr, false); +} + +static inline int mlx5_ib_post_recv_drain(struct ib_qp *ibqp, + const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) +{ + return mlx5_ib_post_recv(ibqp, wr, bad_wr, true); +} +#endif /* _MLX5_IB_WR_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 69a3e4f62fb1..bc3e3d741ca3 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -388,14 +388,15 @@ static void mthca_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) mthca_pd_free(to_mdev(pd->device), to_mpd(pd)); } -static int mthca_ah_create(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata) +static int mthca_ah_create(struct ib_ah *ibah, + struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) { struct mthca_ah *ah = to_mah(ibah); - return mthca_create_ah(to_mdev(ibah->device), to_mpd(ibah->pd), ah_attr, - ah); + return mthca_create_ah(to_mdev(ibah->device), to_mpd(ibah->pd), + init_attr->ah_attr, ah); } static void mthca_ah_destroy(struct ib_ah *ah, u32 flags) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 2b7f00ac41b0..6eea02b18968 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -155,7 +155,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, return status; } -int ocrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags, +int ocrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata) { u32 *ahid_addr; @@ -165,6 +165,7 @@ int ocrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags, u16 vlan_tag = 0xffff; const struct ib_gid_attr *sgid_attr; struct ocrdma_pd *pd = get_ocrdma_pd(ibah->pd); + struct rdma_ah_attr *attr = init_attr->ah_attr; struct ocrdma_dev *dev = get_ocrdma_dev(ibah->device); if ((attr->type != RDMA_AH_ATTR_TYPE_ROCE) || diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index 9780afcde780..8b73b3489f3a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -51,7 +51,7 @@ enum { OCRDMA_AH_L3_TYPE_SHIFT = 0x1D /* 29 bits */ }; -int ocrdma_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, +int ocrdma_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); void ocrdma_destroy_ah(struct ib_ah *ah, u32 flags); int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index a5bd3adaf90a..d6b94a713573 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -2750,12 +2750,12 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) return 0; } -int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags, +int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata) { struct qedr_ah *ah = get_qedr_ah(ibah); - rdma_copy_ah_attr(&ah->attr, attr); + rdma_copy_ah_attr(&ah->attr, init_attr->ah_attr); return 0; } diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 18027844eb87..5e02387e068d 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -70,7 +70,7 @@ int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_recv_wr); -int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags, +int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); void qedr_destroy_ah(struct ib_ah *ibah, u32 flags); diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 91d64dd71a8a..8bcbc884e5b6 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -2375,7 +2375,6 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd) struct qib_devdata *dd = ppd->dd; u64 val, guid, ibc; unsigned long flags; - int ret = 0; /* * SerDes model not in Pd, but still need to @@ -2510,7 +2509,7 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd) val | ERR_MASK_N(IBStatusChanged)); /* Always zero until we start messing with SerDes for real */ - return ret; + return 0; } /** @@ -6875,7 +6874,7 @@ static int init_sdma_7322_regs(struct qib_pportdata *ppd) struct qib_devdata *dd = ppd->dd; unsigned lastbuf, erstbuf; u64 senddmabufmask[3] = { 0 }; - int n, ret = 0; + int n; qib_write_kreg_port(ppd, krp_senddmabase, ppd->sdma_descq_phys); qib_sdma_7322_setlengen(ppd); @@ -6904,7 +6903,7 @@ static int init_sdma_7322_regs(struct qib_pportdata *ppd) qib_write_kreg_port(ppd, krp_senddmabufmask0, senddmabufmask[0]); qib_write_kreg_port(ppd, krp_senddmabufmask1, senddmabufmask[1]); qib_write_kreg_port(ppd, krp_senddmabufmask2, senddmabufmask[2]); - return ret; + return 0; } /* sdma_lock must be held */ diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index faf7ecd7b3fa..ccbded2d26ce 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -509,9 +509,10 @@ void pvrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) * * @return: 0 on success, otherwise errno. */ -int pvrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, - u32 flags, struct ib_udata *udata) +int pvrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) { + struct rdma_ah_attr *ah_attr = init_attr->ah_attr; struct pvrdma_dev *dev = to_vdev(ibah->device); struct pvrdma_ah *ah = to_vah(ibah); const struct ib_global_route *grh; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index e4a48f5c0c85..267702226f10 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -414,7 +414,7 @@ int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, void pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); -int pvrdma_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, +int pvrdma_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); void pvrdma_destroy_ah(struct ib_ah *ah, u32 flags); |