diff options
Diffstat (limited to 'drivers/infiniband/hw/bnxt_re/qplib_fp.c')
| -rw-r--r-- | drivers/infiniband/hw/bnxt_re/qplib_fp.c | 2750 |
1 files changed, 1883 insertions, 867 deletions
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 9af1514e5944..c88f049136fc 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -36,12 +36,17 @@ * Description: Fast Path Operators */ +#define dev_fmt(fmt) "QPLIB: " fmt + #include <linux/interrupt.h> #include <linux/spinlock.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/pci.h> +#include <linux/delay.h> #include <linux/prefetch.h> +#include <linux/if_ether.h> +#include <rdma/ib_mad.h> #include "roce_hsi.h" @@ -49,8 +54,126 @@ #include "qplib_rcfw.h" #include "qplib_sp.h" #include "qplib_fp.h" +#include <rdma/ib_addr.h> +#include "bnxt_ulp.h" +#include "bnxt_re.h" +#include "ib_verbs.h" + +static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp); + +static void bnxt_qplib_cancel_phantom_processing(struct bnxt_qplib_qp *qp) +{ + qp->sq.condition = false; + qp->sq.send_phantom = false; + qp->sq.single = false; +} + +/* Flush list */ +static void __bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp) +{ + struct bnxt_qplib_cq *scq, *rcq; + + scq = qp->scq; + rcq = qp->rcq; + + if (!qp->sq.flushed) { + dev_dbg(&scq->hwq.pdev->dev, + "FP: Adding to SQ Flush list = %p\n", qp); + bnxt_qplib_cancel_phantom_processing(qp); + list_add_tail(&qp->sq_flush, &scq->sqf_head); + qp->sq.flushed = true; + } + if (!qp->srq) { + if (!qp->rq.flushed) { + dev_dbg(&rcq->hwq.pdev->dev, + "FP: Adding to RQ Flush list = %p\n", qp); + list_add_tail(&qp->rq_flush, &rcq->rqf_head); + qp->rq.flushed = true; + } + } +} + +static void bnxt_qplib_acquire_cq_flush_locks(struct bnxt_qplib_qp *qp, + unsigned long *flags) + __acquires(&qp->scq->flush_lock) __acquires(&qp->rcq->flush_lock) +{ + spin_lock_irqsave(&qp->scq->flush_lock, *flags); + if (qp->scq == qp->rcq) + __acquire(&qp->rcq->flush_lock); + else + spin_lock(&qp->rcq->flush_lock); +} + +static void bnxt_qplib_release_cq_flush_locks(struct bnxt_qplib_qp *qp, + unsigned long *flags) + __releases(&qp->scq->flush_lock) __releases(&qp->rcq->flush_lock) +{ + if (qp->scq == qp->rcq) + __release(&qp->rcq->flush_lock); + else + spin_unlock(&qp->rcq->flush_lock); + spin_unlock_irqrestore(&qp->scq->flush_lock, *flags); +} + +void bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp) +{ + unsigned long flags; + + bnxt_qplib_acquire_cq_flush_locks(qp, &flags); + __bnxt_qplib_add_flush_qp(qp); + bnxt_qplib_release_cq_flush_locks(qp, &flags); +} -static void bnxt_qplib_arm_cq_enable(struct bnxt_qplib_cq *cq); +static void __bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp) +{ + if (qp->sq.flushed) { + qp->sq.flushed = false; + list_del(&qp->sq_flush); + } + if (!qp->srq) { + if (qp->rq.flushed) { + qp->rq.flushed = false; + list_del(&qp->rq_flush); + } + } +} + +void bnxt_qplib_clean_qp(struct bnxt_qplib_qp *qp) +{ + unsigned long flags; + + bnxt_qplib_acquire_cq_flush_locks(qp, &flags); + __clean_cq(qp->scq, (u64)(unsigned long)qp); + qp->sq.hwq.prod = 0; + qp->sq.hwq.cons = 0; + __clean_cq(qp->rcq, (u64)(unsigned long)qp); + qp->rq.hwq.prod = 0; + qp->rq.hwq.cons = 0; + + __bnxt_qplib_del_flush_qp(qp); + bnxt_qplib_release_cq_flush_locks(qp, &flags); +} + +static void bnxt_qpn_cqn_sched_task(struct work_struct *work) +{ + struct bnxt_qplib_nq_work *nq_work = + container_of(work, struct bnxt_qplib_nq_work, work); + + struct bnxt_qplib_cq *cq = nq_work->cq; + struct bnxt_qplib_nq *nq = nq_work->nq; + + if (cq && nq) { + spin_lock_bh(&cq->compl_lock); + if (atomic_read(&cq->arm_state) && nq->cqn_handler) { + dev_dbg(&nq->pdev->dev, + "%s:Trigger cq = %p event nq = %p\n", + __func__, cq, nq); + nq->cqn_handler(nq, cq); + } + spin_unlock_bh(&cq->compl_lock); + } + kfree(nq_work); +} static void bnxt_qplib_free_qp_hdr_buf(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) @@ -60,11 +183,11 @@ static void bnxt_qplib_free_qp_hdr_buf(struct bnxt_qplib_res *res, if (qp->rq_hdr_buf) dma_free_coherent(&res->pdev->dev, - rq->hwq.max_elements * qp->rq_hdr_buf_size, + rq->max_wqe * qp->rq_hdr_buf_size, qp->rq_hdr_buf, qp->rq_hdr_buf_map); if (qp->sq_hdr_buf) dma_free_coherent(&res->pdev->dev, - sq->hwq.max_elements * qp->sq_hdr_buf_size, + sq->max_wqe * qp->sq_hdr_buf_size, qp->sq_hdr_buf, qp->sq_hdr_buf_map); qp->rq_hdr_buf = NULL; qp->sq_hdr_buf = NULL; @@ -78,32 +201,31 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) { struct bnxt_qplib_q *rq = &qp->rq; - struct bnxt_qplib_q *sq = &qp->rq; + struct bnxt_qplib_q *sq = &qp->sq; int rc = 0; - if (qp->sq_hdr_buf_size && sq->hwq.max_elements) { + if (qp->sq_hdr_buf_size && sq->max_wqe) { qp->sq_hdr_buf = dma_alloc_coherent(&res->pdev->dev, - sq->hwq.max_elements * - qp->sq_hdr_buf_size, + sq->max_wqe * qp->sq_hdr_buf_size, &qp->sq_hdr_buf_map, GFP_KERNEL); if (!qp->sq_hdr_buf) { rc = -ENOMEM; dev_err(&res->pdev->dev, - "QPLIB: Failed to create sq_hdr_buf"); + "Failed to create sq_hdr_buf\n"); goto fail; } } - if (qp->rq_hdr_buf_size && rq->hwq.max_elements) { + if (qp->rq_hdr_buf_size && rq->max_wqe) { qp->rq_hdr_buf = dma_alloc_coherent(&res->pdev->dev, - rq->hwq.max_elements * + rq->max_wqe * qp->rq_hdr_buf_size, &qp->rq_hdr_buf_map, GFP_KERNEL); if (!qp->rq_hdr_buf) { rc = -ENOMEM; dev_err(&res->pdev->dev, - "QPLIB: Failed to create rq_hdr_buf"); + "Failed to create rq_hdr_buf\n"); goto fail; } } @@ -114,26 +236,28 @@ fail: return rc; } -static void bnxt_qplib_service_nq(unsigned long data) +static void clean_nq(struct bnxt_qplib_nq *nq, struct bnxt_qplib_cq *cq) { - struct bnxt_qplib_nq *nq = (struct bnxt_qplib_nq *)data; struct bnxt_qplib_hwq *hwq = &nq->hwq; struct nq_base *nqe, **nq_ptr; - int num_cqne_processed = 0; - u32 sw_cons, raw_cons; - u16 type; int budget = nq->budget; - u64 q_handle; + uintptr_t q_handle; + u16 type; + spin_lock_bh(&hwq->lock); /* Service the NQ until empty */ - raw_cons = hwq->cons; while (budget--) { - sw_cons = HWQ_CMP(raw_cons, hwq); nq_ptr = (struct nq_base **)hwq->pbl_ptr; - nqe = &nq_ptr[NQE_PG(sw_cons)][NQE_IDX(sw_cons)]; - if (!NQE_CMP_VALID(nqe, raw_cons, hwq->max_elements)) + nqe = &nq_ptr[NQE_PG(hwq->cons)][NQE_IDX(hwq->cons)]; + if (!NQE_CMP_VALID(nqe, nq->nq_db.dbinfo.flags)) break; + /* + * The valid test of the entry must be done first before + * reading any further. + */ + dma_rmb(); + type = le16_to_cpu(nqe->info10_type) & NQ_BASE_TYPE_MASK; switch (type) { case NQ_BASE_TYPE_CQ_NOTIFICATION: @@ -143,112 +267,312 @@ static void bnxt_qplib_service_nq(unsigned long data) q_handle = le32_to_cpu(nqcne->cq_handle_low); q_handle |= (u64)le32_to_cpu(nqcne->cq_handle_high) << 32; - bnxt_qplib_arm_cq_enable((struct bnxt_qplib_cq *) - ((unsigned long)q_handle)); - if (!nq->cqn_handler(nq, (struct bnxt_qplib_cq *) - ((unsigned long)q_handle))) - num_cqne_processed++; - else + if ((unsigned long)cq == q_handle) { + nqcne->cq_handle_low = 0; + nqcne->cq_handle_high = 0; + cq->cnq_events++; + } + break; + } + default: + break; + } + bnxt_qplib_hwq_incr_cons(hwq->max_elements, &hwq->cons, + 1, &nq->nq_db.dbinfo.flags); + } + spin_unlock_bh(&hwq->lock); +} + +/* Wait for receiving all NQEs for this CQ and clean the NQEs associated with + * this CQ. + */ +static void __wait_for_all_nqes(struct bnxt_qplib_cq *cq, u16 cnq_events) +{ + u32 retry_cnt = 100; + + while (retry_cnt--) { + if (cnq_events == cq->cnq_events) + return; + usleep_range(50, 100); + clean_nq(cq->nq, cq); + } +} + +static void bnxt_qplib_service_nq(struct tasklet_struct *t) +{ + struct bnxt_qplib_nq *nq = from_tasklet(nq, t, nq_tasklet); + struct bnxt_qplib_hwq *hwq = &nq->hwq; + struct bnxt_qplib_cq *cq; + int budget = nq->budget; + struct nq_base *nqe; + uintptr_t q_handle; + u32 hw_polled = 0; + u16 type; + + spin_lock_bh(&hwq->lock); + /* Service the NQ until empty */ + while (budget--) { + nqe = bnxt_qplib_get_qe(hwq, hwq->cons, NULL); + if (!NQE_CMP_VALID(nqe, nq->nq_db.dbinfo.flags)) + break; + + /* + * The valid test of the entry must be done first before + * reading any further. + */ + dma_rmb(); + + type = le16_to_cpu(nqe->info10_type) & NQ_BASE_TYPE_MASK; + switch (type) { + case NQ_BASE_TYPE_CQ_NOTIFICATION: + { + struct nq_cn *nqcne = (struct nq_cn *)nqe; + struct bnxt_re_cq *cq_p; + + q_handle = le32_to_cpu(nqcne->cq_handle_low); + q_handle |= (u64)le32_to_cpu(nqcne->cq_handle_high) + << 32; + cq = (struct bnxt_qplib_cq *)(unsigned long)q_handle; + if (!cq) + break; + cq->toggle = (le16_to_cpu(nqe->info10_type) & + NQ_CN_TOGGLE_MASK) >> NQ_CN_TOGGLE_SFT; + cq->dbinfo.toggle = cq->toggle; + cq_p = container_of(cq, struct bnxt_re_cq, qplib_cq); + if (cq_p->uctx_cq_page) + *((u32 *)cq_p->uctx_cq_page) = cq->toggle; + + bnxt_qplib_armen_db(&cq->dbinfo, + DBC_DBC_TYPE_CQ_ARMENA); + spin_lock_bh(&cq->compl_lock); + atomic_set(&cq->arm_state, 0); + if (nq->cqn_handler(nq, (cq))) + dev_warn(&nq->pdev->dev, + "cqn - type 0x%x not handled\n", type); + cq->cnq_events++; + spin_unlock_bh(&cq->compl_lock); + break; + } + case NQ_BASE_TYPE_SRQ_EVENT: + { + struct bnxt_qplib_srq *srq; + struct bnxt_re_srq *srq_p; + struct nq_srq_event *nqsrqe = + (struct nq_srq_event *)nqe; + + q_handle = le32_to_cpu(nqsrqe->srq_handle_low); + q_handle |= (u64)le32_to_cpu(nqsrqe->srq_handle_high) + << 32; + srq = (struct bnxt_qplib_srq *)q_handle; + srq->toggle = (le16_to_cpu(nqe->info10_type) & NQ_CN_TOGGLE_MASK) + >> NQ_CN_TOGGLE_SFT; + srq->dbinfo.toggle = srq->toggle; + srq_p = container_of(srq, struct bnxt_re_srq, qplib_srq); + if (srq_p->uctx_srq_page) + *((u32 *)srq_p->uctx_srq_page) = srq->toggle; + bnxt_qplib_armen_db(&srq->dbinfo, + DBC_DBC_TYPE_SRQ_ARMENA); + if (nq->srqn_handler(nq, + (struct bnxt_qplib_srq *)q_handle, + nqsrqe->event)) dev_warn(&nq->pdev->dev, - "QPLIB: cqn - type 0x%x not handled", - type); + "SRQ event 0x%x not handled\n", + nqsrqe->event); break; } case NQ_BASE_TYPE_DBQ_EVENT: break; default: dev_warn(&nq->pdev->dev, - "QPLIB: nqe with type = 0x%x not handled", - type); + "nqe with type = 0x%x not handled\n", type); break; } - raw_cons++; - } - if (hwq->cons != raw_cons) { - hwq->cons = raw_cons; - NQ_DB_REARM(nq->bar_reg_iomem, hwq->cons, hwq->max_elements); + hw_polled++; + bnxt_qplib_hwq_incr_cons(hwq->max_elements, &hwq->cons, + 1, &nq->nq_db.dbinfo.flags); } + if (hw_polled) + bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, nq->res->cctx, true); + spin_unlock_bh(&hwq->lock); +} + +/* bnxt_re_synchronize_nq - self polling notification queue. + * @nq - notification queue pointer + * + * This function will start polling entries of a given notification queue + * for all pending entries. + * This function is useful to synchronize notification entries while resources + * are going away. + */ + +void bnxt_re_synchronize_nq(struct bnxt_qplib_nq *nq) +{ + int budget = nq->budget; + + nq->budget = nq->hwq.max_elements; + bnxt_qplib_service_nq(&nq->nq_tasklet); + nq->budget = budget; } static irqreturn_t bnxt_qplib_nq_irq(int irq, void *dev_instance) { struct bnxt_qplib_nq *nq = dev_instance; struct bnxt_qplib_hwq *hwq = &nq->hwq; - struct nq_base **nq_ptr; u32 sw_cons; /* Prefetch the NQ element */ sw_cons = HWQ_CMP(hwq->cons, hwq); - nq_ptr = (struct nq_base **)nq->hwq.pbl_ptr; - prefetch(&nq_ptr[NQE_PG(sw_cons)][NQE_IDX(sw_cons)]); + prefetch(bnxt_qplib_get_qe(hwq, sw_cons, NULL)); /* Fan out to CPU affinitized kthreads? */ - tasklet_schedule(&nq->worker); + tasklet_schedule(&nq->nq_tasklet); return IRQ_HANDLED; } +void bnxt_qplib_nq_stop_irq(struct bnxt_qplib_nq *nq, bool kill) +{ + if (!nq->requested) + return; + + nq->requested = false; + /* Mask h/w interrupt */ + bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, nq->res->cctx, false); + /* Sync with last running IRQ handler */ + synchronize_irq(nq->msix_vec); + irq_set_affinity_hint(nq->msix_vec, NULL); + free_irq(nq->msix_vec, nq); + kfree(nq->name); + nq->name = NULL; + + if (kill) + tasklet_kill(&nq->nq_tasklet); + tasklet_disable(&nq->nq_tasklet); +} + void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq) { + if (nq->cqn_wq) { + destroy_workqueue(nq->cqn_wq); + nq->cqn_wq = NULL; + } + /* Make sure the HW is stopped! */ - synchronize_irq(nq->vector); - tasklet_disable(&nq->worker); - tasklet_kill(&nq->worker); + bnxt_qplib_nq_stop_irq(nq, true); - if (nq->requested) { - free_irq(nq->vector, nq); - nq->requested = false; + if (nq->nq_db.reg.bar_reg) { + iounmap(nq->nq_db.reg.bar_reg); + nq->nq_db.reg.bar_reg = NULL; } - if (nq->bar_reg_iomem) - iounmap(nq->bar_reg_iomem); - nq->bar_reg_iomem = NULL; nq->cqn_handler = NULL; nq->srqn_handler = NULL; - nq->vector = 0; + nq->msix_vec = 0; +} + +int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx, + int msix_vector, bool need_init) +{ + struct bnxt_qplib_res *res = nq->res; + int rc; + + if (nq->requested) + return -EFAULT; + + nq->msix_vec = msix_vector; + if (need_init) + tasklet_setup(&nq->nq_tasklet, bnxt_qplib_service_nq); + else + tasklet_enable(&nq->nq_tasklet); + + nq->name = kasprintf(GFP_KERNEL, "bnxt_re-nq-%d@pci:%s", + nq_indx, pci_name(res->pdev)); + if (!nq->name) + return -ENOMEM; + rc = request_irq(nq->msix_vec, bnxt_qplib_nq_irq, 0, nq->name, nq); + if (rc) { + kfree(nq->name); + nq->name = NULL; + tasklet_disable(&nq->nq_tasklet); + return rc; + } + + cpumask_clear(&nq->mask); + cpumask_set_cpu(nq_indx, &nq->mask); + rc = irq_set_affinity_hint(nq->msix_vec, &nq->mask); + if (rc) { + dev_warn(&nq->pdev->dev, + "set affinity failed; vector: %d nq_idx: %d\n", + nq->msix_vec, nq_indx); + } + nq->requested = true; + bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, res->cctx, true); + + return rc; +} + +static int bnxt_qplib_map_nq_db(struct bnxt_qplib_nq *nq, u32 reg_offt) +{ + resource_size_t reg_base; + struct bnxt_qplib_nq_db *nq_db; + struct pci_dev *pdev; + + pdev = nq->pdev; + nq_db = &nq->nq_db; + + nq_db->dbinfo.flags = 0; + nq_db->reg.bar_id = NQ_CONS_PCI_BAR_REGION; + nq_db->reg.bar_base = pci_resource_start(pdev, nq_db->reg.bar_id); + if (!nq_db->reg.bar_base) { + dev_err(&pdev->dev, "QPLIB: NQ BAR region %d resc start is 0!", + nq_db->reg.bar_id); + return -ENOMEM; + } + + reg_base = nq_db->reg.bar_base + reg_offt; + /* Unconditionally map 8 bytes to support 57500 series */ + nq_db->reg.len = 8; + nq_db->reg.bar_reg = ioremap(reg_base, nq_db->reg.len); + if (!nq_db->reg.bar_reg) { + dev_err(&pdev->dev, "QPLIB: NQ BAR region %d mapping failed", + nq_db->reg.bar_id); + return -ENOMEM; + } + + nq_db->dbinfo.db = nq_db->reg.bar_reg; + nq_db->dbinfo.hwq = &nq->hwq; + nq_db->dbinfo.xid = nq->ring_id; + + return 0; } int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq, - int msix_vector, int bar_reg_offset, - int (*cqn_handler)(struct bnxt_qplib_nq *nq, - struct bnxt_qplib_cq *), - int (*srqn_handler)(struct bnxt_qplib_nq *nq, - void *, u8 event)) + int nq_idx, int msix_vector, int bar_reg_offset, + cqn_handler_t cqn_handler, + srqn_handler_t srqn_handler) { - resource_size_t nq_base; int rc; nq->pdev = pdev; - nq->vector = msix_vector; - nq->cqn_handler = cqn_handler; - nq->srqn_handler = srqn_handler; + nq->load = 0; - tasklet_init(&nq->worker, bnxt_qplib_service_nq, (unsigned long)nq); + /* Have a task to schedule CQ notifiers in post send case */ + nq->cqn_wq = create_singlethread_workqueue("bnxt_qplib_nq"); + if (!nq->cqn_wq) + return -ENOMEM; - nq->requested = false; - rc = request_irq(nq->vector, bnxt_qplib_nq_irq, 0, "bnxt_qplib_nq", nq); + rc = bnxt_qplib_map_nq_db(nq, bar_reg_offset); + if (rc) + goto fail; + + rc = bnxt_qplib_nq_start_irq(nq, nq_idx, msix_vector, true); if (rc) { dev_err(&nq->pdev->dev, - "Failed to request IRQ for NQ: %#x", rc); - bnxt_qplib_disable_nq(nq); - goto fail; - } - nq->requested = true; - nq->bar_reg = NQ_CONS_PCI_BAR_REGION; - nq->bar_reg_off = bar_reg_offset; - nq_base = pci_resource_start(pdev, nq->bar_reg); - if (!nq_base) { - rc = -ENOMEM; + "Failed to request irq for nq-idx %d\n", nq_idx); goto fail; } - nq->bar_reg_iomem = ioremap_nocache(nq_base + nq->bar_reg_off, 4); - if (!nq->bar_reg_iomem) { - rc = -ENOMEM; - goto fail; - } - NQ_DB_REARM(nq->bar_reg_iomem, nq->hwq.cons, nq->hwq.max_elements); return 0; fail: @@ -258,183 +582,402 @@ fail: void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq) { - if (nq->hwq.max_elements) - bnxt_qplib_free_hwq(nq->pdev, &nq->hwq); + if (nq->hwq.max_elements) { + bnxt_qplib_free_hwq(nq->res, &nq->hwq); + nq->hwq.max_elements = 0; + } } -int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq) +int bnxt_qplib_alloc_nq(struct bnxt_qplib_res *res, struct bnxt_qplib_nq *nq) { - nq->pdev = pdev; + struct bnxt_qplib_hwq_attr hwq_attr = {}; + struct bnxt_qplib_sg_info sginfo = {}; + + nq->pdev = res->pdev; + nq->res = res; if (!nq->hwq.max_elements || nq->hwq.max_elements > BNXT_QPLIB_NQE_MAX_CNT) nq->hwq.max_elements = BNXT_QPLIB_NQE_MAX_CNT; - if (bnxt_qplib_alloc_init_hwq(nq->pdev, &nq->hwq, NULL, 0, - &nq->hwq.max_elements, - BNXT_QPLIB_MAX_NQE_ENTRY_SIZE, 0, - PAGE_SIZE, HWQ_TYPE_L2_CMPL)) + sginfo.pgsize = PAGE_SIZE; + sginfo.pgshft = PAGE_SHIFT; + hwq_attr.res = res; + hwq_attr.sginfo = &sginfo; + hwq_attr.depth = nq->hwq.max_elements; + hwq_attr.stride = sizeof(struct nq_base); + hwq_attr.type = bnxt_qplib_get_hwq_type(nq->res); + if (bnxt_qplib_alloc_init_hwq(&nq->hwq, &hwq_attr)) { + dev_err(&nq->pdev->dev, "FP NQ allocation failed"); return -ENOMEM; - + } nq->budget = 8; return 0; } +/* SRQ */ +void bnxt_qplib_destroy_srq(struct bnxt_qplib_res *res, + struct bnxt_qplib_srq *srq) +{ + struct bnxt_qplib_rcfw *rcfw = res->rcfw; + struct creq_destroy_srq_resp resp = {}; + struct bnxt_qplib_cmdqmsg msg = {}; + struct cmdq_destroy_srq req = {}; + int rc; + + bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, + CMDQ_BASE_OPCODE_DESTROY_SRQ, + sizeof(req)); + + /* Configure the request */ + req.srq_cid = cpu_to_le32(srq->id); + + bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0); + rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); + kfree(srq->swq); + if (rc) + return; + bnxt_qplib_free_hwq(res, &srq->hwq); +} + +int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, + struct bnxt_qplib_srq *srq) +{ + struct bnxt_qplib_rcfw *rcfw = res->rcfw; + struct bnxt_qplib_hwq_attr hwq_attr = {}; + struct creq_create_srq_resp resp = {}; + struct bnxt_qplib_cmdqmsg msg = {}; + struct cmdq_create_srq req = {}; + struct bnxt_qplib_pbl *pbl; + u16 pg_sz_lvl; + int rc, idx; + + hwq_attr.res = res; + hwq_attr.sginfo = &srq->sg_info; + hwq_attr.depth = srq->max_wqe; + hwq_attr.stride = srq->wqe_size; + hwq_attr.type = HWQ_TYPE_QUEUE; + rc = bnxt_qplib_alloc_init_hwq(&srq->hwq, &hwq_attr); + if (rc) + return rc; + srq->dbinfo.flags = 0; + bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, + CMDQ_BASE_OPCODE_CREATE_SRQ, + sizeof(req)); + + /* Configure the request */ + req.dpi = cpu_to_le32(srq->dpi->dpi); + req.srq_handle = cpu_to_le64((uintptr_t)srq); + + req.srq_size = cpu_to_le16((u16)srq->hwq.max_elements); + pbl = &srq->hwq.pbl[PBL_LVL_0]; + pg_sz_lvl = ((u16)bnxt_qplib_base_pg_size(&srq->hwq) << + CMDQ_CREATE_SRQ_PG_SIZE_SFT); + pg_sz_lvl |= (srq->hwq.level & CMDQ_CREATE_SRQ_LVL_MASK) << + CMDQ_CREATE_SRQ_LVL_SFT; + req.pg_size_lvl = cpu_to_le16(pg_sz_lvl); + req.pbl = cpu_to_le64(pbl->pg_map_arr[0]); + req.pd_id = cpu_to_le32(srq->pd->id); + req.eventq_id = cpu_to_le16(srq->eventq_hw_ring_id); + + bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0); + rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); + if (rc) + goto fail; + + spin_lock_init(&srq->lock); + srq->start_idx = 0; + srq->last_idx = srq->hwq.max_elements - 1; + if (!srq->hwq.is_user) { + srq->swq = kcalloc(srq->hwq.max_elements, sizeof(*srq->swq), + GFP_KERNEL); + if (!srq->swq) { + rc = -ENOMEM; + goto fail; + } + for (idx = 0; idx < srq->hwq.max_elements; idx++) + srq->swq[idx].next_idx = idx + 1; + srq->swq[srq->last_idx].next_idx = -1; + } + + srq->id = le32_to_cpu(resp.xid); + srq->dbinfo.hwq = &srq->hwq; + srq->dbinfo.xid = srq->id; + srq->dbinfo.db = srq->dpi->dbr; + srq->dbinfo.max_slot = 1; + srq->dbinfo.priv_db = res->dpi_tbl.priv_db; + bnxt_qplib_armen_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ_ARMENA); + + return 0; +fail: + bnxt_qplib_free_hwq(res, &srq->hwq); + kfree(srq->swq); + + return rc; +} + +int bnxt_qplib_query_srq(struct bnxt_qplib_res *res, + struct bnxt_qplib_srq *srq) +{ + struct bnxt_qplib_rcfw *rcfw = res->rcfw; + struct creq_query_srq_resp resp = {}; + struct bnxt_qplib_cmdqmsg msg = {}; + struct bnxt_qplib_rcfw_sbuf sbuf; + struct creq_query_srq_resp_sb *sb; + struct cmdq_query_srq req = {}; + int rc; + + bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, + CMDQ_BASE_OPCODE_QUERY_SRQ, + sizeof(req)); + + /* Configure the request */ + sbuf.size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS); + sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size, + &sbuf.dma_addr, GFP_KERNEL); + if (!sbuf.sb) + return -ENOMEM; + req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS; + req.srq_cid = cpu_to_le32(srq->id); + sb = sbuf.sb; + bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req), + sizeof(resp), 0); + rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); + if (!rc) + srq->threshold = le16_to_cpu(sb->srq_limit); + dma_free_coherent(&rcfw->pdev->dev, sbuf.size, + sbuf.sb, sbuf.dma_addr); + + return rc; +} + +int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq, + struct bnxt_qplib_swqe *wqe) +{ + struct bnxt_qplib_hwq *srq_hwq = &srq->hwq; + struct rq_wqe *srqe; + struct sq_sge *hw_sge; + int i, next; + + spin_lock(&srq_hwq->lock); + if (srq->start_idx == srq->last_idx) { + dev_err(&srq_hwq->pdev->dev, + "FP: SRQ (0x%x) is full!\n", srq->id); + spin_unlock(&srq_hwq->lock); + return -EINVAL; + } + next = srq->start_idx; + srq->start_idx = srq->swq[next].next_idx; + spin_unlock(&srq_hwq->lock); + + srqe = bnxt_qplib_get_qe(srq_hwq, srq_hwq->prod, NULL); + memset(srqe, 0, srq->wqe_size); + /* Calculate wqe_size16 and data_len */ + for (i = 0, hw_sge = (struct sq_sge *)srqe->data; + i < wqe->num_sge; i++, hw_sge++) { + hw_sge->va_or_pa = cpu_to_le64(wqe->sg_list[i].addr); + hw_sge->l_key = cpu_to_le32(wqe->sg_list[i].lkey); + hw_sge->size = cpu_to_le32(wqe->sg_list[i].size); + } + srqe->wqe_type = wqe->type; + srqe->flags = wqe->flags; + srqe->wqe_size = wqe->num_sge + + ((offsetof(typeof(*srqe), data) + 15) >> 4); + srqe->wr_id[0] = cpu_to_le32((u32)next); + srq->swq[next].wr_id = wqe->wr_id; + + bnxt_qplib_hwq_incr_prod(&srq->dbinfo, srq_hwq, srq->dbinfo.max_slot); + + /* Ring DB */ + bnxt_qplib_ring_prod_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ); + + return 0; +} + /* QP */ + +static int bnxt_qplib_alloc_init_swq(struct bnxt_qplib_q *que) +{ + int indx; + + que->swq = kcalloc(que->max_sw_wqe, sizeof(*que->swq), GFP_KERNEL); + if (!que->swq) + return -ENOMEM; + + que->swq_start = 0; + que->swq_last = que->max_sw_wqe - 1; + for (indx = 0; indx < que->max_sw_wqe; indx++) + que->swq[indx].next_idx = indx + 1; + que->swq[que->swq_last].next_idx = 0; /* Make it circular */ + que->swq_last = 0; + + return 0; +} + int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) { + struct bnxt_qplib_hwq_attr hwq_attr = {}; struct bnxt_qplib_rcfw *rcfw = res->rcfw; - struct cmdq_create_qp1 req; - struct creq_create_qp1_resp resp; - struct bnxt_qplib_pbl *pbl; + struct creq_create_qp1_resp resp = {}; + struct bnxt_qplib_cmdqmsg msg = {}; struct bnxt_qplib_q *sq = &qp->sq; struct bnxt_qplib_q *rq = &qp->rq; - int rc; - u16 cmd_flags = 0; + struct cmdq_create_qp1 req = {}; + struct bnxt_qplib_pbl *pbl; u32 qp_flags = 0; + u8 pg_sz_lvl; + u32 tbl_indx; + int rc; - RCFW_CMD_PREP(req, CREATE_QP1, cmd_flags); - + sq->dbinfo.flags = 0; + bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, + CMDQ_BASE_OPCODE_CREATE_QP1, + sizeof(req)); /* General */ req.type = qp->type; req.dpi = cpu_to_le32(qp->dpi->dpi); req.qp_handle = cpu_to_le64(qp->qp_handle); /* SQ */ - sq->hwq.max_elements = sq->max_wqe; - rc = bnxt_qplib_alloc_init_hwq(res->pdev, &sq->hwq, NULL, 0, - &sq->hwq.max_elements, - BNXT_QPLIB_MAX_SQE_ENTRY_SIZE, 0, - PAGE_SIZE, HWQ_TYPE_QUEUE); + hwq_attr.res = res; + hwq_attr.sginfo = &sq->sg_info; + hwq_attr.stride = sizeof(struct sq_sge); + hwq_attr.depth = bnxt_qplib_get_depth(sq, qp->wqe_mode, false); + hwq_attr.type = HWQ_TYPE_QUEUE; + rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr); if (rc) - goto exit; + return rc; - sq->swq = kcalloc(sq->hwq.max_elements, sizeof(*sq->swq), GFP_KERNEL); - if (!sq->swq) { - rc = -ENOMEM; + rc = bnxt_qplib_alloc_init_swq(sq); + if (rc) goto fail_sq; - } + + req.sq_size = cpu_to_le32(bnxt_qplib_set_sq_size(sq, qp->wqe_mode)); pbl = &sq->hwq.pbl[PBL_LVL_0]; req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); - req.sq_pg_size_sq_lvl = - ((sq->hwq.level & CMDQ_CREATE_QP1_SQ_LVL_MASK) - << CMDQ_CREATE_QP1_SQ_LVL_SFT) | - (pbl->pg_size == ROCE_PG_SIZE_4K ? - CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_4K : - pbl->pg_size == ROCE_PG_SIZE_8K ? - CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_8K : - pbl->pg_size == ROCE_PG_SIZE_64K ? - CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_64K : - pbl->pg_size == ROCE_PG_SIZE_2M ? - CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_2M : - pbl->pg_size == ROCE_PG_SIZE_8M ? - CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_8M : - pbl->pg_size == ROCE_PG_SIZE_1G ? - CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_1G : - CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_4K); - - if (qp->scq) - req.scq_cid = cpu_to_le32(qp->scq->id); - - qp_flags |= CMDQ_CREATE_QP1_QP_FLAGS_RESERVED_LKEY_ENABLE; + pg_sz_lvl = (bnxt_qplib_base_pg_size(&sq->hwq) << + CMDQ_CREATE_QP1_SQ_PG_SIZE_SFT); + pg_sz_lvl |= (sq->hwq.level & CMDQ_CREATE_QP1_SQ_LVL_MASK); + req.sq_pg_size_sq_lvl = pg_sz_lvl; + req.sq_fwo_sq_sge = + cpu_to_le16((sq->max_sge & CMDQ_CREATE_QP1_SQ_SGE_MASK) << + CMDQ_CREATE_QP1_SQ_SGE_SFT); + req.scq_cid = cpu_to_le32(qp->scq->id); /* RQ */ if (rq->max_wqe) { - rq->hwq.max_elements = qp->rq.max_wqe; - rc = bnxt_qplib_alloc_init_hwq(res->pdev, &rq->hwq, NULL, 0, - &rq->hwq.max_elements, - BNXT_QPLIB_MAX_RQE_ENTRY_SIZE, 0, - PAGE_SIZE, HWQ_TYPE_QUEUE); + rq->dbinfo.flags = 0; + hwq_attr.res = res; + hwq_attr.sginfo = &rq->sg_info; + hwq_attr.stride = sizeof(struct sq_sge); + hwq_attr.depth = bnxt_qplib_get_depth(rq, qp->wqe_mode, false); + hwq_attr.type = HWQ_TYPE_QUEUE; + rc = bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr); + if (rc) + goto sq_swq; + rc = bnxt_qplib_alloc_init_swq(rq); if (rc) - goto fail_sq; - - rq->swq = kcalloc(rq->hwq.max_elements, sizeof(*rq->swq), - GFP_KERNEL); - if (!rq->swq) { - rc = -ENOMEM; goto fail_rq; - } + req.rq_size = cpu_to_le32(rq->max_wqe); pbl = &rq->hwq.pbl[PBL_LVL_0]; req.rq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); - req.rq_pg_size_rq_lvl = - ((rq->hwq.level & CMDQ_CREATE_QP1_RQ_LVL_MASK) << - CMDQ_CREATE_QP1_RQ_LVL_SFT) | - (pbl->pg_size == ROCE_PG_SIZE_4K ? - CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_4K : - pbl->pg_size == ROCE_PG_SIZE_8K ? - CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_8K : - pbl->pg_size == ROCE_PG_SIZE_64K ? - CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_64K : - pbl->pg_size == ROCE_PG_SIZE_2M ? - CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_2M : - pbl->pg_size == ROCE_PG_SIZE_8M ? - CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_8M : - pbl->pg_size == ROCE_PG_SIZE_1G ? - CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_1G : - CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_4K); - if (qp->rcq) - req.rcq_cid = cpu_to_le32(qp->rcq->id); - } - + pg_sz_lvl = (bnxt_qplib_base_pg_size(&rq->hwq) << + CMDQ_CREATE_QP1_RQ_PG_SIZE_SFT); + pg_sz_lvl |= (rq->hwq.level & CMDQ_CREATE_QP1_RQ_LVL_MASK); + req.rq_pg_size_rq_lvl = pg_sz_lvl; + req.rq_fwo_rq_sge = + cpu_to_le16((rq->max_sge & + CMDQ_CREATE_QP1_RQ_SGE_MASK) << + CMDQ_CREATE_QP1_RQ_SGE_SFT); + } + req.rcq_cid = cpu_to_le32(qp->rcq->id); /* Header buffer - allow hdr_buf pass in */ rc = bnxt_qplib_alloc_qp_hdr_buf(res, qp); if (rc) { rc = -ENOMEM; - goto fail; + goto rq_rwq; } + qp_flags |= CMDQ_CREATE_QP1_QP_FLAGS_RESERVED_LKEY_ENABLE; req.qp_flags = cpu_to_le32(qp_flags); - req.sq_size = cpu_to_le32(sq->hwq.max_elements); - req.rq_size = cpu_to_le32(rq->hwq.max_elements); - - req.sq_fwo_sq_sge = - cpu_to_le16((sq->max_sge & CMDQ_CREATE_QP1_SQ_SGE_MASK) << - CMDQ_CREATE_QP1_SQ_SGE_SFT); - req.rq_fwo_rq_sge = - cpu_to_le16((rq->max_sge & CMDQ_CREATE_QP1_RQ_SGE_MASK) << - CMDQ_CREATE_QP1_RQ_SGE_SFT); - req.pd_id = cpu_to_le32(qp->pd->id); - rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, - (void *)&resp, NULL, 0); + bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0); + rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); if (rc) goto fail; qp->id = le32_to_cpu(resp.xid); qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET; - sq->flush_in_progress = false; - rq->flush_in_progress = false; + qp->cctx = res->cctx; + sq->dbinfo.hwq = &sq->hwq; + sq->dbinfo.xid = qp->id; + sq->dbinfo.db = qp->dpi->dbr; + sq->dbinfo.max_slot = bnxt_qplib_set_sq_max_slot(qp->wqe_mode); + if (rq->max_wqe) { + rq->dbinfo.hwq = &rq->hwq; + rq->dbinfo.xid = qp->id; + rq->dbinfo.db = qp->dpi->dbr; + rq->dbinfo.max_slot = bnxt_qplib_set_rq_max_slot(rq->wqe_size); + } + tbl_indx = map_qp_id_to_tbl_indx(qp->id, rcfw); + rcfw->qp_tbl[tbl_indx].qp_id = qp->id; + rcfw->qp_tbl[tbl_indx].qp_handle = (void *)qp; return 0; fail: bnxt_qplib_free_qp_hdr_buf(res, qp); -fail_rq: - bnxt_qplib_free_hwq(res->pdev, &rq->hwq); +rq_rwq: kfree(rq->swq); -fail_sq: - bnxt_qplib_free_hwq(res->pdev, &sq->hwq); +fail_rq: + bnxt_qplib_free_hwq(res, &rq->hwq); +sq_swq: kfree(sq->swq); -exit: +fail_sq: + bnxt_qplib_free_hwq(res, &sq->hwq); return rc; } +static void bnxt_qplib_init_psn_ptr(struct bnxt_qplib_qp *qp, int size) +{ + struct bnxt_qplib_hwq *hwq; + struct bnxt_qplib_q *sq; + u64 fpsne, psn_pg; + u16 indx_pad = 0; + + sq = &qp->sq; + hwq = &sq->hwq; + /* First psn entry */ + fpsne = (u64)bnxt_qplib_get_qe(hwq, hwq->depth, &psn_pg); + if (!IS_ALIGNED(fpsne, PAGE_SIZE)) + indx_pad = (fpsne & ~PAGE_MASK) / size; + hwq->pad_pgofft = indx_pad; + hwq->pad_pg = (u64 *)psn_pg; + hwq->pad_stride = size; +} + int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) { struct bnxt_qplib_rcfw *rcfw = res->rcfw; - struct sq_send *hw_sq_send_hdr, **hw_sq_send_ptr; - struct cmdq_create_qp req; - struct creq_create_qp_resp resp; - struct bnxt_qplib_pbl *pbl; - struct sq_psn_search **psn_search_ptr; - unsigned long int psn_search, poff = 0; + struct bnxt_qplib_hwq_attr hwq_attr = {}; + struct bnxt_qplib_sg_info sginfo = {}; + struct creq_create_qp_resp resp = {}; + struct bnxt_qplib_cmdqmsg msg = {}; struct bnxt_qplib_q *sq = &qp->sq; struct bnxt_qplib_q *rq = &qp->rq; + struct cmdq_create_qp req = {}; + int rc, req_size, psn_sz = 0; struct bnxt_qplib_hwq *xrrq; - int i, rc, req_size, psn_sz; - u16 cmd_flags = 0, max_ssge; - u32 sw_prod, qp_flags = 0; + struct bnxt_qplib_pbl *pbl; + u32 qp_flags = 0; + u8 pg_sz_lvl; + u32 tbl_indx; + u16 nsge; - RCFW_CMD_PREP(req, CREATE_QP, cmd_flags); + qp->is_host_msn_tbl = _is_host_msn_table(res->dattr->dev_cap_flags2); + sq->dbinfo.flags = 0; + bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, + CMDQ_BASE_OPCODE_CREATE_QP, + sizeof(req)); /* General */ req.type = qp->type; @@ -442,136 +985,111 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) req.qp_handle = cpu_to_le64(qp->qp_handle); /* SQ */ - psn_sz = (qp->type == CMDQ_CREATE_QP_TYPE_RC) ? - sizeof(struct sq_psn_search) : 0; - sq->hwq.max_elements = sq->max_wqe; - rc = bnxt_qplib_alloc_init_hwq(res->pdev, &sq->hwq, sq->sglist, - sq->nmap, &sq->hwq.max_elements, - BNXT_QPLIB_MAX_SQE_ENTRY_SIZE, - psn_sz, - PAGE_SIZE, HWQ_TYPE_QUEUE); + if (qp->type == CMDQ_CREATE_QP_TYPE_RC) { + psn_sz = bnxt_qplib_is_chip_gen_p5_p7(res->cctx) ? + sizeof(struct sq_psn_search_ext) : + sizeof(struct sq_psn_search); + + if (qp->is_host_msn_tbl) { + psn_sz = sizeof(struct sq_msn_search); + qp->msn = 0; + } + } + + hwq_attr.res = res; + hwq_attr.sginfo = &sq->sg_info; + hwq_attr.stride = sizeof(struct sq_sge); + hwq_attr.depth = bnxt_qplib_get_depth(sq, qp->wqe_mode, true); + hwq_attr.aux_stride = psn_sz; + hwq_attr.aux_depth = psn_sz ? bnxt_qplib_set_sq_size(sq, qp->wqe_mode) + : 0; + /* Update msn tbl size */ + if (qp->is_host_msn_tbl && psn_sz) { + if (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) + hwq_attr.aux_depth = + roundup_pow_of_two(bnxt_qplib_set_sq_size(sq, qp->wqe_mode)); + else + hwq_attr.aux_depth = + roundup_pow_of_two(bnxt_qplib_set_sq_size(sq, qp->wqe_mode)) / 2; + qp->msn_tbl_sz = hwq_attr.aux_depth; + qp->msn = 0; + } + + hwq_attr.type = HWQ_TYPE_QUEUE; + rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr); if (rc) - goto exit; + return rc; - sq->swq = kcalloc(sq->hwq.max_elements, sizeof(*sq->swq), GFP_KERNEL); - if (!sq->swq) { - rc = -ENOMEM; - goto fail_sq; - } - hw_sq_send_ptr = (struct sq_send **)sq->hwq.pbl_ptr; - if (psn_sz) { - psn_search_ptr = (struct sq_psn_search **) - &hw_sq_send_ptr[get_sqe_pg - (sq->hwq.max_elements)]; - psn_search = (unsigned long int) - &hw_sq_send_ptr[get_sqe_pg(sq->hwq.max_elements)] - [get_sqe_idx(sq->hwq.max_elements)]; - if (psn_search & ~PAGE_MASK) { - /* If the psn_search does not start on a page boundary, - * then calculate the offset - */ - poff = (psn_search & ~PAGE_MASK) / - BNXT_QPLIB_MAX_PSNE_ENTRY_SIZE; - } - for (i = 0; i < sq->hwq.max_elements; i++) - sq->swq[i].psn_search = - &psn_search_ptr[get_psne_pg(i + poff)] - [get_psne_idx(i + poff)]; + if (!sq->hwq.is_user) { + rc = bnxt_qplib_alloc_init_swq(sq); + if (rc) + goto fail_sq; + + if (psn_sz) + bnxt_qplib_init_psn_ptr(qp, psn_sz); } + req.sq_size = cpu_to_le32(bnxt_qplib_set_sq_size(sq, qp->wqe_mode)); pbl = &sq->hwq.pbl[PBL_LVL_0]; req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); - req.sq_pg_size_sq_lvl = - ((sq->hwq.level & CMDQ_CREATE_QP_SQ_LVL_MASK) - << CMDQ_CREATE_QP_SQ_LVL_SFT) | - (pbl->pg_size == ROCE_PG_SIZE_4K ? - CMDQ_CREATE_QP_SQ_PG_SIZE_PG_4K : - pbl->pg_size == ROCE_PG_SIZE_8K ? - CMDQ_CREATE_QP_SQ_PG_SIZE_PG_8K : - pbl->pg_size == ROCE_PG_SIZE_64K ? - CMDQ_CREATE_QP_SQ_PG_SIZE_PG_64K : - pbl->pg_size == ROCE_PG_SIZE_2M ? - CMDQ_CREATE_QP_SQ_PG_SIZE_PG_2M : - pbl->pg_size == ROCE_PG_SIZE_8M ? - CMDQ_CREATE_QP_SQ_PG_SIZE_PG_8M : - pbl->pg_size == ROCE_PG_SIZE_1G ? - CMDQ_CREATE_QP_SQ_PG_SIZE_PG_1G : - CMDQ_CREATE_QP_SQ_PG_SIZE_PG_4K); - - /* initialize all SQ WQEs to LOCAL_INVALID (sq prep for hw fetch) */ - hw_sq_send_ptr = (struct sq_send **)sq->hwq.pbl_ptr; - for (sw_prod = 0; sw_prod < sq->hwq.max_elements; sw_prod++) { - hw_sq_send_hdr = &hw_sq_send_ptr[get_sqe_pg(sw_prod)] - [get_sqe_idx(sw_prod)]; - hw_sq_send_hdr->wqe_type = SQ_BASE_WQE_TYPE_LOCAL_INVALID; - } - - if (qp->scq) - req.scq_cid = cpu_to_le32(qp->scq->id); - - qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE; - qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED; - if (qp->sig_type) - qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION; + pg_sz_lvl = (bnxt_qplib_base_pg_size(&sq->hwq) << + CMDQ_CREATE_QP_SQ_PG_SIZE_SFT); + pg_sz_lvl |= (sq->hwq.level & CMDQ_CREATE_QP_SQ_LVL_MASK); + req.sq_pg_size_sq_lvl = pg_sz_lvl; + req.sq_fwo_sq_sge = + cpu_to_le16(((sq->max_sge & CMDQ_CREATE_QP_SQ_SGE_MASK) << + CMDQ_CREATE_QP_SQ_SGE_SFT) | 0); + req.scq_cid = cpu_to_le32(qp->scq->id); /* RQ */ - if (rq->max_wqe) { - rq->hwq.max_elements = rq->max_wqe; - rc = bnxt_qplib_alloc_init_hwq(res->pdev, &rq->hwq, rq->sglist, - rq->nmap, &rq->hwq.max_elements, - BNXT_QPLIB_MAX_RQE_ENTRY_SIZE, 0, - PAGE_SIZE, HWQ_TYPE_QUEUE); + if (!qp->srq) { + rq->dbinfo.flags = 0; + hwq_attr.res = res; + hwq_attr.sginfo = &rq->sg_info; + hwq_attr.stride = sizeof(struct sq_sge); + hwq_attr.depth = bnxt_qplib_get_depth(rq, qp->wqe_mode, false); + hwq_attr.aux_stride = 0; + hwq_attr.aux_depth = 0; + hwq_attr.type = HWQ_TYPE_QUEUE; + rc = bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr); if (rc) - goto fail_sq; - - rq->swq = kcalloc(rq->hwq.max_elements, sizeof(*rq->swq), - GFP_KERNEL); - if (!rq->swq) { - rc = -ENOMEM; - goto fail_rq; + goto sq_swq; + if (!rq->hwq.is_user) { + rc = bnxt_qplib_alloc_init_swq(rq); + if (rc) + goto fail_rq; } + + req.rq_size = cpu_to_le32(rq->max_wqe); pbl = &rq->hwq.pbl[PBL_LVL_0]; req.rq_pbl = cpu_to_le64(pbl->pg_map_arr[0]); - req.rq_pg_size_rq_lvl = - ((rq->hwq.level & CMDQ_CREATE_QP_RQ_LVL_MASK) << - CMDQ_CREATE_QP_RQ_LVL_SFT) | - (pbl->pg_size == ROCE_PG_SIZE_4K ? - CMDQ_CREATE_QP_RQ_PG_SIZE_PG_4K : - pbl->pg_size == ROCE_PG_SIZE_8K ? - CMDQ_CREATE_QP_RQ_PG_SIZE_PG_8K : - pbl->pg_size == ROCE_PG_SIZE_64K ? - CMDQ_CREATE_QP_RQ_PG_SIZE_PG_64K : - pbl->pg_size == ROCE_PG_SIZE_2M ? - CMDQ_CREATE_QP_RQ_PG_SIZE_PG_2M : - pbl->pg_size == ROCE_PG_SIZE_8M ? - CMDQ_CREATE_QP_RQ_PG_SIZE_PG_8M : - pbl->pg_size == ROCE_PG_SIZE_1G ? - CMDQ_CREATE_QP_RQ_PG_SIZE_PG_1G : - CMDQ_CREATE_QP_RQ_PG_SIZE_PG_4K); - } - - if (qp->rcq) - req.rcq_cid = cpu_to_le32(qp->rcq->id); - req.qp_flags = cpu_to_le32(qp_flags); - req.sq_size = cpu_to_le32(sq->hwq.max_elements); - req.rq_size = cpu_to_le32(rq->hwq.max_elements); - qp->sq_hdr_buf = NULL; - qp->rq_hdr_buf = NULL; + pg_sz_lvl = (bnxt_qplib_base_pg_size(&rq->hwq) << + CMDQ_CREATE_QP_RQ_PG_SIZE_SFT); + pg_sz_lvl |= (rq->hwq.level & CMDQ_CREATE_QP_RQ_LVL_MASK); + req.rq_pg_size_rq_lvl = pg_sz_lvl; + nsge = (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ? + 6 : rq->max_sge; + req.rq_fwo_rq_sge = + cpu_to_le16(((nsge & + CMDQ_CREATE_QP_RQ_SGE_MASK) << + CMDQ_CREATE_QP_RQ_SGE_SFT) | 0); + } else { + /* SRQ */ + qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_SRQ_USED; + req.srq_cid = cpu_to_le32(qp->srq->id); + } + req.rcq_cid = cpu_to_le32(qp->rcq->id); - rc = bnxt_qplib_alloc_qp_hdr_buf(res, qp); - if (rc) - goto fail_rq; + qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE; + qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED; + if (qp->sig_type) + qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION; + if (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) + qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_VARIABLE_SIZED_WQE_ENABLED; + if (bnxt_ext_stats_supported(res->cctx, res->dattr->dev_cap_flags, res->is_vf)) + qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_EXT_STATS_ENABLED; + + req.qp_flags = cpu_to_le32(qp_flags); - /* CTRL-22434: Irrespective of the requested SGE count on the SQ - * always create the QP with max send sges possible if the requested - * inline size is greater than 0. - */ - max_ssge = qp->max_inline_data ? 6 : sq->max_sge; - req.sq_fwo_sq_sge = cpu_to_le16( - ((max_ssge & CMDQ_CREATE_QP_SQ_SGE_MASK) - << CMDQ_CREATE_QP_SQ_SGE_SFT) | 0); - req.rq_fwo_rq_sge = cpu_to_le16( - ((rq->max_sge & CMDQ_CREATE_QP_RQ_SGE_MASK) - << CMDQ_CREATE_QP_RQ_SGE_SFT) | 0); /* ORRQ and IRRQ */ if (psn_sz) { xrrq = &qp->orrq; @@ -580,12 +1098,19 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) req_size = xrrq->max_elements * BNXT_QPLIB_MAX_ORRQE_ENTRY_SIZE + PAGE_SIZE - 1; req_size &= ~(PAGE_SIZE - 1); - rc = bnxt_qplib_alloc_init_hwq(res->pdev, xrrq, NULL, 0, - &xrrq->max_elements, - BNXT_QPLIB_MAX_ORRQE_ENTRY_SIZE, - 0, req_size, HWQ_TYPE_CTX); + sginfo.pgsize = req_size; + sginfo.pgshft = PAGE_SHIFT; + + hwq_attr.res = res; + hwq_attr.sginfo = &sginfo; + hwq_attr.depth = xrrq->max_elements; + hwq_attr.stride = BNXT_QPLIB_MAX_ORRQE_ENTRY_SIZE; + hwq_attr.aux_stride = 0; + hwq_attr.aux_depth = 0; + hwq_attr.type = HWQ_TYPE_CTX; + rc = bnxt_qplib_alloc_init_hwq(xrrq, &hwq_attr); if (rc) - goto fail_buf_free; + goto rq_swq; pbl = &xrrq->pbl[PBL_LVL_0]; req.orrq_addr = cpu_to_le64(pbl->pg_map_arr[0]); @@ -595,11 +1120,10 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) req_size = xrrq->max_elements * BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE + PAGE_SIZE - 1; req_size &= ~(PAGE_SIZE - 1); - - rc = bnxt_qplib_alloc_init_hwq(res->pdev, xrrq, NULL, 0, - &xrrq->max_elements, - BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE, - 0, req_size, HWQ_TYPE_CTX); + sginfo.pgsize = req_size; + hwq_attr.depth = xrrq->max_elements; + hwq_attr.stride = BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE; + rc = bnxt_qplib_alloc_init_hwq(xrrq, &hwq_attr); if (rc) goto fail_orrq; @@ -608,33 +1132,46 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) } req.pd_id = cpu_to_le32(qp->pd->id); - rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, - (void *)&resp, NULL, 0); + bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), + sizeof(resp), 0); + rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); if (rc) goto fail; qp->id = le32_to_cpu(resp.xid); qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET; - sq->flush_in_progress = false; - rq->flush_in_progress = false; + INIT_LIST_HEAD(&qp->sq_flush); + INIT_LIST_HEAD(&qp->rq_flush); + qp->cctx = res->cctx; + sq->dbinfo.hwq = &sq->hwq; + sq->dbinfo.xid = qp->id; + sq->dbinfo.db = qp->dpi->dbr; + sq->dbinfo.max_slot = bnxt_qplib_set_sq_max_slot(qp->wqe_mode); + if (rq->max_wqe) { + rq->dbinfo.hwq = &rq->hwq; + rq->dbinfo.xid = qp->id; + rq->dbinfo.db = qp->dpi->dbr; + rq->dbinfo.max_slot = bnxt_qplib_set_rq_max_slot(rq->wqe_size); + } + spin_lock_bh(&rcfw->tbl_lock); + tbl_indx = map_qp_id_to_tbl_indx(qp->id, rcfw); + rcfw->qp_tbl[tbl_indx].qp_id = qp->id; + rcfw->qp_tbl[tbl_indx].qp_handle = (void *)qp; + spin_unlock_bh(&rcfw->tbl_lock); return 0; - fail: - if (qp->irrq.max_elements) - bnxt_qplib_free_hwq(res->pdev, &qp->irrq); + bnxt_qplib_free_hwq(res, &qp->irrq); fail_orrq: - if (qp->orrq.max_elements) - bnxt_qplib_free_hwq(res->pdev, &qp->orrq); -fail_buf_free: - bnxt_qplib_free_qp_hdr_buf(res, qp); -fail_rq: - bnxt_qplib_free_hwq(res->pdev, &rq->hwq); + bnxt_qplib_free_hwq(res, &qp->orrq); +rq_swq: kfree(rq->swq); -fail_sq: - bnxt_qplib_free_hwq(res->pdev, &sq->hwq); +fail_rq: + bnxt_qplib_free_hwq(res, &rq->hwq); +sq_swq: kfree(sq->swq); -exit: +fail_sq: + bnxt_qplib_free_hwq(res, &sq->hwq); return rc; } @@ -652,8 +1189,6 @@ static void __modify_flags_from_init_state(struct bnxt_qplib_qp *qp) qp->path_mtu = CMDQ_MODIFY_QP_PATH_MTU_MTU_2048; } - qp->modify_flags &= - ~CMDQ_MODIFY_QP_MODIFY_MASK_VLAN_ID; /* Bono FW require the max_dest_rd_atomic to be >= 1 */ if (qp->max_dest_rd_atomic < 1) qp->max_dest_rd_atomic = 1; @@ -727,20 +1262,73 @@ static void __filter_modify_flags(struct bnxt_qplib_qp *qp) } } +static void bnxt_set_mandatory_attributes(struct bnxt_qplib_res *res, + struct bnxt_qplib_qp *qp, + struct cmdq_modify_qp *req) +{ + u32 mandatory_flags = 0; + + if (qp->type == CMDQ_MODIFY_QP_QP_TYPE_RC) + mandatory_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS; + + if (qp->cur_qp_state == CMDQ_MODIFY_QP_NEW_STATE_INIT && + qp->state == CMDQ_MODIFY_QP_NEW_STATE_RTR) { + if (qp->type == CMDQ_MODIFY_QP_QP_TYPE_RC && qp->srq) + req->flags = cpu_to_le16(CMDQ_MODIFY_QP_FLAGS_SRQ_USED); + mandatory_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PKEY; + } + + if (_is_min_rnr_in_rtr_rts_mandatory(res->dattr->dev_cap_flags2) && + (qp->cur_qp_state == CMDQ_MODIFY_QP_NEW_STATE_RTR && + qp->state == CMDQ_MODIFY_QP_NEW_STATE_RTS)) { + if (qp->type == CMDQ_MODIFY_QP_QP_TYPE_RC) + mandatory_flags |= + CMDQ_MODIFY_QP_MODIFY_MASK_MIN_RNR_TIMER; + } + + if (qp->type == CMDQ_MODIFY_QP_QP_TYPE_UD || + qp->type == CMDQ_MODIFY_QP_QP_TYPE_GSI) + mandatory_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_QKEY; + + qp->modify_flags |= mandatory_flags; + req->qp_type = qp->type; +} + +static bool is_optimized_state_transition(struct bnxt_qplib_qp *qp) +{ + if ((qp->cur_qp_state == CMDQ_MODIFY_QP_NEW_STATE_INIT && + qp->state == CMDQ_MODIFY_QP_NEW_STATE_RTR) || + (qp->cur_qp_state == CMDQ_MODIFY_QP_NEW_STATE_RTR && + qp->state == CMDQ_MODIFY_QP_NEW_STATE_RTS)) + return true; + + return false; +} + int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) { + struct bnxt_qplib_sgid_tbl *sgid_tbl = &res->sgid_tbl; struct bnxt_qplib_rcfw *rcfw = res->rcfw; - struct cmdq_modify_qp req; - struct creq_modify_qp_resp resp; - u16 cmd_flags = 0, pkey; + struct creq_modify_qp_resp resp = {}; + struct bnxt_qplib_cmdqmsg msg = {}; + struct cmdq_modify_qp req = {}; + u16 vlan_pcp_vlan_dei_vlan_id; u32 temp32[4]; u32 bmask; int rc; - RCFW_CMD_PREP(req, MODIFY_QP, cmd_flags); + bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, + CMDQ_BASE_OPCODE_MODIFY_QP, + sizeof(req)); /* Filter out the qp_attr_mask based on the state->new transition */ __filter_modify_flags(qp); + if (qp->modify_flags & CMDQ_MODIFY_QP_MODIFY_MASK_STATE) { + /* Set mandatory attributes for INIT -> RTR and RTR -> RTS transition */ + if (_is_optimize_modify_qp_supported(res->dattr->dev_cap_flags2) && + is_optimized_state_transition(qp)) + bnxt_set_mandatory_attributes(res, qp, &req); + } bmask = qp->modify_flags; req.modify_mask = cpu_to_le32(qp->modify_flags); req.qp_cid = cpu_to_le32(qp->id); @@ -755,11 +1343,9 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS) req.access = qp->access; - if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_PKEY) { - if (!bnxt_qplib_get_pkey(res, &res->pkey_tbl, - qp->pkey_index, &pkey)) - req.pkey = cpu_to_le16(pkey); - } + if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_PKEY) + req.pkey = cpu_to_le16(IB_DEFAULT_PKEY_FULL); + if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_QKEY) req.qkey = cpu_to_le32(qp->qkey); @@ -773,9 +1359,14 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL) req.flow_label = cpu_to_le32(qp->ah.flow_label); - if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX) - req.sgid_index = cpu_to_le16(res->sgid_tbl.hw_id - [qp->ah.sgid_index]); + if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX) { + if (qp->type == CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE) + req.sgid_index = + cpu_to_le16(sgid_tbl->hw_id[qp->ugid_index]); + else + req.sgid_index = + cpu_to_le16(sgid_tbl->hw_id[qp->ah.sgid_index]); + } if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_HOP_LIMIT) req.hop_limit = qp->ah.hop_limit; @@ -787,7 +1378,7 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) memcpy(req.dest_mac, qp->ah.dmac, 6); if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU) - req.path_mtu = qp->path_mtu; + req.path_mtu_pingpong_push_enable |= qp->path_mtu; if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_TIMEOUT) req.timeout = qp->timeout; @@ -823,10 +1414,19 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_DEST_QP_ID) req.dest_qp_id = cpu_to_le32(qp->dest_qpn); - req.vlan_pcp_vlan_dei_vlan_id = cpu_to_le16(qp->vlan_id); + if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_VLAN_ID) { + vlan_pcp_vlan_dei_vlan_id = + ((res->sgid_tbl.tbl[qp->ah.sgid_index].vlan_id << + CMDQ_MODIFY_QP_VLAN_ID_SFT) & + CMDQ_MODIFY_QP_VLAN_ID_MASK); + vlan_pcp_vlan_dei_vlan_id |= + ((qp->ah.sl << CMDQ_MODIFY_QP_VLAN_PCP_SFT) & + CMDQ_MODIFY_QP_VLAN_PCP_MASK); + req.vlan_pcp_vlan_dei_vlan_id = cpu_to_le16(vlan_pcp_vlan_dei_vlan_id); + } - rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, - (void *)&resp, NULL, 0); + bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0); + rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); if (rc) return rc; qp->cur_qp_state = qp->state; @@ -836,36 +1436,41 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) { struct bnxt_qplib_rcfw *rcfw = res->rcfw; - struct cmdq_query_qp req; - struct creq_query_qp_resp resp; - struct bnxt_qplib_rcfw_sbuf *sbuf; + struct creq_query_qp_resp resp = {}; + struct bnxt_qplib_cmdqmsg msg = {}; + struct bnxt_qplib_rcfw_sbuf sbuf; struct creq_query_qp_resp_sb *sb; - u16 cmd_flags = 0; + struct cmdq_query_qp req = {}; u32 temp32[4]; - int i, rc = 0; + int i, rc; - RCFW_CMD_PREP(req, QUERY_QP, cmd_flags); - - sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb)); - if (!sbuf) + sbuf.size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS); + sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size, + &sbuf.dma_addr, GFP_KERNEL); + if (!sbuf.sb) return -ENOMEM; - sb = sbuf->sb; + sb = sbuf.sb; + + bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, + CMDQ_BASE_OPCODE_QUERY_QP, + sizeof(req)); req.qp_cid = cpu_to_le32(qp->id); - req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS; - rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp, - (void *)sbuf, 0); + req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS; + bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req), + sizeof(resp), 0); + rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); if (rc) goto bail; /* Extract the context from the side buffer */ qp->state = sb->en_sqd_async_notify_state & CREQ_QUERY_QP_RESP_SB_STATE_MASK; qp->en_sqd_async_notify = sb->en_sqd_async_notify_state & - CREQ_QUERY_QP_RESP_SB_EN_SQD_ASYNC_NOTIFY ? - true : false; + CREQ_QUERY_QP_RESP_SB_EN_SQD_ASYNC_NOTIFY; qp->access = sb->access; qp->pkey_index = le16_to_cpu(sb->pkey); qp->qkey = le32_to_cpu(sb->qkey); + qp->udp_sport = le16_to_cpu(sb->udp_src_port); temp32[0] = le32_to_cpu(sb->dgid[0]); temp32[1] = le32_to_cpu(sb->dgid[1]); @@ -883,7 +1488,7 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) } } if (i == res->sgid_tbl.max) - dev_warn(&res->pdev->dev, "QPLIB: SGID not found??"); + dev_warn(&res->pdev->dev, "SGID not found??\n"); qp->ah.hop_limit = sb->hop_limit; qp->ah.traffic_class = sb->traffic_class; @@ -911,22 +1516,31 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) qp->dest_qpn = le32_to_cpu(sb->dest_qp_id); memcpy(qp->smac, sb->src_mac, 6); qp->vlan_id = le16_to_cpu(sb->vlan_pcp_vlan_dei_vlan_id); + qp->port_id = le16_to_cpu(sb->port_id); bail: - bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf); + dma_free_coherent(&rcfw->pdev->dev, sbuf.size, + sbuf.sb, sbuf.dma_addr); return rc; } static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp) { struct bnxt_qplib_hwq *cq_hwq = &cq->hwq; - struct cq_base *hw_cqe, **hw_cqe_ptr; + u32 peek_flags, peek_cons; + struct cq_base *hw_cqe; int i; + peek_flags = cq->dbinfo.flags; + peek_cons = cq_hwq->cons; for (i = 0; i < cq_hwq->max_elements; i++) { - hw_cqe_ptr = (struct cq_base **)cq_hwq->pbl_ptr; - hw_cqe = &hw_cqe_ptr[CQE_PG(i)][CQE_IDX(i)]; - if (!CQE_CMP_VALID(hw_cqe, i, cq_hwq->max_elements)) + hw_cqe = bnxt_qplib_get_qe(cq_hwq, peek_cons, NULL); + if (!CQE_CMP_VALID(hw_cqe, peek_flags)) continue; + /* + * The valid test of the entry must be done first before + * reading any further. + */ + dma_rmb(); switch (hw_cqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK) { case CQ_BASE_CQE_TYPE_REQ: case CQ_BASE_CQE_TYPE_TERMINAL: @@ -950,6 +1564,8 @@ static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp) default: break; } + bnxt_qplib_hwq_incr_cons(cq_hwq->max_elements, &peek_cons, + 1, &peek_flags); } } @@ -957,46 +1573,52 @@ int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) { struct bnxt_qplib_rcfw *rcfw = res->rcfw; - struct cmdq_destroy_qp req; - struct creq_destroy_qp_resp resp; - unsigned long flags; - u16 cmd_flags = 0; + struct creq_destroy_qp_resp resp = {}; + struct bnxt_qplib_cmdqmsg msg = {}; + struct cmdq_destroy_qp req = {}; + u32 tbl_indx; int rc; - RCFW_CMD_PREP(req, DESTROY_QP, cmd_flags); + spin_lock_bh(&rcfw->tbl_lock); + tbl_indx = map_qp_id_to_tbl_indx(qp->id, rcfw); + rcfw->qp_tbl[tbl_indx].qp_id = BNXT_QPLIB_QP_ID_INVALID; + rcfw->qp_tbl[tbl_indx].qp_handle = NULL; + spin_unlock_bh(&rcfw->tbl_lock); + + bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, + CMDQ_BASE_OPCODE_DESTROY_QP, + sizeof(req)); req.qp_cid = cpu_to_le32(qp->id); - rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, - (void *)&resp, NULL, 0); - if (rc) + bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), + sizeof(resp), 0); + rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); + if (rc) { + spin_lock_bh(&rcfw->tbl_lock); + rcfw->qp_tbl[tbl_indx].qp_id = qp->id; + rcfw->qp_tbl[tbl_indx].qp_handle = qp; + spin_unlock_bh(&rcfw->tbl_lock); return rc; - - /* Must walk the associated CQs to nullified the QP ptr */ - spin_lock_irqsave(&qp->scq->hwq.lock, flags); - - __clean_cq(qp->scq, (u64)(unsigned long)qp); - - if (qp->rcq && qp->rcq != qp->scq) { - spin_lock(&qp->rcq->hwq.lock); - __clean_cq(qp->rcq, (u64)(unsigned long)qp); - spin_unlock(&qp->rcq->hwq.lock); } - spin_unlock_irqrestore(&qp->scq->hwq.lock, flags); + return 0; +} +void bnxt_qplib_free_qp_res(struct bnxt_qplib_res *res, + struct bnxt_qplib_qp *qp) +{ bnxt_qplib_free_qp_hdr_buf(res, qp); - bnxt_qplib_free_hwq(res->pdev, &qp->sq.hwq); + bnxt_qplib_free_hwq(res, &qp->sq.hwq); kfree(qp->sq.swq); - bnxt_qplib_free_hwq(res->pdev, &qp->rq.hwq); + bnxt_qplib_free_hwq(res, &qp->rq.hwq); kfree(qp->rq.swq); if (qp->irrq.max_elements) - bnxt_qplib_free_hwq(res->pdev, &qp->irrq); + bnxt_qplib_free_hwq(res, &qp->irrq); if (qp->orrq.max_elements) - bnxt_qplib_free_hwq(res->pdev, &qp->orrq); + bnxt_qplib_free_hwq(res, &qp->orrq); - return 0; } void *bnxt_qplib_get_qp1_sq_buf(struct bnxt_qplib_qp *qp, @@ -1008,7 +1630,7 @@ void *bnxt_qplib_get_qp1_sq_buf(struct bnxt_qplib_qp *qp, memset(sge, 0, sizeof(*sge)); if (qp->sq_hdr_buf) { - sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq); + sw_prod = sq->swq_start; sge->addr = (dma_addr_t)(qp->sq_hdr_buf_map + sw_prod * qp->sq_hdr_buf_size); sge->lkey = 0xFFFFFFFF; @@ -1022,7 +1644,7 @@ u32 bnxt_qplib_get_rq_prod_index(struct bnxt_qplib_qp *qp) { struct bnxt_qplib_q *rq = &qp->rq; - return HWQ_CMP(rq->hwq.prod, &rq->hwq); + return rq->swq_start; } dma_addr_t bnxt_qplib_get_qp_buf_from_index(struct bnxt_qplib_qp *qp, u32 index) @@ -1039,7 +1661,7 @@ void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp, memset(sge, 0, sizeof(*sge)); if (qp->rq_hdr_buf) { - sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq); + sw_prod = rq->swq_start; sge->addr = (dma_addr_t)(qp->rq_hdr_buf_map + sw_prod * qp->rq_hdr_buf_size); sge->lkey = 0xFFFFFFFF; @@ -1049,137 +1671,298 @@ void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp, return NULL; } +/* Fil the MSN table into the next psn row */ +static void bnxt_qplib_fill_msn_search(struct bnxt_qplib_qp *qp, + struct bnxt_qplib_swqe *wqe, + struct bnxt_qplib_swq *swq) +{ + struct sq_msn_search *msns; + u32 start_psn, next_psn; + u16 start_idx; + + msns = (struct sq_msn_search *)swq->psn_search; + msns->start_idx_next_psn_start_psn = 0; + + start_psn = swq->start_psn; + next_psn = swq->next_psn; + start_idx = swq->slot_idx; + msns->start_idx_next_psn_start_psn |= + bnxt_re_update_msn_tbl(start_idx, next_psn, start_psn); + qp->msn++; + qp->msn %= qp->msn_tbl_sz; +} + +static void bnxt_qplib_fill_psn_search(struct bnxt_qplib_qp *qp, + struct bnxt_qplib_swqe *wqe, + struct bnxt_qplib_swq *swq) +{ + struct sq_psn_search_ext *psns_ext; + struct sq_psn_search *psns; + u32 flg_npsn; + u32 op_spsn; + + if (!swq->psn_search) + return; + /* Handle MSN differently on cap flags */ + if (qp->is_host_msn_tbl) { + bnxt_qplib_fill_msn_search(qp, wqe, swq); + return; + } + psns = (struct sq_psn_search *)swq->psn_search; + psns = swq->psn_search; + psns_ext = swq->psn_ext; + + op_spsn = ((swq->start_psn << SQ_PSN_SEARCH_START_PSN_SFT) & + SQ_PSN_SEARCH_START_PSN_MASK); + op_spsn |= ((wqe->type << SQ_PSN_SEARCH_OPCODE_SFT) & + SQ_PSN_SEARCH_OPCODE_MASK); + flg_npsn = ((swq->next_psn << SQ_PSN_SEARCH_NEXT_PSN_SFT) & + SQ_PSN_SEARCH_NEXT_PSN_MASK); + + if (bnxt_qplib_is_chip_gen_p5_p7(qp->cctx)) { + psns_ext->opcode_start_psn = cpu_to_le32(op_spsn); + psns_ext->flags_next_psn = cpu_to_le32(flg_npsn); + psns_ext->start_slot_idx = cpu_to_le16(swq->slot_idx); + } else { + psns->opcode_start_psn = cpu_to_le32(op_spsn); + psns->flags_next_psn = cpu_to_le32(flg_npsn); + } +} + +static unsigned int bnxt_qplib_put_inline(struct bnxt_qplib_qp *qp, + struct bnxt_qplib_swqe *wqe, + u32 *idx) +{ + struct bnxt_qplib_hwq *hwq; + int len, t_len, offt; + bool pull_dst = true; + void *il_dst = NULL; + void *il_src = NULL; + int t_cplen, cplen; + int indx; + + hwq = &qp->sq.hwq; + t_len = 0; + for (indx = 0; indx < wqe->num_sge; indx++) { + len = wqe->sg_list[indx].size; + il_src = (void *)wqe->sg_list[indx].addr; + t_len += len; + if (t_len > qp->max_inline_data) + return BNXT_RE_INVAL_MSG_SIZE; + while (len) { + if (pull_dst) { + pull_dst = false; + il_dst = bnxt_qplib_get_prod_qe(hwq, *idx); + (*idx)++; + t_cplen = 0; + offt = 0; + } + cplen = min_t(int, len, sizeof(struct sq_sge)); + cplen = min_t(int, cplen, + (sizeof(struct sq_sge) - offt)); + memcpy(il_dst, il_src, cplen); + t_cplen += cplen; + il_src += cplen; + il_dst += cplen; + offt += cplen; + len -= cplen; + if (t_cplen == sizeof(struct sq_sge)) + pull_dst = true; + } + } + + return t_len; +} + +static unsigned int bnxt_qplib_put_sges(struct bnxt_qplib_hwq *hwq, + struct bnxt_qplib_sge *ssge, + u32 nsge, u32 *idx) +{ + struct sq_sge *dsge; + int indx, len = 0; + + for (indx = 0; indx < nsge; indx++, (*idx)++) { + dsge = bnxt_qplib_get_prod_qe(hwq, *idx); + dsge->va_or_pa = cpu_to_le64(ssge[indx].addr); + dsge->l_key = cpu_to_le32(ssge[indx].lkey); + dsge->size = cpu_to_le32(ssge[indx].size); + len += ssge[indx].size; + } + + return len; +} + +static u16 bnxt_qplib_required_slots(struct bnxt_qplib_qp *qp, + struct bnxt_qplib_swqe *wqe, + u16 *wqe_sz, u16 *qdf, u8 mode) +{ + u32 ilsize, bytes; + u16 nsge; + u16 slot; + + nsge = wqe->num_sge; + /* Adding sq_send_hdr is a misnomer, for rq also hdr size is same. */ + bytes = sizeof(struct sq_send_hdr) + nsge * sizeof(struct sq_sge); + if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) { + ilsize = bnxt_qplib_calc_ilsize(wqe, qp->max_inline_data); + bytes = ALIGN(ilsize, sizeof(struct sq_sge)); + bytes += sizeof(struct sq_send_hdr); + } + + *qdf = __xlate_qfd(qp->sq.q_full_delta, bytes); + slot = bytes >> 4; + *wqe_sz = slot; + if (mode == BNXT_QPLIB_WQE_MODE_STATIC) + slot = 8; + return slot; +} + +static void bnxt_qplib_pull_psn_buff(struct bnxt_qplib_qp *qp, struct bnxt_qplib_q *sq, + struct bnxt_qplib_swq *swq, bool hw_retx) +{ + struct bnxt_qplib_hwq *hwq; + u32 pg_num, pg_indx; + void *buff; + u32 tail; + + hwq = &sq->hwq; + if (!hwq->pad_pg) + return; + tail = swq->slot_idx / sq->dbinfo.max_slot; + if (hw_retx) { + /* For HW retx use qp msn index */ + tail = qp->msn; + tail %= qp->msn_tbl_sz; + } + pg_num = (tail + hwq->pad_pgofft) / (PAGE_SIZE / hwq->pad_stride); + pg_indx = (tail + hwq->pad_pgofft) % (PAGE_SIZE / hwq->pad_stride); + buff = (void *)(hwq->pad_pg[pg_num] + pg_indx * hwq->pad_stride); + swq->psn_ext = buff; + swq->psn_search = buff; +} + void bnxt_qplib_post_send_db(struct bnxt_qplib_qp *qp) { struct bnxt_qplib_q *sq = &qp->sq; - struct dbr_dbr db_msg = { 0 }; - u32 sw_prod; - sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq); - - db_msg.index = cpu_to_le32((sw_prod << DBR_DBR_INDEX_SFT) & - DBR_DBR_INDEX_MASK); - db_msg.type_xid = - cpu_to_le32(((qp->id << DBR_DBR_XID_SFT) & DBR_DBR_XID_MASK) | - DBR_DBR_TYPE_SQ); - /* Flush all the WQE writes to HW */ - wmb(); - __iowrite64_copy(qp->dpi->dbr, &db_msg, sizeof(db_msg) / sizeof(u64)); + bnxt_qplib_ring_prod_db(&sq->dbinfo, DBC_DBC_TYPE_SQ); } int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, struct bnxt_qplib_swqe *wqe) { + struct bnxt_qplib_nq_work *nq_work = NULL; + int i, rc = 0, data_len = 0, pkt_num = 0; struct bnxt_qplib_q *sq = &qp->sq; + struct bnxt_qplib_hwq *hwq; struct bnxt_qplib_swq *swq; - struct sq_send *hw_sq_send_hdr, **hw_sq_send_ptr; - struct sq_sge *hw_sge; - u32 sw_prod; - u8 wqe_size16; - int i, rc = 0, data_len = 0, pkt_num = 0; + bool sch_handler = false; + u32 wqe_idx, slots, idx; + u16 wqe_sz, qdf = 0; + bool msn_update; + void *base_hdr; + void *ext_hdr; __le32 temp32; - if (qp->state != CMDQ_MODIFY_QP_NEW_STATE_RTS) { + hwq = &sq->hwq; + if (qp->state != CMDQ_MODIFY_QP_NEW_STATE_RTS && + qp->state != CMDQ_MODIFY_QP_NEW_STATE_ERR) { + dev_err(&hwq->pdev->dev, + "QPLIB: FP: QP (0x%x) is in the 0x%x state", + qp->id, qp->state); rc = -EINVAL; goto done; } - if (bnxt_qplib_queue_full(sq)) { - dev_err(&sq->hwq.pdev->dev, - "QPLIB: prod = %#x cons = %#x qdepth = %#x delta = %#x", - sq->hwq.prod, sq->hwq.cons, sq->hwq.max_elements, - sq->q_full_delta); + slots = bnxt_qplib_required_slots(qp, wqe, &wqe_sz, &qdf, qp->wqe_mode); + if (bnxt_qplib_queue_full(sq, slots + qdf)) { + dev_err(&hwq->pdev->dev, + "prod = %#x cons = %#x qdepth = %#x delta = %#x\n", + hwq->prod, hwq->cons, hwq->depth, sq->q_full_delta); rc = -ENOMEM; goto done; } - sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq); - swq = &sq->swq[sw_prod]; + + swq = bnxt_qplib_get_swqe(sq, &wqe_idx); + bnxt_qplib_pull_psn_buff(qp, sq, swq, qp->is_host_msn_tbl); + + idx = 0; + swq->slot_idx = hwq->prod; + swq->slots = slots; swq->wr_id = wqe->wr_id; swq->type = wqe->type; swq->flags = wqe->flags; + swq->start_psn = sq->psn & BTH_PSN_MASK; if (qp->sig_type) swq->flags |= SQ_SEND_FLAGS_SIGNAL_COMP; - swq->start_psn = sq->psn & BTH_PSN_MASK; - hw_sq_send_ptr = (struct sq_send **)sq->hwq.pbl_ptr; - hw_sq_send_hdr = &hw_sq_send_ptr[get_sqe_pg(sw_prod)] - [get_sqe_idx(sw_prod)]; + if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { + sch_handler = true; + dev_dbg(&hwq->pdev->dev, + "%s Error QP. Scheduling for poll_cq\n", __func__); + goto queue_err; + } - memset(hw_sq_send_hdr, 0, BNXT_QPLIB_MAX_SQE_ENTRY_SIZE); + base_hdr = bnxt_qplib_get_prod_qe(hwq, idx++); + ext_hdr = bnxt_qplib_get_prod_qe(hwq, idx++); + memset(base_hdr, 0, sizeof(struct sq_sge)); + memset(ext_hdr, 0, sizeof(struct sq_sge)); - if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) { + if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) /* Copy the inline data */ - if (wqe->inline_len > BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) { - dev_warn(&sq->hwq.pdev->dev, - "QPLIB: Inline data length > 96 detected"); - data_len = BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH; - } else { - data_len = wqe->inline_len; - } - memcpy(hw_sq_send_hdr->data, wqe->inline_data, data_len); - wqe_size16 = (data_len + 15) >> 4; - } else { - for (i = 0, hw_sge = (struct sq_sge *)hw_sq_send_hdr->data; - i < wqe->num_sge; i++, hw_sge++) { - hw_sge->va_or_pa = cpu_to_le64(wqe->sg_list[i].addr); - hw_sge->l_key = cpu_to_le32(wqe->sg_list[i].lkey); - hw_sge->size = cpu_to_le32(wqe->sg_list[i].size); - data_len += wqe->sg_list[i].size; - } - /* Each SGE entry = 1 WQE size16 */ - wqe_size16 = wqe->num_sge; - /* HW requires wqe size has room for atleast one SGE even if - * none was supplied by ULP - */ - if (!wqe->num_sge) - wqe_size16++; + data_len = bnxt_qplib_put_inline(qp, wqe, &idx); + else + data_len = bnxt_qplib_put_sges(hwq, wqe->sg_list, wqe->num_sge, + &idx); + if (data_len > BNXT_RE_MAX_MSG_SIZE) { + rc = -EINVAL; + goto done; } - + /* Make sure we update MSN table only for wired wqes */ + msn_update = true; /* Specifics */ switch (wqe->type) { case BNXT_QPLIB_SWQE_TYPE_SEND: if (qp->type == CMDQ_CREATE_QP1_TYPE_GSI) { + struct sq_send_raweth_qp1_hdr *sqe = base_hdr; + struct sq_raw_ext_hdr *ext_sqe = ext_hdr; /* Assemble info for Raw Ethertype QPs */ - struct sq_send_raweth_qp1 *sqe = - (struct sq_send_raweth_qp1 *)hw_sq_send_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; - sqe->wqe_size = wqe_size16 + - ((offsetof(typeof(*sqe), data) + 15) >> 4); + sqe->wqe_size = wqe_sz; sqe->cfa_action = cpu_to_le16(wqe->rawqp1.cfa_action); sqe->lflags = cpu_to_le16(wqe->rawqp1.lflags); sqe->length = cpu_to_le32(data_len); - sqe->cfa_meta = cpu_to_le32((wqe->rawqp1.cfa_meta & + ext_sqe->cfa_meta = cpu_to_le32((wqe->rawqp1.cfa_meta & SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_MASK) << SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_SFT); break; } - /* else, just fall thru */ + fallthrough; case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM: case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV: { - struct sq_send *sqe = (struct sq_send *)hw_sq_send_hdr; + struct sq_ud_ext_hdr *ext_sqe = ext_hdr; + struct sq_send_hdr *sqe = base_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; - sqe->wqe_size = wqe_size16 + - ((offsetof(typeof(*sqe), data) + 15) >> 4); - sqe->inv_key_or_imm_data = cpu_to_le32( - wqe->send.inv_key); - if (qp->type == CMDQ_CREATE_QP_TYPE_UD) { + sqe->wqe_size = wqe_sz; + sqe->inv_key_or_imm_data = cpu_to_le32(wqe->send.inv_key); + if (qp->type == CMDQ_CREATE_QP_TYPE_UD || + qp->type == CMDQ_CREATE_QP_TYPE_GSI) { sqe->q_key = cpu_to_le32(wqe->send.q_key); - sqe->dst_qp = cpu_to_le32( - wqe->send.dst_qp & SQ_SEND_DST_QP_MASK); sqe->length = cpu_to_le32(data_len); - sqe->avid = cpu_to_le32(wqe->send.avid & - SQ_SEND_AVID_MASK); sq->psn = (sq->psn + 1) & BTH_PSN_MASK; + ext_sqe->dst_qp = cpu_to_le32(wqe->send.dst_qp & + SQ_SEND_DST_QP_MASK); + ext_sqe->avid = cpu_to_le32(wqe->send.avid & + SQ_SEND_AVID_MASK); + msn_update = false; } else { sqe->length = cpu_to_le32(data_len); - sqe->dst_qp = 0; - sqe->avid = 0; if (qp->mtu) pkt_num = (data_len + qp->mtu - 1) / qp->mtu; if (!pkt_num) @@ -1192,16 +1975,16 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, case BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE_WITH_IMM: case BNXT_QPLIB_SWQE_TYPE_RDMA_READ: { - struct sq_rdma *sqe = (struct sq_rdma *)hw_sq_send_hdr; + struct sq_rdma_ext_hdr *ext_sqe = ext_hdr; + struct sq_rdma_hdr *sqe = base_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; - sqe->wqe_size = wqe_size16 + - ((offsetof(typeof(*sqe), data) + 15) >> 4); + sqe->wqe_size = wqe_sz; sqe->imm_data = cpu_to_le32(wqe->rdma.inv_key); sqe->length = cpu_to_le32((u32)data_len); - sqe->remote_va = cpu_to_le64(wqe->rdma.remote_va); - sqe->remote_key = cpu_to_le32(wqe->rdma.r_key); + ext_sqe->remote_va = cpu_to_le64(wqe->rdma.remote_va); + ext_sqe->remote_key = cpu_to_le32(wqe->rdma.r_key); if (qp->mtu) pkt_num = (data_len + qp->mtu - 1) / qp->mtu; if (!pkt_num) @@ -1212,14 +1995,15 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, case BNXT_QPLIB_SWQE_TYPE_ATOMIC_CMP_AND_SWP: case BNXT_QPLIB_SWQE_TYPE_ATOMIC_FETCH_AND_ADD: { - struct sq_atomic *sqe = (struct sq_atomic *)hw_sq_send_hdr; + struct sq_atomic_ext_hdr *ext_sqe = ext_hdr; + struct sq_atomic_hdr *sqe = base_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; sqe->remote_key = cpu_to_le32(wqe->atomic.r_key); sqe->remote_va = cpu_to_le64(wqe->atomic.remote_va); - sqe->swap_data = cpu_to_le64(wqe->atomic.swap_data); - sqe->cmp_data = cpu_to_le64(wqe->atomic.cmp_data); + ext_sqe->swap_data = cpu_to_le64(wqe->atomic.swap_data); + ext_sqe->cmp_data = cpu_to_le64(wqe->atomic.cmp_data); if (qp->mtu) pkt_num = (data_len + qp->mtu - 1) / qp->mtu; if (!pkt_num) @@ -1229,18 +2013,18 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, } case BNXT_QPLIB_SWQE_TYPE_LOCAL_INV: { - struct sq_localinvalidate *sqe = - (struct sq_localinvalidate *)hw_sq_send_hdr; + struct sq_localinvalidate *sqe = base_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; sqe->inv_l_key = cpu_to_le32(wqe->local_inv.inv_l_key); - + msn_update = false; break; } case BNXT_QPLIB_SWQE_TYPE_FAST_REG_MR: { - struct sq_fr_pmr *sqe = (struct sq_fr_pmr *)hw_sq_send_hdr; + struct sq_fr_pmr_ext_hdr *ext_sqe = ext_hdr; + struct sq_fr_pmr_hdr *sqe = base_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; @@ -1264,14 +2048,16 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, wqe->frmr.pbl_ptr[i] = cpu_to_le64( wqe->frmr.page_list[i] | PTU_PTE_VALID); - sqe->pblptr = cpu_to_le64(wqe->frmr.pbl_dma_ptr); - sqe->va = cpu_to_le64(wqe->frmr.va); + ext_sqe->pblptr = cpu_to_le64(wqe->frmr.pbl_dma_ptr); + ext_sqe->va = cpu_to_le64(wqe->frmr.va); + msn_update = false; break; } case BNXT_QPLIB_SWQE_TYPE_BIND_MW: { - struct sq_bind *sqe = (struct sq_bind *)hw_sq_send_hdr; + struct sq_bind_ext_hdr *ext_sqe = ext_hdr; + struct sq_bind_hdr *sqe = base_hdr; sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; @@ -1280,9 +2066,9 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, (wqe->bind.zero_based ? SQ_BIND_ZERO_BASED : 0); sqe->parent_l_key = cpu_to_le32(wqe->bind.parent_l_key); sqe->l_key = cpu_to_le32(wqe->bind.r_key); - sqe->va = cpu_to_le64(wqe->bind.va); - temp32 = cpu_to_le32(wqe->bind.length); - memcpy(&sqe->length, &temp32, sizeof(wqe->bind.length)); + ext_sqe->va = cpu_to_le64(wqe->bind.va); + ext_sqe->length_lo = cpu_to_le32(wqe->bind.length); + msn_update = false; break; } default: @@ -1290,245 +2076,323 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, rc = -EINVAL; goto done; } - swq->next_psn = sq->psn & BTH_PSN_MASK; - if (swq->psn_search) { - swq->psn_search->opcode_start_psn = cpu_to_le32( - ((swq->start_psn << SQ_PSN_SEARCH_START_PSN_SFT) & - SQ_PSN_SEARCH_START_PSN_MASK) | - ((wqe->type << SQ_PSN_SEARCH_OPCODE_SFT) & - SQ_PSN_SEARCH_OPCODE_MASK)); - swq->psn_search->flags_next_psn = cpu_to_le32( - ((swq->next_psn << SQ_PSN_SEARCH_NEXT_PSN_SFT) & - SQ_PSN_SEARCH_NEXT_PSN_MASK)); + if (!qp->is_host_msn_tbl || msn_update) { + swq->next_psn = sq->psn & BTH_PSN_MASK; + bnxt_qplib_fill_psn_search(qp, wqe, swq); } - - sq->hwq.prod++; - +queue_err: + bnxt_qplib_swq_mod_start(sq, wqe_idx); + bnxt_qplib_hwq_incr_prod(&sq->dbinfo, hwq, swq->slots); qp->wqe_cnt++; - done: + if (sch_handler) { + nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC); + if (nq_work) { + nq_work->cq = qp->scq; + nq_work->nq = qp->scq->nq; + INIT_WORK(&nq_work->work, bnxt_qpn_cqn_sched_task); + queue_work(qp->scq->nq->cqn_wq, &nq_work->work); + } else { + dev_err(&hwq->pdev->dev, + "FP: Failed to allocate SQ nq_work!\n"); + rc = -ENOMEM; + } + } return rc; } void bnxt_qplib_post_recv_db(struct bnxt_qplib_qp *qp) { struct bnxt_qplib_q *rq = &qp->rq; - struct dbr_dbr db_msg = { 0 }; - u32 sw_prod; - sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq); - db_msg.index = cpu_to_le32((sw_prod << DBR_DBR_INDEX_SFT) & - DBR_DBR_INDEX_MASK); - db_msg.type_xid = - cpu_to_le32(((qp->id << DBR_DBR_XID_SFT) & DBR_DBR_XID_MASK) | - DBR_DBR_TYPE_RQ); - - /* Flush the writes to HW Rx WQE before the ringing Rx DB */ - wmb(); - __iowrite64_copy(qp->dpi->dbr, &db_msg, sizeof(db_msg) / sizeof(u64)); + bnxt_qplib_ring_prod_db(&rq->dbinfo, DBC_DBC_TYPE_RQ); } int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp, struct bnxt_qplib_swqe *wqe) { + struct bnxt_qplib_nq_work *nq_work = NULL; struct bnxt_qplib_q *rq = &qp->rq; - struct rq_wqe *rqe, **rqe_ptr; - struct sq_sge *hw_sge; - u32 sw_prod; - int i, rc = 0; + struct rq_wqe_hdr *base_hdr; + struct rq_ext_hdr *ext_hdr; + struct bnxt_qplib_hwq *hwq; + struct bnxt_qplib_swq *swq; + bool sch_handler = false; + u32 wqe_idx, idx; + u16 wqe_sz; + int rc = 0; - if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { - dev_err(&rq->hwq.pdev->dev, + hwq = &rq->hwq; + if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_RESET) { + dev_err(&hwq->pdev->dev, "QPLIB: FP: QP (0x%x) is in the 0x%x state", qp->id, qp->state); rc = -EINVAL; goto done; } - if (bnxt_qplib_queue_full(rq)) { - dev_err(&rq->hwq.pdev->dev, - "QPLIB: FP: QP (0x%x) RQ is full!", qp->id); + + if (bnxt_qplib_queue_full(rq, rq->dbinfo.max_slot)) { + dev_err(&hwq->pdev->dev, + "FP: QP (0x%x) RQ is full!\n", qp->id); rc = -EINVAL; goto done; } - sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq); - rq->swq[sw_prod].wr_id = wqe->wr_id; - - rqe_ptr = (struct rq_wqe **)rq->hwq.pbl_ptr; - rqe = &rqe_ptr[RQE_PG(sw_prod)][RQE_IDX(sw_prod)]; - memset(rqe, 0, BNXT_QPLIB_MAX_RQE_ENTRY_SIZE); + swq = bnxt_qplib_get_swqe(rq, &wqe_idx); + swq->wr_id = wqe->wr_id; + swq->slots = rq->dbinfo.max_slot; - /* Calculate wqe_size16 and data_len */ - for (i = 0, hw_sge = (struct sq_sge *)rqe->data; - i < wqe->num_sge; i++, hw_sge++) { - hw_sge->va_or_pa = cpu_to_le64(wqe->sg_list[i].addr); - hw_sge->l_key = cpu_to_le32(wqe->sg_list[i].lkey); - hw_sge->size = cpu_to_le32(wqe->sg_list[i].size); + if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { + sch_handler = true; + dev_dbg(&hwq->pdev->dev, + "%s: Error QP. Scheduling for poll_cq\n", __func__); + goto queue_err; + } + + idx = 0; + base_hdr = bnxt_qplib_get_prod_qe(hwq, idx++); + ext_hdr = bnxt_qplib_get_prod_qe(hwq, idx++); + memset(base_hdr, 0, sizeof(struct sq_sge)); + memset(ext_hdr, 0, sizeof(struct sq_sge)); + wqe_sz = (sizeof(struct rq_wqe_hdr) + + wqe->num_sge * sizeof(struct sq_sge)) >> 4; + bnxt_qplib_put_sges(hwq, wqe->sg_list, wqe->num_sge, &idx); + if (!wqe->num_sge) { + struct sq_sge *sge; + + sge = bnxt_qplib_get_prod_qe(hwq, idx++); + sge->size = 0; + wqe_sz++; + } + base_hdr->wqe_type = wqe->type; + base_hdr->flags = wqe->flags; + base_hdr->wqe_size = wqe_sz; + base_hdr->wr_id[0] = cpu_to_le32(wqe_idx); +queue_err: + bnxt_qplib_swq_mod_start(rq, wqe_idx); + bnxt_qplib_hwq_incr_prod(&rq->dbinfo, hwq, swq->slots); +done: + if (sch_handler) { + nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC); + if (nq_work) { + nq_work->cq = qp->rcq; + nq_work->nq = qp->rcq->nq; + INIT_WORK(&nq_work->work, bnxt_qpn_cqn_sched_task); + queue_work(qp->rcq->nq->cqn_wq, &nq_work->work); + } else { + dev_err(&hwq->pdev->dev, + "FP: Failed to allocate RQ nq_work!\n"); + rc = -ENOMEM; + } } - rqe->wqe_type = wqe->type; - rqe->flags = wqe->flags; - rqe->wqe_size = wqe->num_sge + - ((offsetof(typeof(*rqe), data) + 15) >> 4); - /* HW requires wqe size has room for atleast one SGE even if none - * was supplied by ULP - */ - if (!wqe->num_sge) - rqe->wqe_size++; - /* Supply the rqe->wr_id index to the wr_id_tbl for now */ - rqe->wr_id[0] = cpu_to_le32(sw_prod); - - rq->hwq.prod++; -done: return rc; } /* CQ */ - -/* Spinlock must be held */ -static void bnxt_qplib_arm_cq_enable(struct bnxt_qplib_cq *cq) -{ - struct dbr_dbr db_msg = { 0 }; - - db_msg.type_xid = - cpu_to_le32(((cq->id << DBR_DBR_XID_SFT) & DBR_DBR_XID_MASK) | - DBR_DBR_TYPE_CQ_ARMENA); - /* Flush memory writes before enabling the CQ */ - wmb(); - __iowrite64_copy(cq->dbr_base, &db_msg, sizeof(db_msg) / sizeof(u64)); -} - -static void bnxt_qplib_arm_cq(struct bnxt_qplib_cq *cq, u32 arm_type) -{ - struct bnxt_qplib_hwq *cq_hwq = &cq->hwq; - struct dbr_dbr db_msg = { 0 }; - u32 sw_cons; - - /* Ring DB */ - sw_cons = HWQ_CMP(cq_hwq->cons, cq_hwq); - db_msg.index = cpu_to_le32((sw_cons << DBR_DBR_INDEX_SFT) & - DBR_DBR_INDEX_MASK); - db_msg.type_xid = - cpu_to_le32(((cq->id << DBR_DBR_XID_SFT) & DBR_DBR_XID_MASK) | - arm_type); - /* flush memory writes before arming the CQ */ - wmb(); - __iowrite64_copy(cq->dpi->dbr, &db_msg, sizeof(db_msg) / sizeof(u64)); -} - int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) { struct bnxt_qplib_rcfw *rcfw = res->rcfw; - struct cmdq_create_cq req; - struct creq_create_cq_resp resp; + struct bnxt_qplib_hwq_attr hwq_attr = {}; + struct creq_create_cq_resp resp = {}; + struct bnxt_qplib_cmdqmsg msg = {}; + struct cmdq_create_cq req = {}; struct bnxt_qplib_pbl *pbl; - u16 cmd_flags = 0; + u32 coalescing = 0; + u32 pg_sz_lvl; int rc; - cq->hwq.max_elements = cq->max_wqe; - rc = bnxt_qplib_alloc_init_hwq(res->pdev, &cq->hwq, cq->sghead, - cq->nmap, &cq->hwq.max_elements, - BNXT_QPLIB_MAX_CQE_ENTRY_SIZE, 0, - PAGE_SIZE, HWQ_TYPE_QUEUE); - if (rc) - goto exit; - - RCFW_CMD_PREP(req, CREATE_CQ, cmd_flags); - if (!cq->dpi) { dev_err(&rcfw->pdev->dev, - "QPLIB: FP: CREATE_CQ failed due to NULL DPI"); + "FP: CREATE_CQ failed due to NULL DPI\n"); return -EINVAL; } + + cq->dbinfo.flags = 0; + hwq_attr.res = res; + hwq_attr.depth = cq->max_wqe; + hwq_attr.stride = sizeof(struct cq_base); + hwq_attr.type = HWQ_TYPE_QUEUE; + hwq_attr.sginfo = &cq->sg_info; + rc = bnxt_qplib_alloc_init_hwq(&cq->hwq, &hwq_attr); + if (rc) + return rc; + + bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, + CMDQ_BASE_OPCODE_CREATE_CQ, + sizeof(req)); + req.dpi = cpu_to_le32(cq->dpi->dpi); req.cq_handle = cpu_to_le64(cq->cq_handle); + req.cq_size = cpu_to_le32(cq->max_wqe); + + if (_is_cq_coalescing_supported(res->dattr->dev_cap_flags2) && + cq->coalescing->enable) { + req.flags |= cpu_to_le16(CMDQ_CREATE_CQ_FLAGS_COALESCING_VALID); + coalescing |= ((cq->coalescing->buf_maxtime << + CMDQ_CREATE_CQ_BUF_MAXTIME_SFT) & + CMDQ_CREATE_CQ_BUF_MAXTIME_MASK); + coalescing |= ((cq->coalescing->normal_maxbuf << + CMDQ_CREATE_CQ_NORMAL_MAXBUF_SFT) & + CMDQ_CREATE_CQ_NORMAL_MAXBUF_MASK); + coalescing |= ((cq->coalescing->during_maxbuf << + CMDQ_CREATE_CQ_DURING_MAXBUF_SFT) & + CMDQ_CREATE_CQ_DURING_MAXBUF_MASK); + if (cq->coalescing->en_ring_idle_mode) + coalescing |= CMDQ_CREATE_CQ_ENABLE_RING_IDLE_MODE; + else + coalescing &= ~CMDQ_CREATE_CQ_ENABLE_RING_IDLE_MODE; + req.coalescing = cpu_to_le32(coalescing); + } - req.cq_size = cpu_to_le32(cq->hwq.max_elements); pbl = &cq->hwq.pbl[PBL_LVL_0]; - req.pg_size_lvl = cpu_to_le32( - ((cq->hwq.level & CMDQ_CREATE_CQ_LVL_MASK) << - CMDQ_CREATE_CQ_LVL_SFT) | - (pbl->pg_size == ROCE_PG_SIZE_4K ? CMDQ_CREATE_CQ_PG_SIZE_PG_4K : - pbl->pg_size == ROCE_PG_SIZE_8K ? CMDQ_CREATE_CQ_PG_SIZE_PG_8K : - pbl->pg_size == ROCE_PG_SIZE_64K ? CMDQ_CREATE_CQ_PG_SIZE_PG_64K : - pbl->pg_size == ROCE_PG_SIZE_2M ? CMDQ_CREATE_CQ_PG_SIZE_PG_2M : - pbl->pg_size == ROCE_PG_SIZE_8M ? CMDQ_CREATE_CQ_PG_SIZE_PG_8M : - pbl->pg_size == ROCE_PG_SIZE_1G ? CMDQ_CREATE_CQ_PG_SIZE_PG_1G : - CMDQ_CREATE_CQ_PG_SIZE_PG_4K)); - + pg_sz_lvl = (bnxt_qplib_base_pg_size(&cq->hwq) << + CMDQ_CREATE_CQ_PG_SIZE_SFT); + pg_sz_lvl |= (cq->hwq.level & CMDQ_CREATE_CQ_LVL_MASK); + req.pg_size_lvl = cpu_to_le32(pg_sz_lvl); req.pbl = cpu_to_le64(pbl->pg_map_arr[0]); - req.cq_fco_cnq_id = cpu_to_le32( (cq->cnq_hw_ring_id & CMDQ_CREATE_CQ_CNQ_ID_MASK) << CMDQ_CREATE_CQ_CNQ_ID_SFT); - - rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, - (void *)&resp, NULL, 0); + bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), + sizeof(resp), 0); + rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); if (rc) goto fail; cq->id = le32_to_cpu(resp.xid); - cq->dbr_base = res->dpi_tbl.dbr_bar_reg_iomem; cq->period = BNXT_QPLIB_QUEUE_START_PERIOD; init_waitqueue_head(&cq->waitq); + INIT_LIST_HEAD(&cq->sqf_head); + INIT_LIST_HEAD(&cq->rqf_head); + spin_lock_init(&cq->compl_lock); + spin_lock_init(&cq->flush_lock); + + cq->dbinfo.hwq = &cq->hwq; + cq->dbinfo.xid = cq->id; + cq->dbinfo.db = cq->dpi->dbr; + cq->dbinfo.priv_db = res->dpi_tbl.priv_db; + cq->dbinfo.flags = 0; + cq->dbinfo.toggle = 0; + + bnxt_qplib_armen_db(&cq->dbinfo, DBC_DBC_TYPE_CQ_ARMENA); - bnxt_qplib_arm_cq_enable(cq); return 0; fail: - bnxt_qplib_free_hwq(res->pdev, &cq->hwq); -exit: + bnxt_qplib_free_hwq(res, &cq->hwq); + return rc; +} + +void bnxt_qplib_resize_cq_complete(struct bnxt_qplib_res *res, + struct bnxt_qplib_cq *cq) +{ + bnxt_qplib_free_hwq(res, &cq->hwq); + memcpy(&cq->hwq, &cq->resize_hwq, sizeof(cq->hwq)); + /* Reset only the cons bit in the flags */ + cq->dbinfo.flags &= ~(1UL << BNXT_QPLIB_FLAG_EPOCH_CONS_SHIFT); +} + +int bnxt_qplib_resize_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq, + int new_cqes) +{ + struct bnxt_qplib_hwq_attr hwq_attr = {}; + struct bnxt_qplib_rcfw *rcfw = res->rcfw; + struct creq_resize_cq_resp resp = {}; + struct bnxt_qplib_cmdqmsg msg = {}; + struct cmdq_resize_cq req = {}; + struct bnxt_qplib_pbl *pbl; + u32 pg_sz, lvl, new_sz; + int rc; + + bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, + CMDQ_BASE_OPCODE_RESIZE_CQ, + sizeof(req)); + hwq_attr.sginfo = &cq->sg_info; + hwq_attr.res = res; + hwq_attr.depth = new_cqes; + hwq_attr.stride = sizeof(struct cq_base); + hwq_attr.type = HWQ_TYPE_QUEUE; + rc = bnxt_qplib_alloc_init_hwq(&cq->resize_hwq, &hwq_attr); + if (rc) + return rc; + + req.cq_cid = cpu_to_le32(cq->id); + pbl = &cq->resize_hwq.pbl[PBL_LVL_0]; + pg_sz = bnxt_qplib_base_pg_size(&cq->resize_hwq); + lvl = (cq->resize_hwq.level << CMDQ_RESIZE_CQ_LVL_SFT) & + CMDQ_RESIZE_CQ_LVL_MASK; + new_sz = (new_cqes << CMDQ_RESIZE_CQ_NEW_CQ_SIZE_SFT) & + CMDQ_RESIZE_CQ_NEW_CQ_SIZE_MASK; + req.new_cq_size_pg_size_lvl = cpu_to_le32(new_sz | pg_sz | lvl); + req.new_pbl = cpu_to_le64(pbl->pg_map_arr[0]); + + bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), + sizeof(resp), 0); + rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); return rc; } int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) { struct bnxt_qplib_rcfw *rcfw = res->rcfw; - struct cmdq_destroy_cq req; - struct creq_destroy_cq_resp resp; - u16 cmd_flags = 0; + struct creq_destroy_cq_resp resp = {}; + struct bnxt_qplib_cmdqmsg msg = {}; + struct cmdq_destroy_cq req = {}; + u16 total_cnq_events; int rc; - RCFW_CMD_PREP(req, DESTROY_CQ, cmd_flags); + bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, + CMDQ_BASE_OPCODE_DESTROY_CQ, + sizeof(req)); req.cq_cid = cpu_to_le32(cq->id); - rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, - (void *)&resp, NULL, 0); + bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), + sizeof(resp), 0); + rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); if (rc) return rc; - bnxt_qplib_free_hwq(res->pdev, &cq->hwq); + total_cnq_events = le16_to_cpu(resp.total_cnq_events); + __wait_for_all_nqes(cq, total_cnq_events); + bnxt_qplib_free_hwq(res, &cq->hwq); return 0; } static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp, struct bnxt_qplib_cqe **pcqe, int *budget) { - u32 sw_prod, sw_cons; struct bnxt_qplib_cqe *cqe; + u32 start, last; int rc = 0; /* Now complete all outstanding SQEs with FLUSHED_ERR */ - sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq); + start = sq->swq_start; cqe = *pcqe; while (*budget) { - sw_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq); - if (sw_cons == sw_prod) { - sq->flush_in_progress = false; + last = sq->swq_last; + if (start == last) break; + /* Skip the FENCE WQE completions */ + if (sq->swq[last].wr_id == BNXT_QPLIB_FENCE_WRID) { + bnxt_qplib_cancel_phantom_processing(qp); + goto skip_compl; } memset(cqe, 0, sizeof(*cqe)); cqe->status = CQ_REQ_STATUS_WORK_REQUEST_FLUSHED_ERR; cqe->opcode = CQ_BASE_CQE_TYPE_REQ; cqe->qp_handle = (u64)(unsigned long)qp; - cqe->wr_id = sq->swq[sw_cons].wr_id; + cqe->wr_id = sq->swq[last].wr_id; cqe->src_qp = qp->id; - cqe->type = sq->swq[sw_cons].type; + cqe->type = sq->swq[last].type; cqe++; (*budget)--; - sq->hwq.cons++; +skip_compl: + bnxt_qplib_hwq_incr_cons(sq->hwq.max_elements, &sq->hwq.cons, + sq->swq[last].slots, &sq->dbinfo.flags); + sq->swq_last = sq->swq[last].next_idx; } *pcqe = cqe; - if (!(*budget) && HWQ_CMP(sq->hwq.cons, &sq->hwq) != sw_prod) + if (!(*budget) && sq->swq_last != start) /* Out of budget */ rc = -EAGAIN; @@ -1536,55 +2400,83 @@ static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp, } static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp, - int opcode, struct bnxt_qplib_cqe **pcqe, int *budget) + struct bnxt_qplib_cqe **pcqe, int *budget) { struct bnxt_qplib_cqe *cqe; - u32 sw_prod, sw_cons; + u32 start, last; + int opcode = 0; int rc = 0; + switch (qp->type) { + case CMDQ_CREATE_QP1_TYPE_GSI: + opcode = CQ_BASE_CQE_TYPE_RES_RAWETH_QP1; + break; + case CMDQ_CREATE_QP_TYPE_RC: + opcode = CQ_BASE_CQE_TYPE_RES_RC; + break; + case CMDQ_CREATE_QP_TYPE_UD: + case CMDQ_CREATE_QP_TYPE_GSI: + opcode = CQ_BASE_CQE_TYPE_RES_UD; + break; + } + /* Flush the rest of the RQ */ - sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq); + start = rq->swq_start; cqe = *pcqe; while (*budget) { - sw_cons = HWQ_CMP(rq->hwq.cons, &rq->hwq); - if (sw_cons == sw_prod) + last = rq->swq_last; + if (last == start) break; memset(cqe, 0, sizeof(*cqe)); cqe->status = CQ_RES_RC_STATUS_WORK_REQUEST_FLUSHED_ERR; cqe->opcode = opcode; cqe->qp_handle = (unsigned long)qp; - cqe->wr_id = rq->swq[sw_cons].wr_id; + cqe->wr_id = rq->swq[last].wr_id; cqe++; (*budget)--; - rq->hwq.cons++; + bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons, + rq->swq[last].slots, &rq->dbinfo.flags); + rq->swq_last = rq->swq[last].next_idx; } *pcqe = cqe; - if (!*budget && HWQ_CMP(rq->hwq.cons, &rq->hwq) != sw_prod) + if (!*budget && rq->swq_last != start) /* Out of budget */ rc = -EAGAIN; return rc; } +void bnxt_qplib_mark_qp_error(void *qp_handle) +{ + struct bnxt_qplib_qp *qp = qp_handle; + + if (!qp) + return; + + /* Must block new posting of SQ and RQ */ + qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR; + bnxt_qplib_cancel_phantom_processing(qp); +} + /* Note: SQE is valid from sw_sq_cons up to cqe_sq_cons (exclusive) * CQE is track from sw_cq_cons to max_element but valid only if VALID=1 */ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, - u32 cq_cons, u32 sw_sq_cons, u32 cqe_sq_cons) + u32 cq_cons, u32 swq_last, u32 cqe_sq_cons) { + u32 peek_sw_cq_cons, peek_sq_cons_idx, peek_flags; struct bnxt_qplib_q *sq = &qp->sq; - struct bnxt_qplib_swq *swq; - u32 peek_sw_cq_cons, peek_raw_cq_cons, peek_sq_cons_idx; - struct cq_base *peek_hwcqe, **peek_hw_cqe_ptr; struct cq_req *peek_req_hwcqe; struct bnxt_qplib_qp *peek_qp; struct bnxt_qplib_q *peek_sq; + struct bnxt_qplib_swq *swq; + struct cq_base *peek_hwcqe; int i, rc = 0; /* Normal mode */ /* Check for the psn_search marking before completing */ - swq = &sq->swq[sw_sq_cons]; + swq = &sq->swq[swq_last]; if (swq->psn_search && le32_to_cpu(swq->psn_search->flags_next_psn) & 0x80000000) { /* Unmark */ @@ -1593,29 +2485,30 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, & ~0x80000000); dev_dbg(&cq->hwq.pdev->dev, "FP: Process Req cq_cons=0x%x qp=0x%x sq cons sw=0x%x cqe=0x%x marked!\n", - cq_cons, qp->id, sw_sq_cons, cqe_sq_cons); + cq_cons, qp->id, swq_last, cqe_sq_cons); sq->condition = true; sq->send_phantom = true; /* TODO: Only ARM if the previous SQE is ARMALL */ - bnxt_qplib_arm_cq(cq, DBR_DBR_TYPE_CQ_ARMALL); - + bnxt_qplib_ring_db(&cq->dbinfo, DBC_DBC_TYPE_CQ_ARMALL); rc = -EAGAIN; goto out; } if (sq->condition) { /* Peek at the completions */ - peek_raw_cq_cons = cq->hwq.cons; + peek_flags = cq->dbinfo.flags; peek_sw_cq_cons = cq_cons; i = cq->hwq.max_elements; while (i--) { - peek_sw_cq_cons = HWQ_CMP((peek_sw_cq_cons), &cq->hwq); - peek_hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr; - peek_hwcqe = &peek_hw_cqe_ptr[CQE_PG(peek_sw_cq_cons)] - [CQE_IDX(peek_sw_cq_cons)]; + peek_hwcqe = bnxt_qplib_get_qe(&cq->hwq, + peek_sw_cq_cons, NULL); /* If the next hwcqe is VALID */ - if (CQE_CMP_VALID(peek_hwcqe, peek_raw_cq_cons, - cq->hwq.max_elements)) { + if (CQE_CMP_VALID(peek_hwcqe, peek_flags)) { + /* + * The valid test of the entry must be done first before + * reading any further. + */ + dma_rmb(); /* If the next hwcqe is a REQ */ if ((peek_hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK) == @@ -1627,9 +2520,10 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, le64_to_cpu (peek_req_hwcqe->qp_handle)); peek_sq = &peek_qp->sq; - peek_sq_cons_idx = HWQ_CMP(le16_to_cpu( - peek_req_hwcqe->sq_cons_idx) - 1 - , &sq->hwq); + peek_sq_cons_idx = + ((le16_to_cpu( + peek_req_hwcqe->sq_cons_idx) + - 1) % sq->max_wqe); /* If the hwcqe's sq's wr_id matches */ if (peek_sq == sq && sq->swq[peek_sq_cons_idx].wr_id == @@ -1639,7 +2533,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, * comes back */ dev_dbg(&cq->hwq.pdev->dev, - "FP:Got Phantom CQE"); + "FP: Got Phantom CQE\n"); sq->condition = false; sq->single = true; rc = 0; @@ -1652,51 +2546,86 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, rc = -EINVAL; goto out; } - peek_sw_cq_cons++; - peek_raw_cq_cons++; + bnxt_qplib_hwq_incr_cons(cq->hwq.max_elements, + &peek_sw_cq_cons, + 1, &peek_flags); } dev_err(&cq->hwq.pdev->dev, - "Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x", - cq_cons, qp->id, sw_sq_cons, cqe_sq_cons); + "Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x\n", + cq_cons, qp->id, swq_last, cqe_sq_cons); rc = -EINVAL; } out: return rc; } +static int bnxt_qplib_get_cqe_sq_cons(struct bnxt_qplib_q *sq, u32 cqe_slot) +{ + struct bnxt_qplib_hwq *sq_hwq; + struct bnxt_qplib_swq *swq; + int cqe_sq_cons = -1; + u32 start, last; + + sq_hwq = &sq->hwq; + + start = sq->swq_start; + last = sq->swq_last; + + while (last != start) { + swq = &sq->swq[last]; + if (swq->slot_idx == cqe_slot) { + cqe_sq_cons = swq->next_idx; + dev_err(&sq_hwq->pdev->dev, "%s: Found cons wqe = %d slot = %d\n", + __func__, cqe_sq_cons, cqe_slot); + break; + } + + last = swq->next_idx; + } + return cqe_sq_cons; +} + static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, struct cq_req *hwcqe, struct bnxt_qplib_cqe **pcqe, int *budget, u32 cq_cons, struct bnxt_qplib_qp **lib_qp) { + struct bnxt_qplib_swq *swq; + struct bnxt_qplib_cqe *cqe; + u32 cqe_sq_cons, slot_num; struct bnxt_qplib_qp *qp; struct bnxt_qplib_q *sq; - struct bnxt_qplib_cqe *cqe; - u32 sw_sq_cons, cqe_sq_cons; - struct bnxt_qplib_swq *swq; + int cqe_cons; int rc = 0; qp = (struct bnxt_qplib_qp *)((unsigned long) le64_to_cpu(hwcqe->qp_handle)); if (!qp) { dev_err(&cq->hwq.pdev->dev, - "QPLIB: FP: Process Req qp is NULL"); + "FP: Process Req qp is NULL\n"); return -EINVAL; } sq = &qp->sq; - cqe_sq_cons = HWQ_CMP(le16_to_cpu(hwcqe->sq_cons_idx), &sq->hwq); - if (cqe_sq_cons > sq->hwq.max_elements) { - dev_err(&cq->hwq.pdev->dev, - "QPLIB: FP: CQ Process req reported "); - dev_err(&cq->hwq.pdev->dev, - "QPLIB: sq_cons_idx 0x%x which exceeded max 0x%x", - cqe_sq_cons, sq->hwq.max_elements); - return -EINVAL; + cqe_sq_cons = le16_to_cpu(hwcqe->sq_cons_idx) % sq->max_sw_wqe; + if (qp->sq.flushed) { + dev_dbg(&cq->hwq.pdev->dev, + "%s: QP in Flush QP = %p\n", __func__, qp); + goto done; + } + + if (__is_err_cqe_for_var_wqe(qp, hwcqe->status)) { + slot_num = le16_to_cpu(hwcqe->sq_cons_idx); + cqe_cons = bnxt_qplib_get_cqe_sq_cons(sq, slot_num); + if (cqe_cons < 0) { + dev_err(&cq->hwq.pdev->dev, "%s: Wrong SQ cons cqe_slot_indx = %d\n", + __func__, slot_num); + goto done; + } + cqe_sq_cons = cqe_cons; + dev_err(&cq->hwq.pdev->dev, "%s: cqe_sq_cons = %d swq_last = %d swq_start = %d\n", + __func__, cqe_sq_cons, sq->swq_last, sq->swq_start); } - /* If we were in the middle of flushing the SQ, continue */ - if (sq->flush_in_progress) - goto flush; /* Require to walk the sq's swq to fabricate CQEs for all previously * signaled SWQEs due to CQE aggregation from the current sq cons @@ -1704,12 +2633,11 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, */ cqe = *pcqe; while (*budget) { - sw_sq_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq); - if (sw_sq_cons == cqe_sq_cons) + if (sq->swq_last == cqe_sq_cons) /* Done */ break; - swq = &sq->swq[sw_sq_cons]; + swq = &sq->swq[sq->swq_last]; memset(cqe, 0, sizeof(*cqe)); cqe->opcode = CQ_BASE_CQE_TYPE_REQ; cqe->qp_handle = (u64)(unsigned long)qp; @@ -1723,42 +2651,42 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, * of the request being signaled or not, it must complete with * the hwcqe error status */ - if (HWQ_CMP((sw_sq_cons + 1), &sq->hwq) == cqe_sq_cons && + if (swq->next_idx == cqe_sq_cons && hwcqe->status != CQ_REQ_STATUS_OK) { cqe->status = hwcqe->status; dev_err(&cq->hwq.pdev->dev, - "QPLIB: FP: CQ Processed Req "); - dev_err(&cq->hwq.pdev->dev, - "QPLIB: wr_id[%d] = 0x%llx with status 0x%x", - sw_sq_cons, cqe->wr_id, cqe->status); + "FP: CQ Processed Req wr_id[%d] = 0x%llx with status 0x%x\n", + sq->swq_last, cqe->wr_id, cqe->status); cqe++; (*budget)--; - sq->flush_in_progress = true; - /* Must block new posting of SQ and RQ */ - qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR; - sq->condition = false; - sq->single = false; + bnxt_qplib_mark_qp_error(qp); + /* Add qp to flush list of the CQ */ + bnxt_qplib_add_flush_qp(qp); } else { - if (swq->flags & SQ_SEND_FLAGS_SIGNAL_COMP) { - /* Before we complete, do WA 9060 */ - if (do_wa9060(qp, cq, cq_cons, sw_sq_cons, + /* Before we complete, do WA 9060 */ + if (!bnxt_qplib_is_chip_gen_p5_p7(qp->cctx)) { + if (do_wa9060(qp, cq, cq_cons, sq->swq_last, cqe_sq_cons)) { *lib_qp = qp; goto out; } + } + if (swq->flags & SQ_SEND_FLAGS_SIGNAL_COMP) { cqe->status = CQ_REQ_STATUS_OK; cqe++; (*budget)--; } } skip: - sq->hwq.cons++; + bnxt_qplib_hwq_incr_cons(sq->hwq.max_elements, &sq->hwq.cons, + swq->slots, &sq->dbinfo.flags); + sq->swq_last = swq->next_idx; if (sq->single) break; } out: *pcqe = cqe; - if (HWQ_CMP(sq->hwq.cons, &sq->hwq) != cqe_sq_cons) { + if (sq->swq_last != cqe_sq_cons) { /* Out of budget */ rc = -EAGAIN; goto done; @@ -1768,36 +2696,44 @@ out: * the WC for this CQE */ sq->single = false; - if (!sq->flush_in_progress) - goto done; -flush: - /* Require to walk the sq's swq to fabricate CQEs for all - * previously posted SWQEs due to the error CQE received - */ - rc = __flush_sq(sq, qp, pcqe, budget); - if (!rc) - sq->flush_in_progress = false; done: return rc; } +static void bnxt_qplib_release_srqe(struct bnxt_qplib_srq *srq, u32 tag) +{ + spin_lock(&srq->hwq.lock); + srq->swq[srq->last_idx].next_idx = (int)tag; + srq->last_idx = (int)tag; + srq->swq[srq->last_idx].next_idx = -1; + bnxt_qplib_hwq_incr_cons(srq->hwq.max_elements, &srq->hwq.cons, + srq->dbinfo.max_slot, &srq->dbinfo.flags); + spin_unlock(&srq->hwq.lock); +} + static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq, struct cq_res_rc *hwcqe, struct bnxt_qplib_cqe **pcqe, int *budget) { + struct bnxt_qplib_srq *srq; + struct bnxt_qplib_cqe *cqe; struct bnxt_qplib_qp *qp; struct bnxt_qplib_q *rq; - struct bnxt_qplib_cqe *cqe; u32 wr_id_idx; - int rc = 0; qp = (struct bnxt_qplib_qp *)((unsigned long) le64_to_cpu(hwcqe->qp_handle)); if (!qp) { - dev_err(&cq->hwq.pdev->dev, "QPLIB: process_cq RC qp is NULL"); + dev_err(&cq->hwq.pdev->dev, "process_cq RC qp is NULL\n"); return -EINVAL; } + if (qp->rq.flushed) { + dev_dbg(&cq->hwq.pdev->dev, + "%s: QP in Flush QP = %p\n", __func__, qp); + return 0; + } + cqe = *pcqe; cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK; cqe->length = le32_to_cpu(hwcqe->length); @@ -1809,31 +2745,50 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq, wr_id_idx = le32_to_cpu(hwcqe->srq_or_rq_wr_id) & CQ_RES_RC_SRQ_OR_RQ_WR_ID_MASK; - rq = &qp->rq; - if (wr_id_idx > rq->hwq.max_elements) { - dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process RC "); - dev_err(&cq->hwq.pdev->dev, - "QPLIB: wr_id idx 0x%x exceeded RQ max 0x%x", - wr_id_idx, rq->hwq.max_elements); - return -EINVAL; - } - if (rq->flush_in_progress) - goto flush_rq; + if (cqe->flags & CQ_RES_RC_FLAGS_SRQ_SRQ) { + srq = qp->srq; + if (!srq) + return -EINVAL; + if (wr_id_idx >= srq->hwq.max_elements) { + dev_err(&cq->hwq.pdev->dev, + "FP: CQ Process RC wr_id idx 0x%x exceeded SRQ max 0x%x\n", + wr_id_idx, srq->hwq.max_elements); + return -EINVAL; + } + cqe->wr_id = srq->swq[wr_id_idx].wr_id; + bnxt_qplib_release_srqe(srq, wr_id_idx); + cqe++; + (*budget)--; + *pcqe = cqe; + } else { + struct bnxt_qplib_swq *swq; - cqe->wr_id = rq->swq[wr_id_idx].wr_id; - cqe++; - (*budget)--; - rq->hwq.cons++; - *pcqe = cqe; + rq = &qp->rq; + if (wr_id_idx > (rq->max_wqe - 1)) { + dev_err(&cq->hwq.pdev->dev, + "FP: CQ Process RC wr_id idx 0x%x exceeded RQ max 0x%x\n", + wr_id_idx, rq->max_wqe); + return -EINVAL; + } + if (wr_id_idx != rq->swq_last) + return -EINVAL; + swq = &rq->swq[rq->swq_last]; + cqe->wr_id = swq->wr_id; + cqe++; + (*budget)--; + bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons, + swq->slots, &rq->dbinfo.flags); + rq->swq_last = swq->next_idx; + *pcqe = cqe; - if (hwcqe->status != CQ_RES_RC_STATUS_OK) { - rq->flush_in_progress = true; -flush_rq: - rc = __flush_rq(rq, qp, CQ_BASE_CQE_TYPE_RES_RC, pcqe, budget); - if (!rc) - rq->flush_in_progress = false; + if (hwcqe->status != CQ_RES_RC_STATUS_OK) { + qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR; + /* Add qp to flush list of the CQ */ + bnxt_qplib_add_flush_qp(qp); + } } - return rc; + + return 0; } static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, @@ -1841,26 +2796,33 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe **pcqe, int *budget) { + struct bnxt_qplib_srq *srq; + struct bnxt_qplib_cqe *cqe; struct bnxt_qplib_qp *qp; struct bnxt_qplib_q *rq; - struct bnxt_qplib_cqe *cqe; u32 wr_id_idx; - int rc = 0; qp = (struct bnxt_qplib_qp *)((unsigned long) le64_to_cpu(hwcqe->qp_handle)); if (!qp) { - dev_err(&cq->hwq.pdev->dev, "QPLIB: process_cq UD qp is NULL"); + dev_err(&cq->hwq.pdev->dev, "process_cq UD qp is NULL\n"); return -EINVAL; } + if (qp->rq.flushed) { + dev_dbg(&cq->hwq.pdev->dev, + "%s: QP in Flush QP = %p\n", __func__, qp); + return 0; + } cqe = *pcqe; cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK; - cqe->length = le32_to_cpu(hwcqe->length); + cqe->length = le16_to_cpu(hwcqe->length) & CQ_RES_UD_LENGTH_MASK; + cqe->cfa_meta = le16_to_cpu(hwcqe->cfa_metadata); cqe->invrkey = le32_to_cpu(hwcqe->imm_data); cqe->flags = le16_to_cpu(hwcqe->flags); cqe->status = hwcqe->status; cqe->qp_handle = (u64)(unsigned long)qp; - memcpy(cqe->smac, hwcqe->src_mac, 6); + /*FIXME: Endianness fix needed for smace */ + memcpy(cqe->smac, hwcqe->src_mac, ETH_ALEN); wr_id_idx = le32_to_cpu(hwcqe->src_qp_high_srq_or_rq_wr_id) & CQ_RES_UD_SRQ_OR_RQ_WR_ID_MASK; cqe->src_qp = le16_to_cpu(hwcqe->src_qp_low) | @@ -1868,49 +2830,62 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, hwcqe->src_qp_high_srq_or_rq_wr_id) & CQ_RES_UD_SRC_QP_HIGH_MASK) >> 8); - rq = &qp->rq; - if (wr_id_idx > rq->hwq.max_elements) { - dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process UD "); - dev_err(&cq->hwq.pdev->dev, - "QPLIB: wr_id idx %#x exceeded RQ max %#x", - wr_id_idx, rq->hwq.max_elements); - return -EINVAL; - } - if (rq->flush_in_progress) - goto flush_rq; + if (cqe->flags & CQ_RES_RC_FLAGS_SRQ_SRQ) { + srq = qp->srq; + if (!srq) + return -EINVAL; - cqe->wr_id = rq->swq[wr_id_idx].wr_id; - cqe++; - (*budget)--; - rq->hwq.cons++; - *pcqe = cqe; + if (wr_id_idx >= srq->hwq.max_elements) { + dev_err(&cq->hwq.pdev->dev, + "FP: CQ Process UD wr_id idx 0x%x exceeded SRQ max 0x%x\n", + wr_id_idx, srq->hwq.max_elements); + return -EINVAL; + } + cqe->wr_id = srq->swq[wr_id_idx].wr_id; + bnxt_qplib_release_srqe(srq, wr_id_idx); + cqe++; + (*budget)--; + *pcqe = cqe; + } else { + struct bnxt_qplib_swq *swq; + + rq = &qp->rq; + if (wr_id_idx > (rq->max_wqe - 1)) { + dev_err(&cq->hwq.pdev->dev, + "FP: CQ Process UD wr_id idx 0x%x exceeded RQ max 0x%x\n", + wr_id_idx, rq->max_wqe); + return -EINVAL; + } + + if (rq->swq_last != wr_id_idx) + return -EINVAL; + swq = &rq->swq[rq->swq_last]; + cqe->wr_id = swq->wr_id; + cqe++; + (*budget)--; + bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons, + swq->slots, &rq->dbinfo.flags); + rq->swq_last = swq->next_idx; + *pcqe = cqe; - if (hwcqe->status != CQ_RES_RC_STATUS_OK) { - rq->flush_in_progress = true; -flush_rq: - rc = __flush_rq(rq, qp, CQ_BASE_CQE_TYPE_RES_UD, pcqe, budget); - if (!rc) - rq->flush_in_progress = false; + if (hwcqe->status != CQ_RES_RC_STATUS_OK) { + qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR; + /* Add qp to flush list of the CQ */ + bnxt_qplib_add_flush_qp(qp); + } } - return rc; + + return 0; } bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq) { - struct cq_base *hw_cqe, **hw_cqe_ptr; - unsigned long flags; - u32 sw_cons, raw_cons; + struct cq_base *hw_cqe; bool rc = true; - spin_lock_irqsave(&cq->hwq.lock, flags); - raw_cons = cq->hwq.cons; - sw_cons = HWQ_CMP(raw_cons, &cq->hwq); - hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr; - hw_cqe = &hw_cqe_ptr[CQE_PG(sw_cons)][CQE_IDX(sw_cons)]; - + hw_cqe = bnxt_qplib_get_qe(&cq->hwq, cq->hwq.cons, NULL); /* Check for Valid bit. If the CQE is valid, return false */ - rc = !CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements); - spin_unlock_irqrestore(&cq->hwq.lock, flags); + rc = !CQE_CMP_VALID(hw_cqe, cq->dbinfo.flags); return rc; } @@ -1921,17 +2896,21 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq, { struct bnxt_qplib_qp *qp; struct bnxt_qplib_q *rq; + struct bnxt_qplib_srq *srq; struct bnxt_qplib_cqe *cqe; u32 wr_id_idx; - int rc = 0; qp = (struct bnxt_qplib_qp *)((unsigned long) le64_to_cpu(hwcqe->qp_handle)); if (!qp) { - dev_err(&cq->hwq.pdev->dev, - "QPLIB: process_cq Raw/QP1 qp is NULL"); + dev_err(&cq->hwq.pdev->dev, "process_cq Raw/QP1 qp is NULL\n"); return -EINVAL; } + if (qp->rq.flushed) { + dev_dbg(&cq->hwq.pdev->dev, + "%s: QP in Flush QP = %p\n", __func__, qp); + return 0; + } cqe = *pcqe; cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK; cqe->flags = le16_to_cpu(hwcqe->flags); @@ -1952,32 +2931,55 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq, cqe->raweth_qp1_flags = le16_to_cpu(hwcqe->raweth_qp1_flags); cqe->raweth_qp1_flags2 = le32_to_cpu(hwcqe->raweth_qp1_flags2); + cqe->raweth_qp1_metadata = le32_to_cpu(hwcqe->raweth_qp1_metadata); - rq = &qp->rq; - if (wr_id_idx > rq->hwq.max_elements) { - dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process Raw/QP1 RQ wr_id "); - dev_err(&cq->hwq.pdev->dev, "QPLIB: ix 0x%x exceeded RQ max 0x%x", - wr_id_idx, rq->hwq.max_elements); - return -EINVAL; - } - if (rq->flush_in_progress) - goto flush_rq; + if (cqe->flags & CQ_RES_RAWETH_QP1_FLAGS_SRQ_SRQ) { + srq = qp->srq; + if (!srq) { + dev_err(&cq->hwq.pdev->dev, + "FP: SRQ used but not defined??\n"); + return -EINVAL; + } + if (wr_id_idx >= srq->hwq.max_elements) { + dev_err(&cq->hwq.pdev->dev, + "FP: CQ Process Raw/QP1 wr_id idx 0x%x exceeded SRQ max 0x%x\n", + wr_id_idx, srq->hwq.max_elements); + return -EINVAL; + } + cqe->wr_id = srq->swq[wr_id_idx].wr_id; + bnxt_qplib_release_srqe(srq, wr_id_idx); + cqe++; + (*budget)--; + *pcqe = cqe; + } else { + struct bnxt_qplib_swq *swq; - cqe->wr_id = rq->swq[wr_id_idx].wr_id; - cqe++; - (*budget)--; - rq->hwq.cons++; - *pcqe = cqe; + rq = &qp->rq; + if (wr_id_idx > (rq->max_wqe - 1)) { + dev_err(&cq->hwq.pdev->dev, + "FP: CQ Process Raw/QP1 RQ wr_id idx 0x%x exceeded RQ max 0x%x\n", + wr_id_idx, rq->max_wqe); + return -EINVAL; + } + if (rq->swq_last != wr_id_idx) + return -EINVAL; + swq = &rq->swq[rq->swq_last]; + cqe->wr_id = swq->wr_id; + cqe++; + (*budget)--; + bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons, + swq->slots, &rq->dbinfo.flags); + rq->swq_last = swq->next_idx; + *pcqe = cqe; - if (hwcqe->status != CQ_RES_RC_STATUS_OK) { - rq->flush_in_progress = true; -flush_rq: - rc = __flush_rq(rq, qp, CQ_BASE_CQE_TYPE_RES_RAWETH_QP1, pcqe, - budget); - if (!rc) - rq->flush_in_progress = false; + if (hwcqe->status != CQ_RES_RC_STATUS_OK) { + qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR; + /* Add qp to flush list of the CQ */ + bnxt_qplib_add_flush_qp(qp); + } } - return rc; + + return 0; } static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq, @@ -1988,23 +2990,20 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq, struct bnxt_qplib_qp *qp; struct bnxt_qplib_q *sq, *rq; struct bnxt_qplib_cqe *cqe; - u32 sw_cons = 0, cqe_cons; + u32 swq_last = 0, cqe_cons; int rc = 0; - u8 opcode = 0; /* Check the Status */ if (hwcqe->status != CQ_TERMINAL_STATUS_OK) dev_warn(&cq->hwq.pdev->dev, - "QPLIB: FP: CQ Process Terminal Error status = 0x%x", + "FP: CQ Process Terminal Error status = 0x%x\n", hwcqe->status); qp = (struct bnxt_qplib_qp *)((unsigned long) le64_to_cpu(hwcqe->qp_handle)); - if (!qp) { - dev_err(&cq->hwq.pdev->dev, - "QPLIB: FP: CQ Process terminal qp is NULL"); + if (!qp) return -EINVAL; - } + /* Must block new posting of SQ and RQ */ qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR; @@ -2014,18 +3013,13 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq, cqe_cons = le16_to_cpu(hwcqe->sq_cons_idx); if (cqe_cons == 0xFFFF) goto do_rq; + cqe_cons %= sq->max_sw_wqe; - if (cqe_cons > sq->hwq.max_elements) { - dev_err(&cq->hwq.pdev->dev, - "QPLIB: FP: CQ Process terminal reported "); - dev_err(&cq->hwq.pdev->dev, - "QPLIB: sq_cons_idx 0x%x which exceeded max 0x%x", - cqe_cons, sq->hwq.max_elements); - goto do_rq; + if (qp->sq.flushed) { + dev_dbg(&cq->hwq.pdev->dev, + "%s: QP in Flush QP = %p\n", __func__, qp); + goto sq_done; } - /* If we were in the middle of flushing, continue */ - if (sq->flush_in_progress) - goto flush_sq; /* Terminal CQE can also include aggregated successful CQEs prior. * So we must complete all CQEs from the current sq's cons to the @@ -2033,33 +3027,30 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq, */ cqe = *pcqe; while (*budget) { - sw_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq); - if (sw_cons == cqe_cons) + swq_last = sq->swq_last; + if (swq_last == cqe_cons) break; - if (sq->swq[sw_cons].flags & SQ_SEND_FLAGS_SIGNAL_COMP) { + if (sq->swq[swq_last].flags & SQ_SEND_FLAGS_SIGNAL_COMP) { memset(cqe, 0, sizeof(*cqe)); cqe->status = CQ_REQ_STATUS_OK; cqe->opcode = CQ_BASE_CQE_TYPE_REQ; cqe->qp_handle = (u64)(unsigned long)qp; cqe->src_qp = qp->id; - cqe->wr_id = sq->swq[sw_cons].wr_id; - cqe->type = sq->swq[sw_cons].type; + cqe->wr_id = sq->swq[swq_last].wr_id; + cqe->type = sq->swq[swq_last].type; cqe++; (*budget)--; } - sq->hwq.cons++; + bnxt_qplib_hwq_incr_cons(sq->hwq.max_elements, &sq->hwq.cons, + sq->swq[swq_last].slots, &sq->dbinfo.flags); + sq->swq_last = sq->swq[swq_last].next_idx; } *pcqe = cqe; - if (!(*budget) && sw_cons != cqe_cons) { + if (!(*budget) && swq_last != cqe_cons) { /* Out of budget */ rc = -EAGAIN; goto sq_done; } - sq->flush_in_progress = true; -flush_sq: - rc = __flush_sq(sq, qp, pcqe, budget); - if (!rc) - sq->flush_in_progress = false; sq_done: if (rc) return rc; @@ -2067,34 +3058,28 @@ do_rq: cqe_cons = le16_to_cpu(hwcqe->rq_cons_idx); if (cqe_cons == 0xFFFF) { goto done; - } else if (cqe_cons > rq->hwq.max_elements) { - dev_err(&cq->hwq.pdev->dev, - "QPLIB: FP: CQ Processed terminal "); + } else if (cqe_cons > rq->max_wqe - 1) { dev_err(&cq->hwq.pdev->dev, - "QPLIB: reported rq_cons_idx 0x%x exceeds max 0x%x", - cqe_cons, rq->hwq.max_elements); + "FP: CQ Processed terminal reported rq_cons_idx 0x%x exceeds max 0x%x\n", + cqe_cons, rq->max_wqe); + rc = -EINVAL; goto done; } + + if (qp->rq.flushed) { + dev_dbg(&cq->hwq.pdev->dev, + "%s: QP in Flush QP = %p\n", __func__, qp); + rc = 0; + goto done; + } + /* Terminal CQE requires all posted RQEs to complete with FLUSHED_ERR * from the current rq->cons to the rq->prod regardless what the * rq->cons the terminal CQE indicates */ - rq->flush_in_progress = true; - switch (qp->type) { - case CMDQ_CREATE_QP1_TYPE_GSI: - opcode = CQ_BASE_CQE_TYPE_RES_RAWETH_QP1; - break; - case CMDQ_CREATE_QP_TYPE_RC: - opcode = CQ_BASE_CQE_TYPE_RES_RC; - break; - case CMDQ_CREATE_QP_TYPE_UD: - opcode = CQ_BASE_CQE_TYPE_RES_UD; - break; - } - rc = __flush_rq(rq, qp, opcode, pcqe, budget); - if (!rc) - rq->flush_in_progress = false; + /* Add qp to flush list of the CQ */ + bnxt_qplib_add_flush_qp(qp); done: return rc; } @@ -2105,7 +3090,7 @@ static int bnxt_qplib_cq_process_cutoff(struct bnxt_qplib_cq *cq, /* Check the Status */ if (hwcqe->status != CQ_CUTOFF_STATUS_OK) { dev_err(&cq->hwq.pdev->dev, - "QPLIB: FP: CQ Process Cutoff Error status = 0x%x", + "FP: CQ Process Cutoff Error status = 0x%x\n", hwcqe->status); return -EINVAL; } @@ -2115,34 +3100,59 @@ static int bnxt_qplib_cq_process_cutoff(struct bnxt_qplib_cq *cq, return 0; } +int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq, + struct bnxt_qplib_cqe *cqe, + int num_cqes) +{ + struct bnxt_qplib_qp *qp = NULL; + u32 budget = num_cqes; + unsigned long flags; + + spin_lock_irqsave(&cq->flush_lock, flags); + list_for_each_entry(qp, &cq->sqf_head, sq_flush) { + dev_dbg(&cq->hwq.pdev->dev, "FP: Flushing SQ QP= %p\n", qp); + __flush_sq(&qp->sq, qp, &cqe, &budget); + } + + list_for_each_entry(qp, &cq->rqf_head, rq_flush) { + dev_dbg(&cq->hwq.pdev->dev, "FP: Flushing RQ QP= %p\n", qp); + __flush_rq(&qp->rq, qp, &cqe, &budget); + } + spin_unlock_irqrestore(&cq->flush_lock, flags); + + return num_cqes - budget; +} + int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, int num_cqes, struct bnxt_qplib_qp **lib_qp) { - struct cq_base *hw_cqe, **hw_cqe_ptr; - unsigned long flags; - u32 sw_cons, raw_cons; + struct cq_base *hw_cqe; int budget, rc = 0; + u32 hw_polled = 0; + u8 type; - spin_lock_irqsave(&cq->hwq.lock, flags); - raw_cons = cq->hwq.cons; budget = num_cqes; while (budget) { - sw_cons = HWQ_CMP(raw_cons, &cq->hwq); - hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr; - hw_cqe = &hw_cqe_ptr[CQE_PG(sw_cons)][CQE_IDX(sw_cons)]; + hw_cqe = bnxt_qplib_get_qe(&cq->hwq, cq->hwq.cons, NULL); /* Check for Valid bit */ - if (!CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements)) + if (!CQE_CMP_VALID(hw_cqe, cq->dbinfo.flags)) break; + /* + * The valid test of the entry must be done first before + * reading any further. + */ + dma_rmb(); /* From the device's respective CQE format to qplib_wc*/ - switch (hw_cqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK) { + type = hw_cqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK; + switch (type) { case CQ_BASE_CQE_TYPE_REQ: rc = bnxt_qplib_cq_process_req(cq, (struct cq_req *)hw_cqe, &cqe, &budget, - sw_cons, lib_qp); + cq->hwq.cons, lib_qp); break; case CQ_BASE_CQE_TYPE_RES_RC: rc = bnxt_qplib_cq_process_res_rc(cq, @@ -2172,7 +3182,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, goto exit; default: dev_err(&cq->hwq.pdev->dev, - "QPLIB: process_cq unknown type 0x%lx", + "process_cq unknown type 0x%lx\n", hw_cqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK); rc = -EINVAL; @@ -2184,27 +3194,33 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, /* Error while processing the CQE, just skip to the * next one */ - dev_err(&cq->hwq.pdev->dev, - "QPLIB: process_cqe error rc = 0x%x", rc); + if (type != CQ_BASE_CQE_TYPE_TERMINAL) + dev_err(&cq->hwq.pdev->dev, + "process_cqe error rc = 0x%x\n", rc); } - raw_cons++; - } - if (cq->hwq.cons != raw_cons) { - cq->hwq.cons = raw_cons; - bnxt_qplib_arm_cq(cq, DBR_DBR_TYPE_CQ); + hw_polled++; + bnxt_qplib_hwq_incr_cons(cq->hwq.max_elements, &cq->hwq.cons, + 1, &cq->dbinfo.flags); + } + if (hw_polled) + bnxt_qplib_ring_db(&cq->dbinfo, DBC_DBC_TYPE_CQ); exit: - spin_unlock_irqrestore(&cq->hwq.lock, flags); return num_cqes - budget; } void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type) { - unsigned long flags; - - spin_lock_irqsave(&cq->hwq.lock, flags); + cq->dbinfo.toggle = cq->toggle; if (arm_type) - bnxt_qplib_arm_cq(cq, arm_type); + bnxt_qplib_ring_db(&cq->dbinfo, arm_type); + /* Using cq->arm_state variable to track whether to issue cq handler */ + atomic_set(&cq->arm_state, 1); +} - spin_unlock_irqrestore(&cq->hwq.lock, flags); +void bnxt_qplib_flush_cqn_wq(struct bnxt_qplib_qp *qp) +{ + flush_workqueue(qp->scq->nq->cqn_wq); + if (qp->scq != qp->rcq) + flush_workqueue(qp->rcq->nq->cqn_wq); } |
