From 82a979265638c505e12fbe7ba40980dc0901436d Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Thu, 1 Feb 2018 10:43:42 -0800 Subject: IB/hfi1: Re-order IRQ cleanup to address driver cleanup race The pci_request_irq() interfaces always adds the IRQF_SHARED bit to all IRQ requests. When the kernel is built with CONFIG_DEBUG_SHIRQ config flag, if the IRQF_SHARED bit is set, a call to the IRQ handler is made from the __free_irq() function. This is testing a race condition between the IRQ cleanup and an IRQ racing the cleanup. The HFI driver should be able to handle this race, but does not. This race can cause traces that start with this footprint: BUG: unable to handle kernel NULL pointer dereference at (null) Call Trace: ... __free_irq+0x1b3/0x2d0 free_irq+0x35/0x70 pci_free_irq+0x1c/0x30 clean_up_interrupts+0x53/0xf0 [hfi1] hfi1_start_cleanup+0x122/0x190 [hfi1] postinit_cleanup+0x1d/0x280 [hfi1] remove_one+0x233/0x250 [hfi1] pci_device_remove+0x39/0xc0 Export IRQ cleanup function so it can be called from other modules. Using the exported cleanup function: Re-order the driver cleanup code to clean up IRQ resources before other resources, eliminating the race. Re-order error path for init so that the race does not occur. Reduce severity on spurious error message for SDMA IRQs to info. Reviewed-by: Alex Estrin Reviewed-by: Patel Jay P Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/chip.c | 18 ++++++++++++------ drivers/infiniband/hw/hfi1/hfi.h | 1 + drivers/infiniband/hw/hfi1/init.c | 4 +++- 3 files changed, 16 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 6660f920f42e..57f0df2f84f2 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -8264,8 +8264,8 @@ static irqreturn_t sdma_interrupt(int irq, void *data) /* handle the interrupt(s) */ sdma_engine_interrupt(sde, status); } else { - dd_dev_err_ratelimited(dd, "SDMA engine %u interrupt, but no status bits set\n", - sde->this_idx); + dd_dev_info_ratelimited(dd, "SDMA engine %u interrupt, but no status bits set\n", + sde->this_idx); } return IRQ_HANDLED; } @@ -12960,7 +12960,14 @@ static void disable_intx(struct pci_dev *pdev) pci_intx(pdev, 0); } -static void clean_up_interrupts(struct hfi1_devdata *dd) +/** + * hfi1_clean_up_interrupts() - Free all IRQ resources + * @dd: valid device data data structure + * + * Free the MSI or INTx IRQs and assoicated PCI resources, + * if they have been allocated. + */ +void hfi1_clean_up_interrupts(struct hfi1_devdata *dd) { int i; @@ -13321,7 +13328,7 @@ static int set_up_interrupts(struct hfi1_devdata *dd) return 0; fail: - clean_up_interrupts(dd); + hfi1_clean_up_interrupts(dd); return ret; } @@ -14748,7 +14755,6 @@ void hfi1_start_cleanup(struct hfi1_devdata *dd) aspm_exit(dd); free_cntrs(dd); free_rcverr(dd); - clean_up_interrupts(dd); finish_chip_resources(dd); } @@ -15204,7 +15210,7 @@ bail_free_rcverr: bail_free_cntrs: free_cntrs(dd); bail_clear_intr: - clean_up_interrupts(dd); + hfi1_clean_up_interrupts(dd); bail_cleanup: hfi1_pcie_ddcleanup(dd); bail_free: diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index b42c22292597..5757d0e3482d 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1957,6 +1957,7 @@ void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *dd); int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len); int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent); +void hfi1_clean_up_interrupts(struct hfi1_devdata *dd); void hfi1_pcie_cleanup(struct pci_dev *pdev); int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev); void hfi1_pcie_ddcleanup(struct hfi1_devdata *); diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 9b128268fb28..c676d1cb8755 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1058,8 +1058,9 @@ static void shutdown_device(struct hfi1_devdata *dd) } dd->flags &= ~HFI1_INITTED; - /* mask interrupts, but not errors */ + /* mask and clean up interrupts, but not errors */ set_intr_state(dd, 0); + hfi1_clean_up_interrupts(dd); for (pidx = 0; pidx < dd->num_pports; ++pidx) { ppd = dd->pport + pidx; @@ -1704,6 +1705,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dd_dev_err(dd, "Failed to create /dev devices: %d\n", -j); if (initfail || ret) { + hfi1_clean_up_interrupts(dd); stop_timers(dd); flush_workqueue(ib_wq); for (pidx = 0; pidx < dd->num_pports; ++pidx) { -- cgit From 473291b3ea0e1df81f7abf13b8ab4b98a346df5e Mon Sep 17 00:00:00 2001 From: Alex Estrin Date: Thu, 1 Feb 2018 10:43:50 -0800 Subject: IB/hfi1: Fix for early release of sdma context With IRQF_SHARED flag set and CONFIG_DEBUG_SHIRQ enabled module removal may result in panic in sdma_interrupt() routine if associated sdma context was released before pci_free_irq(); [ 9198.939885] BUG: unable to handle kernel NULL pointer dereference at (null) [ 9198.940514] IP: sdma_make_progress+0xa5/0x450 [hfi1] [ 9198.941114] PGD 170bdc0067 P4D 170bdc0067 PUD 172063e067 PMD 0 [ 9198.941783] Oops: 0000 [#1] SMP ..... [ 9198.958877] CPU: 132 PID: 64173 Comm: rmmod Tainted: G OE 4.14.0-rc4+ #1 [ 9198.961032] Hardware name: Intel Corporation S7200AP/S7200AP, BIOS S72C610.86B.01.02.0118.080620171935 08/06/2017 [ 9198.963323] task: ffff9681397f0000 task.stack: ffffae1647c40000 [ 9198.965695] RIP: 0010:sdma_make_progress+0xa5/0x450 [hfi1] [ 9198.968082] RSP: 0018:ffffae1647c43be8 EFLAGS: 00010046 [ 9198.970503] RAX: 0000000000000000 RBX: ffff9680ce8b5ca8 RCX: 0000000000000000 [ 9198.973006] RDX: 0000000000000000 RSI: 0000000001a00d28 RDI: ffff9680ce8b5ca0 [ 9198.975546] RBP: ffffae1647c43c40 R08: ffff96814325ec00 R09: 00000000ffffffff [ 9198.978142] R10: 000000004325e501 R11: ffff96814325ec00 R12: ffff9680ce8b5c44 [ 9198.980779] R13: ffff9680ce8b5ca0 R14: 0000000000000000 R15: ffff9680ce8b5b00 [ 9198.983462] FS: 00007f31196ba740(0000) GS:ffff96819df00000(0000) knlGS:0000000000000000 [ 9198.986231] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 9198.989036] CR2: 0000000000000000 CR3: 000000170833f000 CR4: 00000000001406e0 [ 9198.991911] Call Trace: [ 9198.994847] sdma_engine_interrupt+0x82/0x100 [hfi1] [ 9198.997852] sdma_interrupt+0x61/0xc0 [hfi1] [ 9199.000852] __free_irq+0x1b3/0x2d0 [ 9199.003873] free_irq+0x35/0x70 [ 9199.006909] pci_free_irq+0x1c/0x30 [ 9199.009999] clean_up_interrupts+0x53/0xf0 [hfi1] [ 9199.013137] hfi1_start_cleanup+0x117/0x190 [hfi1] [ 9199.016315] postinit_cleanup+0x1d/0x270 [hfi1] [ 9199.019529] remove_one+0x1f3/0x210 [hfi1] [ 9199.022738] pci_device_remove+0x39/0xc0 [ 9199.025974] device_release_driver_internal+0x141/0x210 [ 9199.029268] driver_detach+0x3f/0x80 [ 9199.032580] bus_remove_driver+0x55/0xd0 [ 9199.035931] driver_unregister+0x2c/0x50 [ 9199.039321] pci_unregister_driver+0x2a/0xa0 [ 9199.042755] hfi1_mod_cleanup+0x10/0xb50 [hfi1] [ 9199.046196] SyS_delete_module+0x171/0x250 ... Fix by exporting sdma_clean() and removing from sdma_exit(). sdma_exit() now just manipulates the engine state, leaving the memory free to sdma_clean() which is now called just before the dd is freed. Reviewed-by: Mike Marciniszyn Reviewed-by: Michael J Ruhl Signed-off-by: Alex Estrin Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/init.c | 1 + drivers/infiniband/hw/hfi1/sdma.c | 13 +++++++------ drivers/infiniband/hw/hfi1/sdma.h | 1 + 3 files changed, 9 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index c676d1cb8755..4c51a75b238a 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1219,6 +1219,7 @@ static void __hfi1_free_devdata(struct kobject *kobj) free_percpu(dd->rcv_limit); free_percpu(dd->send_schedule); free_percpu(dd->tx_opstats); + sdma_clean(dd, dd->num_sdma); rvt_dealloc_device(&dd->verbs_dev.rdi); } diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 31c8f89b5fc8..37424a81a7c9 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1276,13 +1276,15 @@ bail: return -ENOMEM; } -/* - * Clean up allocated memory. - * - * This routine is can be called regardless of the success of sdma_init() +/** + * sdma_clean() Clean up allocated memory + * @dd: struct hfi1_devdata + * @num_engines: num sdma engines * + * This routine can be called regardless of the success of + * sdma_init() */ -static void sdma_clean(struct hfi1_devdata *dd, size_t num_engines) +void sdma_clean(struct hfi1_devdata *dd, size_t num_engines) { size_t i; struct sdma_engine *sde; @@ -1618,7 +1620,6 @@ void sdma_exit(struct hfi1_devdata *dd) */ sdma_finalput(&sde->state); } - sdma_clean(dd, dd->num_sdma); } /* diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h index 374c59784950..46c775f255d1 100644 --- a/drivers/infiniband/hw/hfi1/sdma.h +++ b/drivers/infiniband/hw/hfi1/sdma.h @@ -420,6 +420,7 @@ struct sdma_engine { int sdma_init(struct hfi1_devdata *dd, u8 port); void sdma_start(struct hfi1_devdata *dd); void sdma_exit(struct hfi1_devdata *dd); +void sdma_clean(struct hfi1_devdata *dd, size_t num_engines); void sdma_all_running(struct hfi1_devdata *dd); void sdma_all_idle(struct hfi1_devdata *dd); void sdma_freeze_notify(struct hfi1_devdata *dd, int go_idle); -- cgit From 2b1e7fe16124e86ee9242aeeee859c79a843e3a2 Mon Sep 17 00:00:00 2001 From: Alex Estrin Date: Thu, 1 Feb 2018 10:43:58 -0800 Subject: IB/hfi1: Fix for potential refcount leak in hfi1_open_file() The dd refcount is speculatively incremented prior to allocating the fd memory with kzalloc(). If that kzalloc() failed the dd refcount leaks. Increment refcount on kzalloc success. Fixes: e11ffbd57520 ("IB/hfi1: Do not free hfi1 cdev parent structure early") Reviewed-by: Michael J Ruhl Signed-off-by: Alex Estrin Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/file_ops.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 1df7da47f431..acf0ba56c309 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -196,9 +196,6 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) if (!atomic_inc_not_zero(&dd->user_refcount)) return -ENXIO; - /* Just take a ref now. Not all opens result in a context assign */ - kobject_get(&dd->kobj); - /* The real work is performed later in assign_ctxt() */ fd = kzalloc(sizeof(*fd), GFP_KERNEL); @@ -208,6 +205,7 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) fd->mm = current->mm; mmgrab(fd->mm); fd->dd = dd; + kobject_get(&fd->dd->kobj); fp->private_data = fd; } else { fp->private_data = NULL; -- cgit From 9636258f103bac6853e280beecf9e85674736a6a Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Thu, 1 Feb 2018 10:46:07 -0800 Subject: IB/hfi1: Remove dependence on qp->s_hdrwords The s_hdrwords variable was used to indicate whether a packet was already built on a previous iteration of the send engine. This variable assumed the protection of the QP's RVT_S_BUSY flag, which was required since the the QP's s_lock was dropped just prior to the packet being queued on the one of the egress mechanisms. Support for multiple send engine instantiations require that the field not be used due to concurency issues. The ps.txreq signals the "already built" without the potential concurency issues. Fix by getting rid of all s_hdrword usage. A wrapper is added to test for the already built case that used to use s_hdrwords. What used to be stored in s_hdrwords is now in the txreq. The PBC is not counted, but is added in the pio/sdma code paths prior to posting the packet. Reviewed-by: Don Hiatt Signed-off-by: Mitko Haralanov Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/iowait.h | 9 +++++++++ drivers/infiniband/hw/hfi1/qp.c | 4 +--- drivers/infiniband/hw/hfi1/qp.h | 13 +++++++++++++ drivers/infiniband/hw/hfi1/rc.c | 10 ++-------- drivers/infiniband/hw/hfi1/ruc.c | 23 +++++++++++------------ drivers/infiniband/hw/hfi1/uc.c | 6 +----- drivers/infiniband/hw/hfi1/ud.c | 27 ++++++++++++--------------- drivers/infiniband/hw/hfi1/verbs.c | 10 +++++----- drivers/infiniband/hw/hfi1/verbs.h | 11 ----------- drivers/infiniband/hw/hfi1/verbs_txreq.h | 7 +++++++ 10 files changed, 61 insertions(+), 59 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h index 591697d85eed..3d9c32c7c340 100644 --- a/drivers/infiniband/hw/hfi1/iowait.h +++ b/drivers/infiniband/hw/hfi1/iowait.h @@ -371,4 +371,13 @@ static inline void iowait_starve_find_max(struct iowait *w, u8 *max, } } +/** + * iowait_packet_queued() - determine if a packet is already built + * @wait: the wait structure + */ +static inline bool iowait_packet_queued(struct iowait *wait) +{ + return !list_empty(&wait->tx_head); +} + #endif diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 5507910e8b8a..d30dd1a5b0a6 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -565,7 +565,7 @@ void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter) if (qp->s_ack_queue) e = &qp->s_ack_queue[qp->s_tail_ack_queue]; seq_printf(s, - "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x S(%u %u %u %u %u %u %u) R(%u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d OS %x %x E %x %x %x RNR %d %s %d\n", + "N %d %s QP %x R %u %s %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x S(%u %u %u %u %u %u %u) R(%u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d OS %x %x E %x %x %x RNR %d %s %d\n", iter->n, qp_idle(qp) ? "I" : "B", qp->ibqp.qp_num, @@ -573,7 +573,6 @@ void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter) qp_type_str[qp->ibqp.qp_type], qp->state, wqe ? wqe->wr.opcode : 0, - qp->s_hdrwords, qp->s_flags, iowait_sdma_pending(&priv->s_iowait), iowait_pio_pending(&priv->s_iowait), @@ -795,7 +794,6 @@ void notify_error_qp(struct rvt_qp *qp) } if (!(qp->s_flags & RVT_S_BUSY)) { - qp->s_hdrwords = 0; if (qp->s_rdma_mr) { rvt_put_mr(qp->s_rdma_mr); qp->s_rdma_mr = NULL; diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h index c06d2f8348e0..b2d4cba8d15b 100644 --- a/drivers/infiniband/hw/hfi1/qp.h +++ b/drivers/infiniband/hw/hfi1/qp.h @@ -51,11 +51,24 @@ #include #include "verbs.h" #include "sdma.h" +#include "verbs_txreq.h" extern unsigned int hfi1_qp_table_size; extern const struct rvt_operation_params hfi1_post_parms[]; +/* + * Send if not busy or waiting for I/O and either + * a RC response is pending or we can process send work requests. + */ +static inline int hfi1_send_ok(struct rvt_qp *qp) +{ + return !(qp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT_IO)) && + (verbs_txreq_queued(qp) || + (qp->s_flags & RVT_S_RESP_PENDING) || + !(qp->s_flags & RVT_S_ANY_WAIT_SEND)); +} + /* * free_ahg - clear ahg from QP */ diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 68d5c3cce2eb..524c12f04bd9 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -226,12 +226,10 @@ normal: bth2 = mask_psn(qp->s_ack_psn); } qp->s_rdma_ack_cnt++; - qp->s_hdrwords = hwords; ps->s_txreq->sde = priv->s_sde; ps->s_txreq->s_cur_size = len; + ps->s_txreq->hdr_dwords = hwords; hfi1_make_ruc_header(qp, ohdr, bth0, bth2, middle, ps); - /* pbc */ - ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2; return 1; bail: @@ -387,7 +385,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) : IB_WC_SUCCESS); if (local_ops) atomic_dec(&qp->local_ops_pending); - qp->s_hdrwords = 0; goto done_free_tx; } @@ -690,7 +687,7 @@ no_flow_control: bth2 |= IB_BTH_REQ_ACK; } qp->s_len -= len; - qp->s_hdrwords = hwords; + ps->s_txreq->hdr_dwords = hwords; ps->s_txreq->sde = priv->s_sde; ps->s_txreq->ss = ss; ps->s_txreq->s_cur_size = len; @@ -701,8 +698,6 @@ no_flow_control: bth2, middle, ps); - /* pbc */ - ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2; return 1; done_free_tx: @@ -716,7 +711,6 @@ bail: bail_no_tx: ps->s_txreq = NULL; qp->s_flags &= ~RVT_S_BUSY; - qp->s_hdrwords = 0; return 0; } diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 2c7fc6e331ea..0cced9a4a345 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -757,8 +757,9 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, u32 slid; u16 pkey = hfi1_get_pkey(ibp, qp->s_pkey_index); u8 l4 = OPA_16B_L4_IB_LOCAL; - u8 extra_bytes = hfi1_get_16b_padding((qp->s_hdrwords << 2), - ps->s_txreq->s_cur_size); + u8 extra_bytes = hfi1_get_16b_padding( + (ps->s_txreq->hdr_dwords << 2), + ps->s_txreq->s_cur_size); u32 nwords = SIZE_OF_CRC + ((ps->s_txreq->s_cur_size + extra_bytes + SIZE_OF_LT) >> 2); u8 becn = 0; @@ -778,9 +779,9 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, grd->sgid_index = 0; grh = &ps->s_txreq->phdr.hdr.opah.u.l.grh; l4 = OPA_16B_L4_IB_GLOBAL; - hdrwords = qp->s_hdrwords - 4; - qp->s_hdrwords += hfi1_make_grh(ibp, grh, grd, - hdrwords, nwords); + hdrwords = ps->s_txreq->hdr_dwords - 4; + ps->s_txreq->hdr_dwords += hfi1_make_grh(ibp, grh, grd, + hdrwords, nwords); middle = 0; } @@ -814,7 +815,7 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, slid, opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr), 16B), - (qp->s_hdrwords + nwords) >> 1, + (ps->s_txreq->hdr_dwords + nwords) >> 1, pkey, becn, 0, l4, priv->s_sc); } @@ -834,10 +835,10 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp, if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) { struct ib_grh *grh = &ps->s_txreq->phdr.hdr.ibh.u.l.grh; - int hdrwords = qp->s_hdrwords - 2; + int hdrwords = ps->s_txreq->hdr_dwords - 2; lrh0 = HFI1_LRH_GRH; - qp->s_hdrwords += + ps->s_txreq->hdr_dwords += hfi1_make_grh(ibp, grh, rdma_ah_read_grh(&qp->remote_ah_attr), hdrwords, nwords); @@ -866,7 +867,7 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp, hfi1_make_ruc_bth(qp, ohdr, bth0, bth1, bth2); hfi1_make_ib_hdr(&ps->s_txreq->phdr.hdr.ibh, lrh0, - qp->s_hdrwords + nwords, + ps->s_txreq->hdr_dwords + nwords, opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr), 9B), ppd_from_ibp(ibp)->lid | rdma_ah_get_path_bits(&qp->remote_ah_attr)); @@ -1031,7 +1032,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread) ps.s_txreq = get_waiting_verbs_txreq(qp); do { /* Check for a constructed packet to be sent. */ - if (qp->s_hdrwords != 0) { + if (ps.s_txreq) { spin_unlock_irqrestore(&qp->s_lock, ps.flags); /* * If the packet cannot be sent now, return and @@ -1039,8 +1040,6 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread) */ if (hfi1_verbs_send(qp, &ps)) return; - /* Record that s_ahg is empty. */ - qp->s_hdrwords = 0; /* allow other tasks to run */ if (schedule_send_yield(qp, &ps)) return; diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 991bbee04821..fef69d98386c 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -146,7 +146,6 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) : IB_WC_SUCCESS); if (local_ops) atomic_dec(&qp->local_ops_pending); - qp->s_hdrwords = 0; goto done_free_tx; } /* @@ -269,14 +268,12 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) break; } qp->s_len -= len; - qp->s_hdrwords = hwords; + ps->s_txreq->hdr_dwords = hwords; ps->s_txreq->sde = priv->s_sde; ps->s_txreq->ss = &qp->s_sge; ps->s_txreq->s_cur_size = len; hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), mask_psn(qp->s_psn++), middle, ps); - /* pbc */ - ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2; return 1; done_free_tx: @@ -290,7 +287,6 @@ bail: bail_no_tx: ps->s_txreq = NULL; qp->s_flags &= ~RVT_S_BUSY; - qp->s_hdrwords = 0; return 0; } diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index beb5091eccca..6b5cd3a86be4 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -340,15 +340,15 @@ void hfi1_make_ud_req_9B(struct rvt_qp *qp, struct hfi1_pkt_state *ps, extra_bytes = -wqe->length & 3; nwords = ((wqe->length + extra_bytes) >> 2) + SIZE_OF_CRC; /* header size in dwords LRH+BTH+DETH = (8+12+8)/4. */ - qp->s_hdrwords = 7; + ps->s_txreq->hdr_dwords = 7; if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) - qp->s_hdrwords++; + ps->s_txreq->hdr_dwords++; if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) { grh = &ps->s_txreq->phdr.hdr.ibh.u.l.grh; - qp->s_hdrwords += hfi1_make_grh(ibp, grh, - rdma_ah_read_grh(ah_attr), - qp->s_hdrwords - 2, nwords); + ps->s_txreq->hdr_dwords += + hfi1_make_grh(ibp, grh, rdma_ah_read_grh(ah_attr), + ps->s_txreq->hdr_dwords - 2, nwords); lrh0 = HFI1_LRH_GRH; ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth; } else { @@ -381,7 +381,7 @@ void hfi1_make_ud_req_9B(struct rvt_qp *qp, struct hfi1_pkt_state *ps, } } hfi1_make_bth_deth(qp, wqe, ohdr, &pkey, extra_bytes, false); - len = qp->s_hdrwords + nwords; + len = ps->s_txreq->hdr_dwords + nwords; /* Setup the packet */ ps->s_txreq->phdr.hdr.hdr_type = HFI1_PKT_TYPE_9B; @@ -405,12 +405,12 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps, ppd = ppd_from_ibp(ibp); ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr; /* header size in dwords 16B LRH+BTH+DETH = (16+12+8)/4. */ - qp->s_hdrwords = 9; + ps->s_txreq->hdr_dwords = 9; if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) - qp->s_hdrwords++; + ps->s_txreq->hdr_dwords++; /* SW provides space for CRC and LT for bypass packets. */ - extra_bytes = hfi1_get_16b_padding((qp->s_hdrwords << 2), + extra_bytes = hfi1_get_16b_padding((ps->s_txreq->hdr_dwords << 2), wqe->length); nwords = ((wqe->length + extra_bytes + SIZE_OF_LT) >> 2) + SIZE_OF_CRC; @@ -428,8 +428,8 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps, grd->sgid_index = 0; } grh = &ps->s_txreq->phdr.hdr.opah.u.l.grh; - qp->s_hdrwords += hfi1_make_grh(ibp, grh, grd, - qp->s_hdrwords - 4, nwords); + ps->s_txreq->hdr_dwords += hfi1_make_grh(ibp, grh, grd, + ps->s_txreq->hdr_dwords - 4, nwords); ohdr = &ps->s_txreq->phdr.hdr.opah.u.l.oth; l4 = OPA_16B_L4_IB_GLOBAL; } else { @@ -452,7 +452,7 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps, hfi1_make_bth_deth(qp, wqe, ohdr, &pkey, extra_bytes, true); /* Convert dwords to flits */ - len = (qp->s_hdrwords + nwords) >> 1; + len = (ps->s_txreq->hdr_dwords + nwords) >> 1; /* Setup the packet */ ps->s_txreq->phdr.hdr.hdr_type = HFI1_PKT_TYPE_16B; @@ -564,8 +564,6 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) priv->s_ahg->ahgcount = 0; priv->s_ahg->ahgidx = 0; priv->s_ahg->tx_flags = 0; - /* pbc */ - ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2; return 1; @@ -580,7 +578,6 @@ bail: bail_no_tx: ps->s_txreq = NULL; qp->s_flags &= ~RVT_S_BUSY; - qp->s_hdrwords = 0; return 0; } diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index b8776a362a91..471d55c50066 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -835,7 +835,7 @@ static int build_verbs_tx_desc( { int ret = 0; struct hfi1_sdma_header *phdr = &tx->phdr; - u16 hdrbytes = tx->hdr_dwords << 2; + u16 hdrbytes = (tx->hdr_dwords + sizeof(pbc) / 4) << 2; u8 extra_bytes = 0; if (tx->phdr.hdr.hdr_type) { @@ -901,7 +901,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, { struct hfi1_qp_priv *priv = qp->priv; struct hfi1_ahg_info *ahg_info = priv->s_ahg; - u32 hdrwords = qp->s_hdrwords; + u32 hdrwords = ps->s_txreq->hdr_dwords; u32 len = ps->s_txreq->s_cur_size; u32 plen; struct hfi1_ibdev *dev = ps->dev; @@ -919,7 +919,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, } else { dwords = (len + 3) >> 2; } - plen = hdrwords + dwords + 2; + plen = hdrwords + dwords + sizeof(pbc) / 4; tx = ps->s_txreq; if (!sdma_txreq_built(&tx->txreq)) { @@ -1038,7 +1038,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u64 pbc) { struct hfi1_qp_priv *priv = qp->priv; - u32 hdrwords = qp->s_hdrwords; + u32 hdrwords = ps->s_txreq->hdr_dwords; struct rvt_sge_state *ss = ps->s_txreq->ss; u32 len = ps->s_txreq->s_cur_size; u32 dwords; @@ -1064,7 +1064,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, dwords = (len + 3) >> 2; hdr = (u32 *)&ps->s_txreq->phdr.hdr.ibh; } - plen = hdrwords + dwords + 2; + plen = hdrwords + dwords + sizeof(pbc) / 4; /* only RC/UC use complete */ switch (qp->ibqp.qp_type) { diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 87d1285a3340..c2aeb32bf482 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -245,17 +245,6 @@ static inline struct rvt_qp *iowait_to_qp(struct iowait *s_iowait) return priv->owner; } -/* - * Send if not busy or waiting for I/O and either - * a RC response is pending or we can process send work requests. - */ -static inline int hfi1_send_ok(struct rvt_qp *qp) -{ - return !(qp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT_IO)) && - (qp->s_hdrwords || (qp->s_flags & RVT_S_RESP_PENDING) || - !(qp->s_flags & RVT_S_ANY_WAIT_SEND)); -} - /* * This must be called with s_lock held. */ diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h index cec7a4b34d16..729244c3086c 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.h +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h @@ -113,6 +113,13 @@ static inline struct verbs_txreq *get_waiting_verbs_txreq(struct rvt_qp *qp) return NULL; } +static inline bool verbs_txreq_queued(struct rvt_qp *qp) +{ + struct hfi1_qp_priv *priv = qp->priv; + + return iowait_packet_queued(&priv->s_iowait); +} + void hfi1_put_txreq(struct verbs_txreq *tx); int verbs_txreq_init(struct hfi1_ibdev *dev); void verbs_txreq_exit(struct hfi1_ibdev *dev); -- cgit From f150e2736f346a3171f002e660c3dfc653cc11cd Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Thu, 1 Feb 2018 10:46:15 -0800 Subject: IB/hfi1: Compute BTH only for RDMA_WRITE_LAST/SEND_LAST packet In hfi1_rc_rcv(), BTH is computed for all packets received. However, it's only used for packets received with opcodes RDMA_WRITE_LAST and SEND_LAST, and it is a costly operation. Compute BTH only in the RDMA_WRITE_LAST/SEND_LAST code path and let the compiler handle endianness conversion for bitwise operations. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/rc.c | 3 +-- drivers/infiniband/hw/hfi1/uc.c | 3 +-- drivers/infiniband/hw/hfi1/ud.c | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 524c12f04bd9..70aa82b305d8 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -2035,7 +2035,6 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) struct rvt_qp *qp = packet->qp; struct hfi1_ibport *ibp = rcd_to_iport(rcd); struct ib_other_headers *ohdr = packet->ohdr; - u32 bth0 = be32_to_cpu(ohdr->bth[0]); u32 opcode = packet->opcode; u32 hdrsize = packet->hlen; u32 psn = ib_bth_get_psn(packet->ohdr); @@ -2233,7 +2232,7 @@ send_last: wc.port_num = 0; /* Signal completion event if the solicited bit is set. */ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, - (bth0 & IB_BTH_SOLICITED) != 0); + ib_bth_is_solicited(ohdr)); break; case OP(RDMA_WRITE_ONLY): diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index fef69d98386c..8030c38a27f2 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -478,8 +478,7 @@ last_imm: wc.port_num = 0; /* Signal completion event if the solicited bit is set. */ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, - (ohdr->bth[0] & - cpu_to_be32(IB_BTH_SOLICITED)) != 0); + ib_bth_is_solicited(ohdr)); break; case OP(RDMA_WRITE_FIRST): diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 6b5cd3a86be4..cff2fd8907f5 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -1045,8 +1045,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) wc.port_num = qp->port_num; /* Signal completion event if the solicited bit is set. */ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, - (ohdr->bth[0] & - cpu_to_be32(IB_BTH_SOLICITED)) != 0); + ib_bth_is_solicited(ohdr)); return; drop: -- cgit From 6d6b8848c882b22e3170cc9f217101773e8bd8d2 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Thu, 1 Feb 2018 10:46:23 -0800 Subject: IB/hfi1: Optimize packet type comparison using 9B and bypass code paths The packet type comparison used to find out if a packet is a bypass packet in the hot path is an expensive operation as seen in a profile. Determine packet's pkey and migration bit through the bypass and 9B code paths instead. Reviewed-by: Don Hiatt Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/driver.c | 4 ++++ drivers/infiniband/hw/hfi1/hfi.h | 2 ++ drivers/infiniband/hw/hfi1/ruc.c | 15 ++------------- drivers/infiniband/hw/hfi1/verbs.h | 5 +++++ 4 files changed, 13 insertions(+), 13 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 067b29f35f21..bb1a41b74dff 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1440,6 +1440,8 @@ static int hfi1_setup_9B_packet(struct hfi1_packet *packet) packet->extra_byte = 0; packet->fecn = ib_bth_get_fecn(packet->ohdr); packet->becn = ib_bth_get_becn(packet->ohdr); + packet->pkey = ib_bth_get_pkey(packet->ohdr); + packet->migrated = ib_bth_is_migration(packet->ohdr); return 0; drop: @@ -1506,6 +1508,8 @@ static int hfi1_setup_bypass_packet(struct hfi1_packet *packet) packet->extra_byte = SIZE_OF_LT; packet->fecn = hfi1_16B_get_fecn(packet->hdr); packet->becn = hfi1_16B_get_becn(packet->hdr); + packet->pkey = hfi1_16B_get_pkey(packet->hdr); + packet->migrated = opa_bth_is_migration(packet->ohdr); if (hfi1_bypass_ingress_pkt_check(packet)) goto drop; diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 5757d0e3482d..2c257ac685e7 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -341,6 +341,7 @@ struct hfi1_packet { u32 slid; u16 tlen; s16 etail; + u16 pkey; u8 hlen; u8 numpkt; u8 rsize; @@ -353,6 +354,7 @@ struct hfi1_packet { u8 opcode; bool becn; bool fecn; + bool migrated; }; /* Packet types */ diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 0cced9a4a345..6434207a9506 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -225,19 +225,8 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet) u32 dlid = packet->dlid; u32 slid = packet->slid; u32 sl = packet->sl; - int migrated; - u32 bth0, bth1; - u16 pkey; - - bth0 = be32_to_cpu(packet->ohdr->bth[0]); - bth1 = be32_to_cpu(packet->ohdr->bth[1]); - if (packet->etype == RHF_RCV_TYPE_BYPASS) { - pkey = hfi1_16B_get_pkey(packet->hdr); - migrated = bth1 & OPA_BTH_MIG_REQ; - } else { - pkey = ib_bth_get_pkey(packet->ohdr); - migrated = bth0 & IB_BTH_MIG_REQ; - } + bool migrated = packet->migrated; + u16 pkey = packet->pkey; if (qp->s_mig_state == IB_MIG_ARMED && migrated) { if (!packet->grh) { diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index c2aeb32bf482..ffc933ea146f 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -405,6 +405,11 @@ static inline void cacheless_memcpy(void *dst, void *src, size_t n) __copy_user_nocache(dst, (void __user *)src, n, 0); } +static inline bool opa_bth_is_migration(struct ib_other_headers *ohdr) +{ + return ohdr->bth[1] & cpu_to_be32(OPA_BTH_MIG_REQ); +} + extern const enum ib_wc_opcode ib_hfi1_wc_opcode[]; extern const u8 hdr_len_by_opcode[]; -- cgit From bdaf96f6500880401fe692cd0bf57afb596b135c Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Thu, 1 Feb 2018 10:46:31 -0800 Subject: IB/hfi1: Look up ibport using a pointer in receive path In the receive path, hfi1_ibport is looked up by indexing into an array. A profile shows this to be expensive. The receive context data has a pointer to the ibport data, use that pointer instead. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/driver.c | 14 ++++++++------ drivers/infiniband/hw/hfi1/rc.c | 36 +++++++++++++++++++++--------------- drivers/infiniband/hw/hfi1/verbs.h | 3 +-- 3 files changed, 30 insertions(+), 23 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index bb1a41b74dff..0de4654e34ca 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -634,9 +634,10 @@ next: } } -static void process_rcv_qp_work(struct hfi1_ctxtdata *rcd) +static void process_rcv_qp_work(struct hfi1_packet *packet) { struct rvt_qp *qp, *nqp; + struct hfi1_ctxtdata *rcd = packet->rcd; /* * Iterate over all QPs waiting to respond. @@ -646,7 +647,8 @@ static void process_rcv_qp_work(struct hfi1_ctxtdata *rcd) list_del_init(&qp->rspwait); if (qp->r_flags & RVT_R_RSP_NAK) { qp->r_flags &= ~RVT_R_RSP_NAK; - hfi1_send_rc_ack(rcd, qp, 0); + packet->qp = qp; + hfi1_send_rc_ack(packet, 0); } if (qp->r_flags & RVT_R_RSP_SEND) { unsigned long flags; @@ -667,7 +669,7 @@ static noinline int max_packet_exceeded(struct hfi1_packet *packet, int thread) if (thread) { if ((packet->numpkt & (MAX_PKT_RECV_THREAD - 1)) == 0) /* allow defered processing */ - process_rcv_qp_work(packet->rcd); + process_rcv_qp_work(packet); cond_resched(); return RCV_PKT_OK; } else { @@ -809,7 +811,7 @@ int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread) last = RCV_PKT_DONE; process_rcv_update(last, &packet); } - process_rcv_qp_work(rcd); + process_rcv_qp_work(&packet); rcd->head = packet.rhqoff; bail: finish_packet(&packet); @@ -838,7 +840,7 @@ int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread) last = RCV_PKT_DONE; process_rcv_update(last, &packet); } - process_rcv_qp_work(rcd); + process_rcv_qp_work(&packet); rcd->head = packet.rhqoff; bail: finish_packet(&packet); @@ -1068,7 +1070,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) process_rcv_update(last, &packet); } - process_rcv_qp_work(rcd); + process_rcv_qp_work(&packet); rcd->head = packet.rhqoff; bail: diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 70aa82b305d8..daf50cc445e4 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -730,14 +730,16 @@ static inline void hfi1_make_bth_aeth(struct rvt_qp *qp, ohdr->bth[2] = cpu_to_be32(mask_psn(qp->r_ack_psn)); } -static inline void hfi1_queue_rc_ack(struct rvt_qp *qp, bool is_fecn) +static inline void hfi1_queue_rc_ack(struct hfi1_packet *packet, bool is_fecn) { - struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + struct rvt_qp *qp = packet->qp; + struct hfi1_ibport *ibp; unsigned long flags; spin_lock_irqsave(&qp->s_lock, flags); if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) goto unlock; + ibp = rcd_to_iport(packet->rcd); this_cpu_inc(*ibp->rvp.rc_qacks); qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING; qp->s_nak_state = qp->r_nak_state; @@ -751,13 +753,14 @@ unlock: spin_unlock_irqrestore(&qp->s_lock, flags); } -static inline void hfi1_make_rc_ack_9B(struct rvt_qp *qp, +static inline void hfi1_make_rc_ack_9B(struct hfi1_packet *packet, struct hfi1_opa_header *opa_hdr, u8 sc5, bool is_fecn, u64 *pbc_flags, u32 *hwords, u32 *nwords) { - struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + struct rvt_qp *qp = packet->qp; + struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct ib_header *hdr = &opa_hdr->ibh; struct ib_other_headers *ohdr; @@ -798,13 +801,14 @@ static inline void hfi1_make_rc_ack_9B(struct rvt_qp *qp, hfi1_make_bth_aeth(qp, ohdr, bth0, bth1); } -static inline void hfi1_make_rc_ack_16B(struct rvt_qp *qp, +static inline void hfi1_make_rc_ack_16B(struct hfi1_packet *packet, struct hfi1_opa_header *opa_hdr, u8 sc5, bool is_fecn, u64 *pbc_flags, u32 *hwords, u32 *nwords) { - struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + struct rvt_qp *qp = packet->qp; + struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct hfi1_16b_header *hdr = &opa_hdr->opah; struct ib_other_headers *ohdr; @@ -850,7 +854,7 @@ static inline void hfi1_make_rc_ack_16B(struct rvt_qp *qp, hfi1_make_bth_aeth(qp, ohdr, bth0, bth1); } -typedef void (*hfi1_make_rc_ack)(struct rvt_qp *qp, +typedef void (*hfi1_make_rc_ack)(struct hfi1_packet *packet, struct hfi1_opa_header *opa_hdr, u8 sc5, bool is_fecn, u64 *pbc_flags, u32 *hwords, @@ -870,9 +874,10 @@ static const hfi1_make_rc_ack hfi1_make_rc_ack_tbl[2] = { * Note that RDMA reads and atomics are handled in the * send side QP state and send engine. */ -void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, - struct rvt_qp *qp, bool is_fecn) +void hfi1_send_rc_ack(struct hfi1_packet *packet, bool is_fecn) { + struct hfi1_ctxtdata *rcd = packet->rcd; + struct rvt_qp *qp = packet->qp; struct hfi1_ibport *ibp = rcd_to_iport(rcd); struct hfi1_qp_priv *priv = qp->priv; struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); @@ -889,14 +894,14 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, /* Don't send ACK or NAK if a RDMA read or atomic is pending. */ if (qp->s_flags & RVT_S_RESP_PENDING) { - hfi1_queue_rc_ack(qp, is_fecn); + hfi1_queue_rc_ack(packet, is_fecn); return; } /* Ensure s_rdma_ack_cnt changes are committed */ smp_read_barrier_depends(); if (qp->s_rdma_ack_cnt) { - hfi1_queue_rc_ack(qp, is_fecn); + hfi1_queue_rc_ack(packet, is_fecn); return; } @@ -905,7 +910,7 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, return; /* Make the appropriate header */ - hfi1_make_rc_ack_tbl[priv->hdr_type](qp, &opa_hdr, sc5, is_fecn, + hfi1_make_rc_ack_tbl[priv->hdr_type](packet, &opa_hdr, sc5, is_fecn, &pbc_flags, &hwords, &nwords); plen = 2 /* PBC */ + hwords + nwords; @@ -919,7 +924,7 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, * so that when enough buffer space becomes available, * the ACK is sent ahead of other outgoing packets. */ - hfi1_queue_rc_ack(qp, is_fecn); + hfi1_queue_rc_ack(packet, is_fecn); return; } trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device), @@ -1537,7 +1542,7 @@ static void rc_rcv_resp(struct hfi1_packet *packet) void *data = packet->payload; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; - struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + struct hfi1_ibport *ibp; struct ib_other_headers *ohdr = packet->ohdr; struct rvt_swqe *wqe; enum ib_wc_status status; @@ -1695,6 +1700,7 @@ ack_op_err: goto ack_err; ack_seq_err: + ibp = rcd_to_iport(rcd); rdma_seq_err(qp, ibp, psn, rcd); goto ack_done; @@ -2476,7 +2482,7 @@ nack_acc: qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; qp->r_ack_psn = qp->r_psn; send_ack: - hfi1_send_rc_ack(rcd, qp, is_fecn); + hfi1_send_rc_ack(packet, is_fecn); } void hfi1_rc_hdrerr( diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index ffc933ea146f..4ea040df334b 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -358,8 +358,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread); void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_status status); -void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, - bool is_fecn); +void hfi1_send_rc_ack(struct hfi1_packet *packet, bool is_fecn); int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps); -- cgit From ca85bb1ca9948899682fe7170636e465599ea8e7 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Thu, 1 Feb 2018 10:46:38 -0800 Subject: IB/hfi1: Remove unnecessary fecn and becn fields packet->fecn and packet->becn are calculated in the hot path and are never used. Remove these fields as they show to be costly in a profile. Also, remove initialization for becn and fecn in process_ecn() as they're unconditionally assigned in the function and ensure fecn and becn variables use a boolean type. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/driver.c | 4 ---- drivers/infiniband/hw/hfi1/hfi.h | 16 +++++----------- drivers/infiniband/hw/hfi1/rc.c | 2 +- drivers/infiniband/hw/hfi1/ruc.c | 4 ++-- drivers/infiniband/hw/hfi1/trace.c | 8 ++++---- drivers/infiniband/hw/hfi1/trace_ibhdrs.h | 16 ++++++++-------- 6 files changed, 20 insertions(+), 30 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 0de4654e34ca..0a9bc1875d53 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1440,8 +1440,6 @@ static int hfi1_setup_9B_packet(struct hfi1_packet *packet) packet->sc = hfi1_9B_get_sc5(hdr, packet->rhf); packet->pad = ib_bth_get_pad(packet->ohdr); packet->extra_byte = 0; - packet->fecn = ib_bth_get_fecn(packet->ohdr); - packet->becn = ib_bth_get_becn(packet->ohdr); packet->pkey = ib_bth_get_pkey(packet->ohdr); packet->migrated = ib_bth_is_migration(packet->ohdr); @@ -1508,8 +1506,6 @@ static int hfi1_setup_bypass_packet(struct hfi1_packet *packet) packet->sl = ibp->sc_to_sl[packet->sc]; packet->pad = hfi1_16B_bth_get_pad(packet->ohdr); packet->extra_byte = SIZE_OF_LT; - packet->fecn = hfi1_16B_get_fecn(packet->hdr); - packet->becn = hfi1_16B_get_becn(packet->hdr); packet->pkey = hfi1_16B_get_pkey(packet->hdr); packet->migrated = opa_bth_is_migration(packet->ohdr); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 2c257ac685e7..105d11dcc554 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -352,8 +352,6 @@ struct hfi1_packet { u8 sc; u8 sl; u8 opcode; - bool becn; - bool fecn; bool migrated; }; @@ -1781,19 +1779,15 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt, bool do_cnp) { - struct ib_other_headers *ohdr = pkt->ohdr; - - u32 bth1; - bool becn = false; - bool fecn = false; + bool becn; + bool fecn; if (pkt->etype == RHF_RCV_TYPE_BYPASS) { fecn = hfi1_16B_get_fecn(pkt->hdr); becn = hfi1_16B_get_becn(pkt->hdr); } else { - bth1 = be32_to_cpu(ohdr->bth[1]); - fecn = bth1 & IB_FECN_SMASK; - becn = bth1 & IB_BECN_SMASK; + fecn = ib_bth_get_fecn(pkt->ohdr); + becn = ib_bth_get_becn(pkt->ohdr); } if (unlikely(fecn || becn)) { hfi1_process_ecn_slowpath(qp, pkt, do_cnp); @@ -2419,7 +2413,7 @@ static inline void hfi1_make_ib_hdr(struct ib_header *hdr, static inline void hfi1_make_16b_hdr(struct hfi1_16b_header *hdr, u32 slid, u32 dlid, u16 len, u16 pkey, - u8 becn, u8 fecn, u8 l4, + bool becn, bool fecn, u8 l4, u8 sc) { u32 lrh0 = 0; diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index daf50cc445e4..93ea03c2129c 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -814,7 +814,7 @@ static inline void hfi1_make_rc_ack_16B(struct hfi1_packet *packet, struct ib_other_headers *ohdr; u32 bth0, bth1 = 0; u16 len, pkey; - u8 becn = !!is_fecn; + bool becn = is_fecn; u8 l4 = OPA_16B_L4_IB_LOCAL; u8 extra_bytes; diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 6434207a9506..425272210da0 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -751,7 +751,7 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, ps->s_txreq->s_cur_size); u32 nwords = SIZE_OF_CRC + ((ps->s_txreq->s_cur_size + extra_bytes + SIZE_OF_LT) >> 2); - u8 becn = 0; + bool becn = false; if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) && hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr))) { @@ -789,7 +789,7 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, if (qp->s_flags & RVT_S_ECN) { qp->s_flags &= ~RVT_S_ECN; /* we recently received a FECN, so return a BECN */ - becn = 1; + becn = true; } hfi1_make_ruc_bth(qp, ohdr, bth0, bth1, bth2); diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index 959a80429ee9..89bd9851065b 100644 --- a/drivers/infiniband/hw/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -138,7 +138,7 @@ static const char *parse_syndrome(u8 syndrome) } void hfi1_trace_parse_9b_bth(struct ib_other_headers *ohdr, - u8 *ack, u8 *becn, u8 *fecn, u8 *mig, + u8 *ack, bool *becn, bool *fecn, u8 *mig, u8 *se, u8 *pad, u8 *opcode, u8 *tver, u16 *pkey, u32 *psn, u32 *qpn) { @@ -184,7 +184,7 @@ void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5, } void hfi1_trace_parse_16b_hdr(struct hfi1_16b_header *hdr, - u8 *age, u8 *becn, u8 *fecn, + u8 *age, bool *becn, bool *fecn, u8 *l4, u8 *rc, u8 *sc, u16 *entropy, u16 *len, u16 *pkey, u32 *dlid, u32 *slid) @@ -207,7 +207,7 @@ void hfi1_trace_parse_16b_hdr(struct hfi1_16b_header *hdr, #define LRH_16B_PRN "age:%d becn:%d fecn:%d l4:%d " \ "rc:%d sc:%d pkey:0x%.4x entropy:0x%.4x" const char *hfi1_trace_fmt_lrh(struct trace_seq *p, bool bypass, - u8 age, u8 becn, u8 fecn, u8 l4, + u8 age, bool becn, bool fecn, u8 l4, u8 lnh, const char *lnh_name, u8 lver, u8 rc, u8 sc, u8 sl, u16 entropy, u16 len, u16 pkey, u32 dlid, u32 slid) @@ -235,7 +235,7 @@ const char *hfi1_trace_fmt_lrh(struct trace_seq *p, bool bypass, "op:0x%.2x,%s se:%d m:%d pad:%d tver:%d " \ "qpn:0x%.6x a:%d psn:0x%.8x" const char *hfi1_trace_fmt_bth(struct trace_seq *p, bool bypass, - u8 ack, u8 becn, u8 fecn, u8 mig, + u8 ack, bool becn, bool fecn, u8 mig, u8 se, u8 pad, u8 opcode, const char *opname, u8 tver, u16 pkey, u32 psn, u32 qpn) { diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h index fb631278eccd..2847626d3819 100644 --- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h +++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h @@ -101,7 +101,7 @@ u8 hfi1_trace_opa_hdr_len(struct hfi1_opa_header *opah); u8 hfi1_trace_packet_hdr_len(struct hfi1_packet *packet); const char *hfi1_trace_get_packet_l4_str(u8 l4); void hfi1_trace_parse_9b_bth(struct ib_other_headers *ohdr, - u8 *ack, u8 *becn, u8 *fecn, u8 *mig, + u8 *ack, bool *becn, bool *fecn, u8 *mig, u8 *se, u8 *pad, u8 *opcode, u8 *tver, u16 *pkey, u32 *psn, u32 *qpn); void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5, @@ -112,19 +112,19 @@ void hfi1_trace_parse_16b_bth(struct ib_other_headers *ohdr, u8 *pad, u8 *se, u8 *tver, u32 *psn, u32 *qpn); void hfi1_trace_parse_16b_hdr(struct hfi1_16b_header *hdr, - u8 *age, u8 *becn, u8 *fecn, + u8 *age, bool *becn, bool *fecn, u8 *l4, u8 *rc, u8 *sc, u16 *entropy, u16 *len, u16 *pkey, u32 *dlid, u32 *slid); const char *hfi1_trace_fmt_lrh(struct trace_seq *p, bool bypass, - u8 age, u8 becn, u8 fecn, u8 l4, + u8 age, bool becn, bool fecn, u8 l4, u8 lnh, const char *lnh_name, u8 lver, u8 rc, u8 sc, u8 sl, u16 entropy, u16 len, u16 pkey, u32 dlid, u32 slid); const char *hfi1_trace_fmt_bth(struct trace_seq *p, bool bypass, - u8 ack, u8 becn, u8 fecn, u8 mig, + u8 ack, bool becn, bool fecn, u8 mig, u8 se, u8 pad, u8 opcode, const char *opname, u8 tver, u16 pkey, u32 psn, u32 qpn); @@ -148,8 +148,8 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, __field(u8, etype) __field(u8, ack) __field(u8, age) - __field(u8, becn) - __field(u8, fecn) + __field(bool, becn) + __field(bool, fecn) __field(u8, l2) __field(u8, l4) __field(u8, lnh) @@ -290,8 +290,8 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, __field(u8, hdr_type) __field(u8, ack) __field(u8, age) - __field(u8, becn) - __field(u8, fecn) + __field(bool, becn) + __field(bool, fecn) __field(u8, l4) __field(u8, lnh) __field(u8, lver) -- cgit From aca7f4fc320b5a507da4a41454582440f65cde4c Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Thu, 1 Feb 2018 10:46:46 -0800 Subject: IB/hfi1: Optimize process_receive_ib() The arguments for trace_hfi1_rcvhdr() get computed every time in the hot path regardless of the whether the trace is on or off. This is seen to be costly with a profile. The handling of fault inject isolates the verbs device for all packets regardless of the presence of a RHF_DC_ERR error. Fix the first by computing trace_hfi1_rcvhdr() arguments within the trace itself, so that when the trace is off, the argument data isn't computed. Fix the second by moving the error check to handle_eflags() when an RHF error occurs and by testing for RHF_DC_ERR before executing the reset of handle_eflags(). Reviewed-by: Don Hiatt Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/driver.c | 21 +++++++-------------- drivers/infiniband/hw/hfi1/trace_rx.h | 28 ++++++++++------------------ 2 files changed, 17 insertions(+), 32 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 0a9bc1875d53..98703f1ce7ac 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -256,7 +256,12 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, u32 mlid_base; struct hfi1_ibport *ibp = rcd_to_iport(rcd); struct hfi1_devdata *dd = ppd->dd; - struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; + struct hfi1_ibdev *verbs_dev = &dd->verbs_dev; + struct rvt_dev_info *rdi = &verbs_dev->rdi; + + if ((packet->rhf & RHF_DC_ERR) && + hfi1_dbg_fault_suppress_err(verbs_dev)) + return; if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR)) return; @@ -1552,19 +1557,7 @@ int process_receive_ib(struct hfi1_packet *packet) if (hfi1_setup_9B_packet(packet)) return RHF_RCV_CONTINUE; - trace_hfi1_rcvhdr(packet->rcd->ppd->dd, - packet->rcd->ctxt, - rhf_err_flags(packet->rhf), - RHF_RCV_TYPE_IB, - packet->hlen, - packet->tlen, - packet->updegr, - rhf_egr_index(packet->rhf)); - - if (unlikely( - (hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) && - (packet->rhf & RHF_DC_ERR)))) - return RHF_RCV_CONTINUE; + trace_hfi1_rcvhdr(packet, RHF_RCV_TYPE_IB); if (unlikely(rhf_err_flags(packet->rhf))) { handle_eflags(packet); diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index 4d487fee105d..f76841595e08 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -63,17 +63,9 @@ __print_symbolic(type, \ #define TRACE_SYSTEM hfi1_rx TRACE_EVENT(hfi1_rcvhdr, - TP_PROTO(struct hfi1_devdata *dd, - u32 ctxt, - u64 eflags, - u32 etype, - u32 hlen, - u32 tlen, - u32 updegr, - u32 etail - ), - TP_ARGS(dd, ctxt, eflags, etype, hlen, tlen, updegr, etail), - TP_STRUCT__entry(DD_DEV_ENTRY(dd) + TP_PROTO(struct hfi1_packet *packet, u32 etype), + TP_ARGS(packet, etype), + TP_STRUCT__entry(DD_DEV_ENTRY(packet->rcd->dd) __field(u64, eflags) __field(u32, ctxt) __field(u32, etype) @@ -82,14 +74,14 @@ TRACE_EVENT(hfi1_rcvhdr, __field(u32, updegr) __field(u32, etail) ), - TP_fast_assign(DD_DEV_ASSIGN(dd); - __entry->eflags = eflags; - __entry->ctxt = ctxt; + TP_fast_assign(DD_DEV_ASSIGN(packet->rcd->dd); + __entry->eflags = rhf_err_flags(packet->rhf); + __entry->ctxt = packet->rcd->ctxt; __entry->etype = etype; - __entry->hlen = hlen; - __entry->tlen = tlen; - __entry->updegr = updegr; - __entry->etail = etail; + __entry->hlen = packet->hlen; + __entry->tlen = packet->tlen; + __entry->updegr = packet->updegr; + __entry->etail = rhf_egr_index(packet->rhf); ), TP_printk( "[%s] ctxt %d eflags 0x%llx etype %d,%s hlen %d tlen %d updegr %d etail %d", -- cgit From 6391214f4d80e32caf622bacab64ed99ed43e1eb Mon Sep 17 00:00:00 2001 From: Bartlomiej Dudek Date: Thu, 1 Feb 2018 10:52:20 -0800 Subject: IB/hfi1: Do not override given pcie_pset value During PCIe Gen 3 transistion, pcie_pset is read and might be overridden to a default value(i.e. 255) in do_pcie_gen3_transition() routine. If the pcie_pset value is overridden then this new value will be used during initialization of next adapter on a different card. Introducing a new local variable to avoid modification of pcie_pset Reviewed-by: Dennis Dalessandro Signed-off-by: Bartlomiej Dudek Signed-off-by: Patel Jay P Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/pcie.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 8c7e7a60b715..83d66e862207 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -1034,6 +1034,7 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd) int do_retry, retry_count = 0; int intnum = 0; uint default_pset; + uint pset = pcie_pset; u16 target_vector, target_speed; u16 lnkctl2, vendor; u8 div; @@ -1201,16 +1202,16 @@ retry: * * Set Gen3EqPsetReqVec, leave other fields 0. */ - if (pcie_pset == UNSET_PSET) - pcie_pset = default_pset; - if (pcie_pset > 10) { /* valid range is 0-10, inclusive */ + if (pset == UNSET_PSET) + pset = default_pset; + if (pset > 10) { /* valid range is 0-10, inclusive */ dd_dev_err(dd, "%s: Invalid Eq Pset %u, setting to %d\n", - __func__, pcie_pset, default_pset); - pcie_pset = default_pset; + __func__, pset, default_pset); + pset = default_pset; } - dd_dev_info(dd, "%s: using EQ Pset %u\n", __func__, pcie_pset); + dd_dev_info(dd, "%s: using EQ Pset %u\n", __func__, pset); pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL106, - ((1 << pcie_pset) << + ((1 << pset) << PCIE_CFG_REG_PL106_GEN3_EQ_PSET_REQ_VEC_SHIFT) | PCIE_CFG_REG_PL106_GEN3_EQ_EVAL2MS_DISABLE_SMASK | PCIE_CFG_REG_PL106_GEN3_EQ_PHASE23_EXIT_MODE_SMASK); @@ -1240,10 +1241,10 @@ retry: /* apply static CTLE tunings */ u8 pcie_dc, pcie_lf, pcie_hf, pcie_bw; - pcie_dc = ctle_tunings[pcie_pset][0]; - pcie_lf = ctle_tunings[pcie_pset][1]; - pcie_hf = ctle_tunings[pcie_pset][2]; - pcie_bw = ctle_tunings[pcie_pset][3]; + pcie_dc = ctle_tunings[pset][0]; + pcie_lf = ctle_tunings[pset][1]; + pcie_hf = ctle_tunings[pset][2]; + pcie_bw = ctle_tunings[pset][3]; write_gasket_interrupt(dd, intnum++, 0x0026, 0x0200 | pcie_dc); write_gasket_interrupt(dd, intnum++, 0x0026, 0x0100 | pcie_lf); write_gasket_interrupt(dd, intnum++, 0x0026, 0x0000 | pcie_hf); -- cgit From 0719007663ce2d5da653ec1dc3bcfe2ab681b964 Mon Sep 17 00:00:00 2001 From: Kamenee Arumugam Date: Thu, 1 Feb 2018 10:52:28 -0800 Subject: IB/hfi1: Convert PortXmitWait/PortVLXmitWait counters to flit times HFI's counters SendWaitCnt and SendWaitVlCnt are in units of TXE cycle time (at 805MHz). OPA counters PortXmitWait and PortVLXmtWait are in units of flit times. Convert the counter values to flit units using following conversion formula: PortXmitWait = SendWaitCnt * 2 * (4 /link_width) * (25 Gbps /link_speed) PortVLXmitWait = SendWaitVLCnt * 2 * (4 /link_width) * (25 Gbps /link_speed) At link up or downgrade events, the link width can change. To ensure accurate counter calculations, sample the counters after the events, during counter requests, and then aggregate the OPA counters. Reviewed-by: Michael J. Ruhl Signed-off-by: Kamenee Arumugam Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/chip.c | 64 +++++++++++++++++-- drivers/infiniband/hw/hfi1/chip.h | 4 +- drivers/infiniband/hw/hfi1/hfi.h | 7 +++ drivers/infiniband/hw/hfi1/init.c | 9 +++ drivers/infiniband/hw/hfi1/mad.c | 127 +++++++++++++++++++++++++++++++++++--- drivers/infiniband/hw/hfi1/mad.h | 47 +++++++++++++- 6 files changed, 239 insertions(+), 19 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 57f0df2f84f2..e6a60fa59f2b 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1083,6 +1083,7 @@ static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp, static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd); static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms); static void clear_rsm_rule(struct hfi1_devdata *dd, u8 rule_index); +static void update_xmit_counters(struct hfi1_pportdata *ppd, u16 link_width); /* * Error interrupt table entry. This is used as input to the interrupt @@ -6905,6 +6906,32 @@ void handle_freeze(struct work_struct *work) /* no longer frozen */ } +/** + * update_xmit_counters - update PortXmitWait/PortVlXmitWait + * counters. + * @ppd: info of physical Hfi port + * @link_width: new link width after link up or downgrade + * + * Update the PortXmitWait and PortVlXmitWait counters after + * a link up or downgrade event to reflect a link width change. + */ +static void update_xmit_counters(struct hfi1_pportdata *ppd, u16 link_width) +{ + int i; + u16 tx_width; + u16 link_speed; + + tx_width = tx_link_width(link_width); + link_speed = get_link_speed(ppd->link_speed_active); + + /* + * There are C_VL_COUNT number of PortVLXmitWait counters. + * Adding 1 to C_VL_COUNT to include the PortXmitWait counter. + */ + for (i = 0; i < C_VL_COUNT + 1; i++) + get_xmit_wait_counters(ppd, tx_width, link_speed, i); +} + /* * Handle a link up interrupt from the 8051. * @@ -7526,18 +7553,29 @@ void handle_verify_cap(struct work_struct *work) set_link_state(ppd, HLS_GOING_UP); } -/* - * Apply the link width downgrade enabled policy against the current active - * link widths. +/** + * apply_link_downgrade_policy - Apply the link width downgrade enabled + * policy against the current active link widths. + * @ppd: info of physical Hfi port + * @refresh_widths: True indicates link downgrade event + * @return: True indicates a successful link downgrade. False indicates + * link downgrade event failed and the link will bounce back to + * default link width. * - * Called when the enabled policy changes or the active link widths change. + * Called when the enabled policy changes or the active link widths + * change. + * Refresh_widths indicates that a link downgrade occurred. The + * link_downgraded variable is set by refresh_widths and + * determines the success/failure of the policy application. */ -void apply_link_downgrade_policy(struct hfi1_pportdata *ppd, int refresh_widths) +bool apply_link_downgrade_policy(struct hfi1_pportdata *ppd, + bool refresh_widths) { int do_bounce = 0; int tries; u16 lwde; u16 tx, rx; + bool link_downgraded = refresh_widths; /* use the hls lock to avoid a race with actual link up */ tries = 0; @@ -7571,6 +7609,7 @@ retry: ppd->link_width_downgrade_rx_active == 0) { /* the 8051 reported a dead link as a downgrade */ dd_dev_err(ppd->dd, "Link downgrade is really a link down, ignoring\n"); + link_downgraded = false; } else if (lwde == 0) { /* downgrade is disabled */ @@ -7587,6 +7626,7 @@ retry: ppd->link_width_downgrade_tx_active, ppd->link_width_downgrade_rx_active); do_bounce = 1; + link_downgraded = false; } } else if ((lwde & ppd->link_width_downgrade_tx_active) == 0 || (lwde & ppd->link_width_downgrade_rx_active) == 0) { @@ -7598,6 +7638,7 @@ retry: lwde, ppd->link_width_downgrade_tx_active, ppd->link_width_downgrade_rx_active); do_bounce = 1; + link_downgraded = false; } done: @@ -7609,6 +7650,8 @@ done: set_link_state(ppd, HLS_DN_OFFLINE); start_link(ppd); } + + return link_downgraded; } /* @@ -7622,7 +7665,8 @@ void handle_link_downgrade(struct work_struct *work) link_downgrade_work); dd_dev_info(ppd->dd, "8051: Link width downgrade\n"); - apply_link_downgrade_policy(ppd, 1); + if (apply_link_downgrade_policy(ppd, true)) + update_xmit_counters(ppd, ppd->link_width_downgrade_tx_active); } static char *dcc_err_string(char *buf, int buf_len, u64 flags) @@ -10597,6 +10641,14 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK); handle_linkup_change(dd, 1); + + /* + * After link up, a new link width will have been set. + * Update the xmit counters with regards to the new + * link width. + */ + update_xmit_counters(ppd, ppd->link_width_active); + ppd->host_link_state = HLS_UP_INIT; update_statusp(ppd, IB_PORT_INIT); break; diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 21fca8ec5076..c0d70f255050 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -736,8 +736,8 @@ int read_8051_config(struct hfi1_devdata *, u8, u8, u32 *); int start_link(struct hfi1_pportdata *ppd); int bringup_serdes(struct hfi1_pportdata *ppd); void set_intr_state(struct hfi1_devdata *dd, u32 enable); -void apply_link_downgrade_policy(struct hfi1_pportdata *ppd, - int refresh_widths); +bool apply_link_downgrade_policy(struct hfi1_pportdata *ppd, + bool refresh_widths); void update_usrhead(struct hfi1_ctxtdata *rcd, u32 hd, u32 updegr, u32 egrhd, u32 intr_adjust, u32 npkts); int stop_drain_data_vls(struct hfi1_devdata *dd); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 105d11dcc554..90bc8c76d2ca 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -858,6 +858,13 @@ struct hfi1_pportdata { struct work_struct linkstate_active_work; /* Does this port need to prescan for FECNs */ bool cc_prescan; + /* + * Sample sendWaitCnt & sendWaitVlCnt during link transition + * and counter request. + */ + u64 port_vl_xmit_wait_last[C_VL_COUNT + 1]; + u16 prev_link_width; + u64 vl_xmit_flit_cnt[C_VL_COUNT + 1]; }; typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet); diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 4c51a75b238a..8c4f04032b28 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -637,6 +637,15 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, ppd->dd = dd; ppd->hw_pidx = hw_pidx; ppd->port = port; /* IB port number, not index */ + ppd->prev_link_width = LINK_WIDTH_DEFAULT; + /* + * There are C_VL_COUNT number of PortVLXmitWait counters. + * Adding 1 to C_VL_COUNT to include the PortXmitWait counter. + */ + for (i = 0; i < C_VL_COUNT + 1; i++) { + ppd->port_vl_xmit_wait_last[i] = 0; + ppd->vl_xmit_flit_cnt[i] = 0; + } default_pkey_idx = 1; diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 34547a48a445..e9962c65c68f 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -2649,6 +2649,79 @@ static void a0_portstatus(struct hfi1_pportdata *ppd, } } +/** + * tx_link_width - convert link width bitmask to integer + * value representing actual link width. + * @link_width: width of active link + * @return: return index of the bit set in link_width var + * + * The function convert and return the index of bit set + * that indicate the current link width. + */ +u16 tx_link_width(u16 link_width) +{ + int n = LINK_WIDTH_DEFAULT; + u16 tx_width = n; + + while (link_width && n) { + if (link_width & (1 << (n - 1))) { + tx_width = n; + break; + } + n--; + } + + return tx_width; +} + +/** + * get_xmit_wait_counters - Convert HFI 's SendWaitCnt/SendWaitVlCnt + * counter in unit of TXE cycle times to flit times. + * @ppd: info of physical Hfi port + * @link_width: width of active link + * @link_speed: speed of active link + * @vl: represent VL0-VL7, VL15 for PortVLXmitWait counters request + * and if vl value is C_VL_COUNT, it represent SendWaitCnt + * counter request + * @return: return SendWaitCnt/SendWaitVlCnt counter value per vl. + * + * Convert SendWaitCnt/SendWaitVlCnt counter from TXE cycle times to + * flit times. Call this function to samples these counters. This + * function will calculate for previous state transition and update + * current state at end of function using ppd->prev_link_width and + * ppd->port_vl_xmit_wait_last to port_vl_xmit_wait_curr and link_width. + */ +u64 get_xmit_wait_counters(struct hfi1_pportdata *ppd, + u16 link_width, u16 link_speed, int vl) +{ + u64 port_vl_xmit_wait_curr; + u64 delta_vl_xmit_wait; + u64 xmit_wait_val; + + if (vl > C_VL_COUNT) + return 0; + if (vl < C_VL_COUNT) + port_vl_xmit_wait_curr = + read_port_cntr(ppd, C_TX_WAIT_VL, vl); + else + port_vl_xmit_wait_curr = + read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL); + + xmit_wait_val = + port_vl_xmit_wait_curr - + ppd->port_vl_xmit_wait_last[vl]; + delta_vl_xmit_wait = + convert_xmit_counter(xmit_wait_val, + ppd->prev_link_width, + link_speed); + + ppd->vl_xmit_flit_cnt[vl] += delta_vl_xmit_wait; + ppd->port_vl_xmit_wait_last[vl] = port_vl_xmit_wait_curr; + ppd->prev_link_width = link_width; + + return ppd->vl_xmit_flit_cnt[vl]; +} + static int pma_get_opa_portstatus(struct opa_pma_mad *pmp, struct ib_device *ibdev, u8 port, u32 *resp_len) @@ -2668,6 +2741,8 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp, struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); int vfi; u64 tmp, tmp2; + u16 link_width; + u16 link_speed; response_data_size = sizeof(struct opa_port_status_rsp) + num_vls * sizeof(struct _vls_pctrs); @@ -2711,8 +2786,16 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp, rsp->port_multicast_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS, CNTR_INVALID_VL)); + /* + * Convert PortXmitWait counter from TXE cycle times + * to flit times. + */ + link_width = + tx_link_width(ppd->link_width_downgrade_tx_active); + link_speed = get_link_speed(ppd->link_speed_active); rsp->port_xmit_wait = - cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL)); + cpu_to_be64(get_xmit_wait_counters(ppd, link_width, + link_speed, C_VL_COUNT)); rsp->port_rcv_fecn = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL)); rsp->port_rcv_becn = @@ -2777,10 +2860,14 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp, rsp->vls[vfi].port_vl_xmit_pkts = cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL, idx_from_vl(vl))); - + /* + * Convert PortVlXmitWait counter from TXE cycle + * times to flit times. + */ rsp->vls[vfi].port_vl_xmit_wait = - cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL, - idx_from_vl(vl))); + cpu_to_be64(get_xmit_wait_counters(ppd, link_width, + link_speed, + idx_from_vl(vl))); rsp->vls[vfi].port_vl_rcv_fecn = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL, @@ -2910,6 +2997,8 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, unsigned long vl; u32 vl_select_mask; int vfi; + u16 link_width; + u16 link_speed; num_ports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24; num_vls = hweight32(be32_to_cpu(req->vl_select_mask)); @@ -2959,8 +3048,16 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, rsp->link_quality_indicator = cpu_to_be32((u32)lq); pma_get_opa_port_dctrs(ibdev, rsp); + /* + * Convert PortXmitWait counter from TXE + * cycle times to flit times. + */ + link_width = + tx_link_width(ppd->link_width_downgrade_tx_active); + link_speed = get_link_speed(ppd->link_speed_active); rsp->port_xmit_wait = - cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL)); + cpu_to_be64(get_xmit_wait_counters(ppd, link_width, + link_speed, C_VL_COUNT)); rsp->port_rcv_fecn = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL)); rsp->port_rcv_becn = @@ -2996,9 +3093,14 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL, idx_from_vl(vl))); + /* + * Convert PortVlXmitWait counter from TXE + * cycle times to flit times. + */ rsp->vls[vfi].port_vl_xmit_wait = - cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL, - idx_from_vl(vl))); + cpu_to_be64(get_xmit_wait_counters(ppd, link_width, + link_speed, + idx_from_vl(vl))); rsp->vls[vfi].port_vl_rcv_fecn = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL, @@ -3416,9 +3518,11 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp, if (counter_select & CS_PORT_MCAST_RCV_PKTS) write_dev_cntr(dd, C_DC_MC_RCV_PKTS, CNTR_INVALID_VL, 0); - if (counter_select & CS_PORT_XMIT_WAIT) + if (counter_select & CS_PORT_XMIT_WAIT) { write_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL, 0); - + ppd->port_vl_xmit_wait_last[C_VL_COUNT] = 0; + ppd->vl_xmit_flit_cnt[C_VL_COUNT] = 0; + } /* ignore cs_sw_portCongestion for HFIs */ if (counter_select & CS_PORT_RCV_FECN) @@ -3491,8 +3595,11 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp, if (counter_select & CS_PORT_RCV_PKTS) write_dev_cntr(dd, C_DC_RX_PKT_VL, idx_from_vl(vl), 0); - if (counter_select & CS_PORT_XMIT_WAIT) + if (counter_select & CS_PORT_XMIT_WAIT) { write_port_cntr(ppd, C_TX_WAIT_VL, idx_from_vl(vl), 0); + ppd->port_vl_xmit_wait_last[idx_from_vl(vl)] = 0; + ppd->vl_xmit_flit_cnt[idx_from_vl(vl)] = 0; + } /* sw_port_vl_congestion is 0 for HFIs */ if (counter_select & CS_PORT_RCV_FECN) diff --git a/drivers/infiniband/hw/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h index c4938f3d97c8..2f48e6953629 100644 --- a/drivers/infiniband/hw/hfi1/mad.h +++ b/drivers/infiniband/hw/hfi1/mad.h @@ -180,6 +180,15 @@ struct opa_mad_notice_attr { #define OPA_VLARB_PREEMPT_MATRIX 3 #define IB_PMA_PORT_COUNTERS_CONG cpu_to_be16(0xFF00) +#define LINK_SPEED_25G 1 +#define LINK_SPEED_12_5G 2 +#define LINK_WIDTH_DEFAULT 4 +#define DECIMAL_FACTORING 1000 +/* + * The default link width is multiplied by 1000 + * to get accurate value after division. + */ +#define FACTOR_LINK_WIDTH (LINK_WIDTH_DEFAULT * DECIMAL_FACTORING) struct ib_pma_portcounters_cong { u8 reserved; @@ -429,5 +438,41 @@ struct sc2vlnt { void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port); void hfi1_handle_trap_timer(struct timer_list *t); - +u16 tx_link_width(u16 link_width); +u64 get_xmit_wait_counters(struct hfi1_pportdata *ppd, u16 link_width, + u16 link_speed, int vl); +/** + * get_link_speed - determine whether 12.5G or 25G speed + * @link_speed: the speed of active link + * @return: Return 2 if link speed identified as 12.5G + * or return 1 if link speed is 25G. + * + * The function indirectly calculate required link speed + * value for convert_xmit_counter function. If the link + * speed is 25G, the function return as 1 as it is required + * by xmit counter conversion formula :-( 25G / link_speed). + * This conversion will provide value 1 if current + * link speed is 25G or 2 if 12.5G.This is done to avoid + * 12.5 float number conversion. + */ +static inline u16 get_link_speed(u16 link_speed) +{ + return (link_speed == 1) ? + LINK_SPEED_12_5G : LINK_SPEED_25G; +} + +/** + * convert_xmit_counter - calculate flit times for given xmit counter + * value + * @xmit_wait_val: current xmit counter value + * @link_width: width of active link + * @link_speed: speed of active link + * @return: return xmit counter value in flit times. + */ +static inline u64 convert_xmit_counter(u64 xmit_wait_val, u16 link_width, + u16 link_speed) +{ + return (xmit_wait_val * 2 * (FACTOR_LINK_WIDTH / link_width) + * link_speed) / DECIMAL_FACTORING; +} #endif /* _HFI1_MAD_H */ -- cgit From 78d3633ba9c2351fc869271ee00284f46e6b15b2 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 1 Feb 2018 10:52:35 -0800 Subject: IB/hfi1: Remove blind constants from 16B update These values were introduced as part of the 16B code to account for the varying size of the LRH between the differing packet formats. Replace the blind constants with defines based on FIELD_SIZEOF() calls. Fixes: 5b6cabb0db77 ("IB/hfi1: Add 16B RC/UC support") Reviewed-by: Don Hiatt Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/driver.c | 6 ++++-- drivers/infiniband/hw/hfi1/ruc.c | 13 ++++++------- drivers/infiniband/hw/hfi1/ud.c | 15 ++++++++++----- drivers/infiniband/hw/hfi1/verbs.h | 5 +++++ 4 files changed, 25 insertions(+), 14 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 98703f1ce7ac..c7e6b670e809 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1499,8 +1499,10 @@ static int hfi1_setup_bypass_packet(struct hfi1_packet *packet) /* Query commonly used fields from packet header */ packet->opcode = ib_bth_get_opcode(packet->ohdr); - packet->hlen = hdr_len_by_opcode[packet->opcode] + 8 + grh_len; - packet->payload = packet->ebuf + packet->hlen - (4 * sizeof(u32)); + /* hdr_len_by_opcode already has an IB LRH factored in */ + packet->hlen = hdr_len_by_opcode[packet->opcode] + + (LRH_16B_BYTES - LRH_9B_BYTES) + grh_len; + packet->payload = packet->ebuf + packet->hlen - LRH_16B_BYTES; packet->slid = hfi1_16B_get_slid(packet->hdr); packet->dlid = hfi1_16B_get_dlid(packet->hdr); if (unlikely(hfi1_is_16B_mcast(packet->dlid))) diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 425272210da0..edef320d745d 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -758,8 +758,6 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, struct ib_grh *grh; struct ib_global_route *grd = rdma_ah_retrieve_grh(&qp->remote_ah_attr); - int hdrwords; - /* * Ensure OPA GIDs are transformed to IB gids * before creating the GRH. @@ -768,9 +766,10 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, grd->sgid_index = 0; grh = &ps->s_txreq->phdr.hdr.opah.u.l.grh; l4 = OPA_16B_L4_IB_GLOBAL; - hdrwords = ps->s_txreq->hdr_dwords - 4; - ps->s_txreq->hdr_dwords += hfi1_make_grh(ibp, grh, grd, - hdrwords, nwords); + ps->s_txreq->hdr_dwords += + hfi1_make_grh(ibp, grh, grd, + ps->s_txreq->hdr_dwords - LRH_16B_DWORDS, + nwords); middle = 0; } @@ -824,13 +823,13 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp, if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) { struct ib_grh *grh = &ps->s_txreq->phdr.hdr.ibh.u.l.grh; - int hdrwords = ps->s_txreq->hdr_dwords - 2; lrh0 = HFI1_LRH_GRH; ps->s_txreq->hdr_dwords += hfi1_make_grh(ibp, grh, rdma_ah_read_grh(&qp->remote_ah_attr), - hdrwords, nwords); + ps->s_txreq->hdr_dwords - LRH_9B_DWORDS, + nwords); middle = 0; } lrh0 |= (priv->s_sc & 0xf) << 12 | diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index cff2fd8907f5..066afbcfc064 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -348,7 +348,8 @@ void hfi1_make_ud_req_9B(struct rvt_qp *qp, struct hfi1_pkt_state *ps, grh = &ps->s_txreq->phdr.hdr.ibh.u.l.grh; ps->s_txreq->hdr_dwords += hfi1_make_grh(ibp, grh, rdma_ah_read_grh(ah_attr), - ps->s_txreq->hdr_dwords - 2, nwords); + ps->s_txreq->hdr_dwords - LRH_9B_DWORDS, + nwords); lrh0 = HFI1_LRH_GRH; ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth; } else { @@ -428,8 +429,10 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps, grd->sgid_index = 0; } grh = &ps->s_txreq->phdr.hdr.opah.u.l.grh; - ps->s_txreq->hdr_dwords += hfi1_make_grh(ibp, grh, grd, - ps->s_txreq->hdr_dwords - 4, nwords); + ps->s_txreq->hdr_dwords += hfi1_make_grh( + ibp, grh, grd, + ps->s_txreq->hdr_dwords - LRH_16B_DWORDS, + nwords); ohdr = &ps->s_txreq->phdr.hdr.opah.u.l.oth; l4 = OPA_16B_L4_IB_GLOBAL; } else { @@ -648,7 +651,8 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, struct ib_grh *grh = &hdr.u.l.grh; grh->version_tclass_flow = old_grh->version_tclass_flow; - grh->paylen = cpu_to_be16((hwords - 4 + nwords) << 2); + grh->paylen = cpu_to_be16( + (hwords - LRH_16B_DWORDS + nwords) << 2); grh->hop_limit = 0xff; grh->sgid = old_grh->dgid; grh->dgid = old_grh->sgid; @@ -702,7 +706,8 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, struct ib_grh *grh = &hdr.u.l.grh; grh->version_tclass_flow = old_grh->version_tclass_flow; - grh->paylen = cpu_to_be16((hwords - 2 + SIZE_OF_CRC) << 2); + grh->paylen = cpu_to_be16( + (hwords - LRH_9B_DWORDS + SIZE_OF_CRC) << 2); grh->hop_limit = 0xff; grh->sgid = old_grh->dgid; grh->dgid = old_grh->sgid; diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 4ea040df334b..2d787b8346ca 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -105,6 +105,11 @@ enum { HFI1_HAS_GRH = (1 << 0), }; +#define LRH_16B_BYTES (FIELD_SIZEOF(struct hfi1_16b_header, lrh)) +#define LRH_16B_DWORDS (LRH_16B_BYTES / sizeof(u32)) +#define LRH_9B_BYTES (FIELD_SIZEOF(struct ib_header, lrh)) +#define LRH_9B_DWORDS (LRH_9B_BYTES / sizeof(u32)) + struct hfi1_16b_header { u32 lrh[4]; union { -- cgit From b5de809ef6f6c32cfbbf0da14ed11b0fdc0888f8 Mon Sep 17 00:00:00 2001 From: Mitko Haralanov Date: Thu, 1 Feb 2018 10:52:43 -0800 Subject: IB/hfi1: Show fault stats in both TX and RX directions The routine which shows the fault stats checks the counters to determine whether to show any stats based on the number of transmitted pkts/bytes for a particular opcode. Unfortunately, it only checked the receive counters. As a result, if any packet faults have happened for packets egressing the HFI, those stats would not be shown. In order to fix this, the routine is amended to also check the TX counters. With this change the pkt/byte counts are the sum of both TX and RX counts for the opcode. Fixes: 1b311f8931cf ("IB/hfi1: Add tx_opcode_stats like the opcode_stats") Reviewed-by: Don Hiatt Reviewed-by: Michael J. Ruhl Signed-off-by: Mitko Haralanov Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/debugfs.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 2e6e0c516041..852173bf05d0 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015-2017 Intel Corporation. + * Copyright(c) 2015-2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -1201,6 +1201,13 @@ static int _fault_stats_seq_show(struct seq_file *s, void *v) } hfi1_rcd_put(rcd); } + for_each_possible_cpu(j) { + struct hfi1_opcode_stats_perctx *sp = + per_cpu_ptr(dd->tx_opstats, j); + + n_packets += sp->stats[i].n_packets; + n_bytes += sp->stats[i].n_bytes; + } if (!n_packets && !n_bytes) return SEQ_SKIP; if (!ibd->fault_opcode->n_rxfaults[i] && -- cgit From 953a9cebeab43f33baed79d1a9ef643bfb249c4b Mon Sep 17 00:00:00 2001 From: Kamenee Arumugam Date: Thu, 1 Feb 2018 12:37:30 -0800 Subject: IB/hfi1: Convert kzalloc_node and kcalloc to use kcalloc_node Kzalloc_node API doesn't check for overflows in size multiplication. While kcalloc API check for overflows in size multiplication but these implementations are not NUMA-aware. This conversion allowed for correcting an allocation used in the hot path to be on the local NUMA and ensure us overflow free multiplication for the size of a memory allocation. Reviewed-by: Mike Marciniszyn Signed-off-by: Kamenee Arumugam Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/init.c | 17 +++++++++-------- drivers/infiniband/hw/hfi1/pio.c | 15 ++++++++------- drivers/infiniband/hw/hfi1/sdma.c | 3 ++- 3 files changed, 19 insertions(+), 16 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 8c4f04032b28..33eba2356742 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -172,7 +172,7 @@ int hfi1_create_kctxts(struct hfi1_devdata *dd) u16 i; int ret; - dd->rcd = kzalloc_node(dd->num_rcv_contexts * sizeof(*dd->rcd), + dd->rcd = kcalloc_node(dd->num_rcv_contexts, sizeof(*dd->rcd), GFP_KERNEL, dd->node); if (!dd->rcd) return -ENOMEM; @@ -439,15 +439,16 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, * The resulting value will be rounded down to the closest * multiple of dd->rcv_entries.group_size. */ - rcd->egrbufs.buffers = kzalloc_node( - rcd->egrbufs.count * sizeof(*rcd->egrbufs.buffers), - GFP_KERNEL, numa); + rcd->egrbufs.buffers = + kcalloc_node(rcd->egrbufs.count, + sizeof(*rcd->egrbufs.buffers), + GFP_KERNEL, numa); if (!rcd->egrbufs.buffers) goto bail; - rcd->egrbufs.rcvtids = kzalloc_node( - rcd->egrbufs.count * - sizeof(*rcd->egrbufs.rcvtids), - GFP_KERNEL, numa); + rcd->egrbufs.rcvtids = + kcalloc_node(rcd->egrbufs.count, + sizeof(*rcd->egrbufs.rcvtids), + GFP_KERNEL, numa); if (!rcd->egrbufs.rcvtids) goto bail; rcd->egrbufs.size = eager_buffer_size; diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 4c1198bc5e70..40dac4d16eb8 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -455,8 +455,8 @@ int init_send_contexts(struct hfi1_devdata *dd) dd->hw_to_sw = kmalloc_array(TXE_NUM_CONTEXTS, sizeof(u8), GFP_KERNEL); dd->send_contexts = kcalloc(dd->num_send_contexts, - sizeof(struct send_context_info), - GFP_KERNEL); + sizeof(struct send_context_info), + GFP_KERNEL); if (!dd->send_contexts || !dd->hw_to_sw) { kfree(dd->hw_to_sw); kfree(dd->send_contexts); @@ -856,8 +856,9 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, * so head == tail can mean empty. */ sc->sr_size = sci->credits + 1; - sc->sr = kzalloc_node(sizeof(union pio_shadow_ring) * - sc->sr_size, GFP_KERNEL, numa); + sc->sr = kcalloc_node(sc->sr_size, + sizeof(union pio_shadow_ring), + GFP_KERNEL, numa); if (!sc->sr) { sc_free(sc); return NULL; @@ -1958,9 +1959,9 @@ int init_pervl_scs(struct hfi1_devdata *dd) hfi1_init_ctxt(dd->vld[15].sc); dd->vld[15].mtu = enum_to_mtu(OPA_MTU_2048); - dd->kernel_send_context = kzalloc_node(dd->num_send_contexts * - sizeof(struct send_context *), - GFP_KERNEL, dd->node); + dd->kernel_send_context = kcalloc_node(dd->num_send_contexts, + sizeof(struct send_context *), + GFP_KERNEL, dd->node); if (!dd->kernel_send_context) goto freesc15; diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 37424a81a7c9..d8ddbfdf3a4d 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1389,7 +1389,8 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) num_engines, descq_cnt); /* alloc memory for array of send engines */ - dd->per_sdma = kcalloc(num_engines, sizeof(*dd->per_sdma), GFP_KERNEL); + dd->per_sdma = kcalloc_node(num_engines, sizeof(*dd->per_sdma), + GFP_KERNEL, dd->node); if (!dd->per_sdma) return ret; -- cgit From 6197a815fe9c6e28523eede3d69fa5a8dd7052db Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Thu, 1 Feb 2018 12:38:48 -0800 Subject: IB/hfi1: Add 16B rcvhdr trace support Add trace_hfi1_rcvhdr support for bypass packets. While here, remove the etype argument as it is available in struct hfi1_packet. Reviewed-by: Mike Marciniszyn Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/driver.c | 4 +++- drivers/infiniband/hw/hfi1/trace_rx.h | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/hw/hfi1') diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index c7e6b670e809..addc68e83606 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1559,7 +1559,7 @@ int process_receive_ib(struct hfi1_packet *packet) if (hfi1_setup_9B_packet(packet)) return RHF_RCV_CONTINUE; - trace_hfi1_rcvhdr(packet, RHF_RCV_TYPE_IB); + trace_hfi1_rcvhdr(packet); if (unlikely(rhf_err_flags(packet->rhf))) { handle_eflags(packet); @@ -1595,6 +1595,8 @@ int process_receive_bypass(struct hfi1_packet *packet) if (hfi1_setup_bypass_packet(packet)) return RHF_RCV_CONTINUE; + trace_hfi1_rcvhdr(packet); + if (unlikely(rhf_err_flags(packet->rhf))) { handle_eflags(packet); return RHF_RCV_CONTINUE; diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index f76841595e08..7eceb57e0415 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -63,8 +63,8 @@ __print_symbolic(type, \ #define TRACE_SYSTEM hfi1_rx TRACE_EVENT(hfi1_rcvhdr, - TP_PROTO(struct hfi1_packet *packet, u32 etype), - TP_ARGS(packet, etype), + TP_PROTO(struct hfi1_packet *packet), + TP_ARGS(packet), TP_STRUCT__entry(DD_DEV_ENTRY(packet->rcd->dd) __field(u64, eflags) __field(u32, ctxt) @@ -77,7 +77,7 @@ TRACE_EVENT(hfi1_rcvhdr, TP_fast_assign(DD_DEV_ASSIGN(packet->rcd->dd); __entry->eflags = rhf_err_flags(packet->rhf); __entry->ctxt = packet->rcd->ctxt; - __entry->etype = etype; + __entry->etype = packet->etype; __entry->hlen = packet->hlen; __entry->tlen = packet->tlen; __entry->updegr = packet->updegr; -- cgit