diff options
Diffstat (limited to 'drivers/infiniband/hw/hfi1/verbs.c')
| -rw-r--r-- | drivers/infiniband/hw/hfi1/verbs.c | 462 |
1 files changed, 284 insertions, 178 deletions
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index ec582d86025f..3cbbfccdd8cd 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1,48 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright(c) 2015 - 2018 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * + * Copyright(c) 2015 - 2020 Intel Corporation. */ #include <rdma/ib_mad.h> @@ -54,6 +12,7 @@ #include <linux/mm.h> #include <linux/vmalloc.h> #include <rdma/opa_addr.h> +#include <linux/nospec.h> #include "hfi.h" #include "common.h" @@ -65,6 +24,7 @@ #include "vnic.h" #include "fault.h" #include "affinity.h" +#include "ipoib.h" static unsigned int hfi1_lkey_table_size = 16; module_param_named(lkey_table_size, hfi1_lkey_table_size, uint, @@ -146,9 +106,6 @@ static int pio_wait(struct rvt_qp *qp, /* Length of buffer to create verbs txreq cache name */ #define TXREQ_NAME_LEN 24 -/* 16B trailing buffer */ -static const u8 trail_buf[MAX_16B_PADDING]; - static uint wss_threshold = 80; module_param(wss_threshold, uint, S_IRUGO); MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy"); @@ -161,10 +118,12 @@ MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the */ const enum ib_wc_opcode ib_hfi1_wc_opcode[] = { [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE, + [IB_WR_TID_RDMA_WRITE] = IB_WC_RDMA_WRITE, [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, [IB_WR_SEND] = IB_WC_SEND, [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, + [IB_WR_TID_RDMA_READ] = IB_WC_RDMA_READ, [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD, [IB_WR_SEND_WITH_INV] = IB_WC_SEND, @@ -200,6 +159,14 @@ const u8 hdr_len_by_opcode[256] = { [IB_OPCODE_RC_FETCH_ADD] = 12 + 8 + 28, [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = 12 + 8 + 4, [IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = 12 + 8 + 4, + [IB_OPCODE_TID_RDMA_READ_REQ] = 12 + 8 + 36, + [IB_OPCODE_TID_RDMA_READ_RESP] = 12 + 8 + 36, + [IB_OPCODE_TID_RDMA_WRITE_REQ] = 12 + 8 + 36, + [IB_OPCODE_TID_RDMA_WRITE_RESP] = 12 + 8 + 36, + [IB_OPCODE_TID_RDMA_WRITE_DATA] = 12 + 8 + 36, + [IB_OPCODE_TID_RDMA_WRITE_DATA_LAST] = 12 + 8 + 36, + [IB_OPCODE_TID_RDMA_ACK] = 12 + 8 + 36, + [IB_OPCODE_TID_RDMA_RESYNC] = 12 + 8 + 36, /* UC */ [IB_OPCODE_UC_SEND_FIRST] = 12 + 8, [IB_OPCODE_UC_SEND_MIDDLE] = 12 + 8, @@ -243,6 +210,17 @@ static const opcode_handler opcode_handler_tbl[256] = { [IB_OPCODE_RC_FETCH_ADD] = &hfi1_rc_rcv, [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = &hfi1_rc_rcv, [IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = &hfi1_rc_rcv, + + /* TID RDMA has separate handlers for different opcodes.*/ + [IB_OPCODE_TID_RDMA_WRITE_REQ] = &hfi1_rc_rcv_tid_rdma_write_req, + [IB_OPCODE_TID_RDMA_WRITE_RESP] = &hfi1_rc_rcv_tid_rdma_write_resp, + [IB_OPCODE_TID_RDMA_WRITE_DATA] = &hfi1_rc_rcv_tid_rdma_write_data, + [IB_OPCODE_TID_RDMA_WRITE_DATA_LAST] = &hfi1_rc_rcv_tid_rdma_write_data, + [IB_OPCODE_TID_RDMA_READ_REQ] = &hfi1_rc_rcv_tid_rdma_read_req, + [IB_OPCODE_TID_RDMA_READ_RESP] = &hfi1_rc_rcv_tid_rdma_read_resp, + [IB_OPCODE_TID_RDMA_RESYNC] = &hfi1_rc_rcv_tid_rdma_resync, + [IB_OPCODE_TID_RDMA_ACK] = &hfi1_rc_rcv_tid_rdma_ack, + /* UC */ [IB_OPCODE_UC_SEND_FIRST] = &hfi1_uc_rcv, [IB_OPCODE_UC_SEND_MIDDLE] = &hfi1_uc_rcv, @@ -308,7 +286,7 @@ static inline opcode_handler qp_ok(struct hfi1_packet *packet) static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc) { #ifdef CONFIG_FAULT_INJECTION - if ((opcode & IB_OPCODE_MSP) == IB_OPCODE_MSP) + if ((opcode & IB_OPCODE_MSP) == IB_OPCODE_MSP) { /* * In order to drop non-IB traffic we * set PbcInsertHrc to NONE (0x2). @@ -319,8 +297,9 @@ static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc) * packet will not be delivered to the * correct context. */ + pbc &= ~PBC_INSERT_HCRC_SMASK; pbc |= (u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT; - else + } else { /* * In order to drop regular verbs * traffic we set the PbcTestEbp @@ -330,10 +309,129 @@ static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc) * triggered and will be dropped. */ pbc |= PBC_TEST_EBP; + } #endif return pbc; } +static opcode_handler tid_qp_ok(int opcode, struct hfi1_packet *packet) +{ + if (packet->qp->ibqp.qp_type != IB_QPT_RC || + !(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK)) + return NULL; + if ((opcode & RVT_OPCODE_QP_MASK) == IB_OPCODE_TID_RDMA) + return opcode_handler_tbl[opcode]; + return NULL; +} + +void hfi1_kdeth_eager_rcv(struct hfi1_packet *packet) +{ + struct hfi1_ctxtdata *rcd = packet->rcd; + struct ib_header *hdr = packet->hdr; + u32 tlen = packet->tlen; + struct hfi1_pportdata *ppd = rcd->ppd; + struct hfi1_ibport *ibp = &ppd->ibport_data; + struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi; + opcode_handler opcode_handler; + unsigned long flags; + u32 qp_num; + int lnh; + u8 opcode; + + /* DW == LRH (2) + BTH (3) + KDETH (9) + CRC (1) */ + if (unlikely(tlen < 15 * sizeof(u32))) + goto drop; + + lnh = be16_to_cpu(hdr->lrh[0]) & 3; + if (lnh != HFI1_LRH_BTH) + goto drop; + + packet->ohdr = &hdr->u.oth; + trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf))); + + opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24); + inc_opstats(tlen, &rcd->opstats->stats[opcode]); + + /* verbs_qp can be picked up from any tid_rdma header struct */ + qp_num = be32_to_cpu(packet->ohdr->u.tid_rdma.r_req.verbs_qp) & + RVT_QPN_MASK; + + rcu_read_lock(); + packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num); + if (!packet->qp) + goto drop_rcu; + spin_lock_irqsave(&packet->qp->r_lock, flags); + opcode_handler = tid_qp_ok(opcode, packet); + if (likely(opcode_handler)) + opcode_handler(packet); + else + goto drop_unlock; + spin_unlock_irqrestore(&packet->qp->r_lock, flags); + rcu_read_unlock(); + + return; +drop_unlock: + spin_unlock_irqrestore(&packet->qp->r_lock, flags); +drop_rcu: + rcu_read_unlock(); +drop: + ibp->rvp.n_pkt_drops++; +} + +void hfi1_kdeth_expected_rcv(struct hfi1_packet *packet) +{ + struct hfi1_ctxtdata *rcd = packet->rcd; + struct ib_header *hdr = packet->hdr; + u32 tlen = packet->tlen; + struct hfi1_pportdata *ppd = rcd->ppd; + struct hfi1_ibport *ibp = &ppd->ibport_data; + struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi; + opcode_handler opcode_handler; + unsigned long flags; + u32 qp_num; + int lnh; + u8 opcode; + + /* DW == LRH (2) + BTH (3) + KDETH (9) + CRC (1) */ + if (unlikely(tlen < 15 * sizeof(u32))) + goto drop; + + lnh = be16_to_cpu(hdr->lrh[0]) & 3; + if (lnh != HFI1_LRH_BTH) + goto drop; + + packet->ohdr = &hdr->u.oth; + trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf))); + + opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24); + inc_opstats(tlen, &rcd->opstats->stats[opcode]); + + /* verbs_qp can be picked up from any tid_rdma header struct */ + qp_num = be32_to_cpu(packet->ohdr->u.tid_rdma.r_rsp.verbs_qp) & + RVT_QPN_MASK; + + rcu_read_lock(); + packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num); + if (!packet->qp) + goto drop_rcu; + spin_lock_irqsave(&packet->qp->r_lock, flags); + opcode_handler = tid_qp_ok(opcode, packet); + if (likely(opcode_handler)) + opcode_handler(packet); + else + goto drop_unlock; + spin_unlock_irqrestore(&packet->qp->r_lock, flags); + rcu_read_unlock(); + + return; +drop_unlock: + spin_unlock_irqrestore(&packet->qp->r_lock, flags); +drop_rcu: + rcu_read_unlock(); +drop: + ibp->rvp.n_pkt_drops++; +} + static int hfi1_do_pkey_check(struct hfi1_packet *packet) { struct hfi1_ctxtdata *rcd = packet->rcd; @@ -376,10 +474,11 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, opa_get_lid(packet->dlid, 9B)); if (!mcast) goto drop; + rcu_read_lock(); list_for_each_entry_rcu(p, &mcast->qp_list, list) { packet->qp = p->qp; if (hfi1_do_pkey_check(packet)) - goto drop; + goto unlock_drop; spin_lock_irqsave(&packet->qp->r_lock, flags); packet_handler = qp_ok(packet); if (likely(packet_handler)) @@ -388,6 +487,7 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, ibp->rvp.n_pkt_drops++; spin_unlock_irqrestore(&packet->qp->r_lock, flags); } + rcu_read_unlock(); /* * Notify rvt_multicast_detach() if it is waiting for us * to finish. @@ -454,7 +554,7 @@ void hfi1_16B_rcv(struct hfi1_packet *packet) */ static void mem_timer(struct timer_list *t) { - struct hfi1_ibdev *dev = from_timer(dev, t, mem_timer); + struct hfi1_ibdev *dev = timer_container_of(dev, t, mem_timer); struct list_head *list = &dev->memwait; struct rvt_qp *qp = NULL; struct iowait *wait; @@ -497,6 +597,8 @@ static void verbs_sdma_complete( struct hfi1_opa_header *hdr; hdr = &tx->phdr.hdr; + if (unlikely(status == SDMA_TXREQ_S_ABORTED)) + hfi1_rc_verbs_aborted(qp, hdr); hfi1_rc_send_complete(qp, hdr); } spin_unlock(&qp->s_lock); @@ -504,11 +606,28 @@ static void verbs_sdma_complete( hfi1_put_txreq(tx); } +void hfi1_wait_kmem(struct rvt_qp *qp) +{ + struct hfi1_qp_priv *priv = qp->priv; + struct ib_qp *ibqp = &qp->ibqp; + struct ib_device *ibdev = ibqp->device; + struct hfi1_ibdev *dev = to_idev(ibdev); + + if (list_empty(&priv->s_iowait.list)) { + if (list_empty(&dev->memwait)) + mod_timer(&dev->mem_timer, jiffies + 1); + qp->s_flags |= RVT_S_WAIT_KMEM; + list_add_tail(&priv->s_iowait.list, &dev->memwait); + priv->s_iowait.lock = &dev->iowait_lock; + trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM); + rvt_get_qp(qp); + } +} + static int wait_kmem(struct hfi1_ibdev *dev, struct rvt_qp *qp, struct hfi1_pkt_state *ps) { - struct hfi1_qp_priv *priv = qp->priv; unsigned long flags; int ret = 0; @@ -517,15 +636,7 @@ static int wait_kmem(struct hfi1_ibdev *dev, write_seqlock(&dev->iowait_lock); list_add_tail(&ps->s_txreq->txreq.list, &ps->wait->tx_head); - if (list_empty(&priv->s_iowait.list)) { - if (list_empty(&dev->memwait)) - mod_timer(&dev->mem_timer, jiffies + 1); - qp->s_flags |= RVT_S_WAIT_KMEM; - list_add_tail(&priv->s_iowait.list, &dev->memwait); - priv->s_iowait.lock = &dev->iowait_lock; - trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM); - rvt_get_qp(qp); - } + hfi1_wait_kmem(qp); write_sequnlock(&dev->iowait_lock); hfi1_qp_unbusy(qp, ps->wait); ret = -EBUSY; @@ -553,11 +664,7 @@ static noinline int build_verbs_ulp_payload( int ret = 0; while (length) { - len = ss->sge.length; - if (len > length) - len = length; - if (len > ss->sge.sge_length) - len = ss->sge.sge_length; + len = rvt_get_sge_length(&ss->sge, length); WARN_ON_ONCE(len == 0); ret = sdma_txadd_kvaddr( sde->dd, @@ -580,7 +687,7 @@ bail_txadd: /** * update_tx_opstats - record stats by opcode - * @qp; the qp + * @qp: the qp * @ps: transmit packet state * @plen: the plen in dwords * @@ -671,13 +778,22 @@ static int build_verbs_tx_desc( /* add icrc, lt byte, and padding to flit */ if (extra_bytes) - ret = sdma_txadd_kvaddr(sde->dd, &tx->txreq, - (void *)trail_buf, extra_bytes); + ret = sdma_txadd_daddr(sde->dd, &tx->txreq, sde->dd->sdma_pad_phys, + extra_bytes); bail_txadd: return ret; } +static u64 update_hcrc(u8 opcode, u64 pbc) +{ + if ((opcode & IB_OPCODE_TID_RDMA) == IB_OPCODE_TID_RDMA) { + pbc &= ~PBC_INSERT_HCRC_SMASK; + pbc |= (u64)PBC_IHCRC_LKDETH << PBC_INSERT_HCRC_SHIFT; + } + return pbc; +} + int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u64 pbc) { @@ -716,13 +832,17 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, else pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); - if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode))) - pbc = hfi1_fault_tx(qp, ps->opcode, pbc); pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen); + + if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode))) + pbc = hfi1_fault_tx(qp, ps->opcode, pbc); + else + /* Update HCRC based on packet opcode */ + pbc = update_hcrc(ps->opcode, pbc); } tx->wqe = qp->s_wqe; ret = build_verbs_tx_desc(tx->sde, len, tx, ahg_info, pbc); @@ -787,6 +907,7 @@ static int pio_wait(struct rvt_qp *qp, dev->n_piodrain += !!(flag & HFI1_S_WAIT_PIO_DRAIN); qp->s_flags |= flag; was_empty = list_empty(&sc->piowait); + iowait_get_priority(&priv->s_iowait); iowait_queue(ps->pkts_sent, &priv->s_iowait, &sc->piowait); priv->s_iowait.lock = &sc->waitlock; @@ -868,17 +989,20 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, else pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); + pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen); if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode))) pbc = hfi1_fault_tx(qp, ps->opcode, pbc); - pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen); + else + /* Update HCRC based on packet opcode */ + pbc = update_hcrc(ps->opcode, pbc); } if (cb) iowait_pio_inc(&priv->s_iowait); pbuf = sc_buffer_alloc(sc, plen, cb, qp); - if (unlikely(!pbuf)) { + if (IS_ERR_OR_NULL(pbuf)) { if (cb) verbs_pio_complete(qp, 0); - if (ppd->host_link_state != HLS_UP_ACTIVE) { + if (IS_ERR(pbuf)) { /* * If we have filled the PIO buffers to capacity and are * not in an active state this request is not going to @@ -914,12 +1038,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, if (ss) { while (len) { void *addr = ss->sge.vaddr; - u32 slen = ss->sge.length; + u32 slen = rvt_get_sge_length(&ss->sge, len); - if (slen > len) - slen = len; - if (slen > ss->sge.sge_length) - slen = ss->sge.sge_length; rvt_update_sge(ss, slen, false); seg_pio_copy_mid(pbuf, addr, slen); len -= slen; @@ -927,7 +1047,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, } /* add icrc, lt byte, and padding to flit */ if (extra_bytes) - seg_pio_copy_mid(pbuf, trail_buf, extra_bytes); + seg_pio_copy_mid(pbuf, ppd->dd->sdma_pad_dma, + extra_bytes); seg_pio_copy_end(pbuf); } @@ -937,15 +1058,15 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, &ps->s_txreq->phdr.hdr, ib_is_sc5(sc5)); pio_bail: + spin_lock_irqsave(&qp->s_lock, flags); if (qp->s_wqe) { - spin_lock_irqsave(&qp->s_lock, flags); rvt_send_complete(qp, qp->s_wqe, wc_status); - spin_unlock_irqrestore(&qp->s_lock, flags); } else if (qp->ibqp.qp_type == IB_QPT_RC) { - spin_lock_irqsave(&qp->s_lock, flags); + if (unlikely(wc_status == IB_WC_GENERAL_ERR)) + hfi1_rc_verbs_aborted(qp, &ps->s_txreq->phdr.hdr); hfi1_rc_send_complete(qp, &ps->s_txreq->phdr.hdr); - spin_unlock_irqrestore(&qp->s_lock, flags); } + spin_unlock_irqrestore(&qp->s_lock, flags); ret = 0; @@ -982,7 +1103,7 @@ static inline int egress_pkey_matches_entry(u16 pkey, u16 ent) * egress_pkey_check - check P_KEY of a packet * @ppd: Physical IB port data * @slid: SLID for packet - * @bkey: PKEY for header + * @pkey: PKEY for header * @sc5: SC for packet * @s_pkey_index: It will be used for look up optimization for kernel contexts * only. If it is negative value, then it means user contexts is calling this @@ -1043,7 +1164,7 @@ bad: return 1; } -/** +/* * get_send_routine - choose an egress routine * * Choose an egress routine based on QP type @@ -1065,15 +1186,16 @@ static inline send_routine get_send_routine(struct rvt_qp *qp, case IB_QPT_UD: break; case IB_QPT_UC: - case IB_QPT_RC: { + case IB_QPT_RC: + priv->s_running_pkt_size = + (tx->s_cur_size + priv->s_running_pkt_size) / 2; if (piothreshold && - tx->s_cur_size <= min(piothreshold, qp->pmtu) && + priv->s_running_pkt_size <= min(piothreshold, qp->pmtu) && (BIT(ps->opcode & OPMASK) & pio_opmask[ps->opcode >> 5]) && iowait_sdma_pending(&priv->s_iowait) == 0 && !sdma_txreq_built(&tx->txreq)) return dd->process_pio_send; break; - } default: break; } @@ -1178,8 +1300,8 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd) IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE | - IB_DEVICE_MEM_MGT_EXTENSIONS | - IB_DEVICE_RDMA_NETDEV_OPA_VNIC; + IB_DEVICE_MEM_MGT_EXTENSIONS; + rdi->dparms.props.kernel_cap_flags = IBK_RDMA_NETDEV_OPA; rdi->dparms.props.page_size_cap = PAGE_SIZE; rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3; rdi->dparms.props.vendor_part_id = dd->pcidev->device; @@ -1188,16 +1310,15 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd) rdi->dparms.props.max_mr_size = U64_MAX; rdi->dparms.props.max_fast_reg_page_list_len = UINT_MAX; rdi->dparms.props.max_qp = hfi1_max_qps; - rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs; + rdi->dparms.props.max_qp_wr = + (hfi1_max_qp_wrs >= HFI1_QP_WQE_INVALID ? + HFI1_QP_WQE_INVALID - 1 : hfi1_max_qp_wrs); rdi->dparms.props.max_send_sge = hfi1_max_sges; rdi->dparms.props.max_recv_sge = hfi1_max_sges; rdi->dparms.props.max_sge_rd = hfi1_max_sges; rdi->dparms.props.max_cq = hfi1_max_cqs; rdi->dparms.props.max_ah = hfi1_max_ahs; rdi->dparms.props.max_cqe = hfi1_max_cqes; - rdi->dparms.props.max_mr = rdi->lkey_table.max; - rdi->dparms.props.max_fmr = rdi->lkey_table.max; - rdi->dparms.props.max_map_per_fmr = 32767; rdi->dparms.props.max_pd = hfi1_max_pds; rdi->dparms.props.max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC; rdi->dparms.props.max_qp_init_rd_atom = 255; @@ -1244,7 +1365,7 @@ static inline u16 opa_width_to_ib(u16 in) } } -static int query_port(struct rvt_dev_info *rdi, u8 port_num, +static int query_port(struct rvt_dev_info *rdi, u32 port_num, struct ib_port_attr *props) { struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi); @@ -1261,7 +1382,7 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num, props->gid_tbl_len = HFI1_GUIDS_PER_PORT; props->active_width = (u8)opa_width_to_ib(ppd->link_width_active); /* see rate_show() in ib core/sysfs.c */ - props->active_speed = (u8)opa_speed_to_ib(ppd->link_speed_active); + props->active_speed = opa_speed_to_ib(ppd->link_speed_active); props->max_vl_num = ppd->vls_supported; /* Once we are a "first class" citizen and have added the OPA MTUs to @@ -1276,6 +1397,7 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num, 4096 : hfi1_max_mtu), IB_MTU_4096); props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu : mtu_to_enum(ppd->ibmtu, IB_MTU_4096); + props->phys_mtu = hfi1_max_mtu; return 0; } @@ -1320,17 +1442,15 @@ bail: return ret; } -static int shut_down_port(struct rvt_dev_info *rdi, u8 port_num) +static int shut_down_port(struct rvt_dev_info *rdi, u32 port_num) { struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi); struct hfi1_devdata *dd = dd_from_dev(verbs_dev); struct hfi1_pportdata *ppd = &dd->pport[port_num - 1]; - int ret; set_link_down_reason(ppd, OPA_LINKDOWN_REASON_UNKNOWN, 0, OPA_LINKDOWN_REASON_UNKNOWN); - ret = set_link_state(ppd, HLS_DN_DOWNDEF); - return ret; + return set_link_state(ppd, HLS_DN_DOWNDEF); } static int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp, @@ -1375,6 +1495,7 @@ static int hfi1_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr) sl = rdma_ah_get_sl(ah_attr); if (sl >= ARRAY_SIZE(ibp->sl_to_sc)) return -EINVAL; + sl = array_index_nospec(sl, ARRAY_SIZE(ibp->sl_to_sc)); sc5 = ibp->sl_to_sc[sl]; if (sc_to_vlt(dd, sc5) > num_vls && sc_to_vlt(dd, sc5) != 0xf) @@ -1477,26 +1598,23 @@ static const char * const driver_cntr_names[] = { "DRIVER_EgrHdrFull" }; -static DEFINE_MUTEX(cntr_names_lock); /* protects the *_cntr_names bufers */ -static const char **dev_cntr_names; -static const char **port_cntr_names; +static struct rdma_stat_desc *dev_cntr_descs; +static struct rdma_stat_desc *port_cntr_descs; int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names); static int num_dev_cntrs; static int num_port_cntrs; -static int cntr_names_initialized; /* * Convert a list of names separated by '\n' into an array of NULL terminated * strings. Optionally some entries can be reserved in the array to hold extra * external strings. */ -static int init_cntr_names(const char *names_in, - const size_t names_len, - int num_extra_names, - int *num_cntrs, - const char ***cntr_names) +static int init_cntr_names(const char *names_in, const size_t names_len, + int num_extra_names, int *num_cntrs, + struct rdma_stat_desc **cntr_descs) { - char *names_out, *p, **q; + struct rdma_stat_desc *names_out; + char *p; int i, n; n = 0; @@ -1504,95 +1622,83 @@ static int init_cntr_names(const char *names_in, if (names_in[i] == '\n') n++; - names_out = kmalloc((n + num_extra_names) * sizeof(char *) + names_len, + names_out = kzalloc((n + num_extra_names) * sizeof(*names_out) + + names_len, GFP_KERNEL); if (!names_out) { *num_cntrs = 0; - *cntr_names = NULL; + *cntr_descs = NULL; return -ENOMEM; } - p = names_out + (n + num_extra_names) * sizeof(char *); + p = (char *)&names_out[n + num_extra_names]; memcpy(p, names_in, names_len); - q = (char **)names_out; for (i = 0; i < n; i++) { - q[i] = p; + names_out[i].name = p; p = strchr(p, '\n'); *p++ = '\0'; } *num_cntrs = n; - *cntr_names = (const char **)names_out; + *cntr_descs = names_out; return 0; } -static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev, - u8 port_num) +static struct rdma_hw_stats *hfi1_alloc_hw_device_stats(struct ib_device *ibdev) { - int i, err; - - mutex_lock(&cntr_names_lock); - if (!cntr_names_initialized) { + if (!dev_cntr_descs) { struct hfi1_devdata *dd = dd_from_ibdev(ibdev); + int i, err; - err = init_cntr_names(dd->cntrnames, - dd->cntrnameslen, + err = init_cntr_names(dd->cntrnames, dd->cntrnameslen, num_driver_cntrs, - &num_dev_cntrs, - &dev_cntr_names); - if (err) { - mutex_unlock(&cntr_names_lock); + &num_dev_cntrs, &dev_cntr_descs); + if (err) return NULL; - } for (i = 0; i < num_driver_cntrs; i++) - dev_cntr_names[num_dev_cntrs + i] = - driver_cntr_names[i]; + dev_cntr_descs[num_dev_cntrs + i].name = + driver_cntr_names[i]; + } + return rdma_alloc_hw_stats_struct(dev_cntr_descs, + num_dev_cntrs + num_driver_cntrs, + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + +static struct rdma_hw_stats *hfi_alloc_hw_port_stats(struct ib_device *ibdev, + u32 port_num) +{ + if (!port_cntr_descs) { + struct hfi1_devdata *dd = dd_from_ibdev(ibdev); + int err; - err = init_cntr_names(dd->portcntrnames, - dd->portcntrnameslen, + err = init_cntr_names(dd->portcntrnames, dd->portcntrnameslen, 0, - &num_port_cntrs, - &port_cntr_names); - if (err) { - kfree(dev_cntr_names); - dev_cntr_names = NULL; - mutex_unlock(&cntr_names_lock); + &num_port_cntrs, &port_cntr_descs); + if (err) return NULL; - } - cntr_names_initialized = 1; } - mutex_unlock(&cntr_names_lock); - - if (!port_num) - return rdma_alloc_hw_stats_struct( - dev_cntr_names, - num_dev_cntrs + num_driver_cntrs, - RDMA_HW_STATS_DEFAULT_LIFESPAN); - else - return rdma_alloc_hw_stats_struct( - port_cntr_names, - num_port_cntrs, - RDMA_HW_STATS_DEFAULT_LIFESPAN); + return rdma_alloc_hw_stats_struct(port_cntr_descs, num_port_cntrs, + RDMA_HW_STATS_DEFAULT_LIFESPAN); } static u64 hfi1_sps_ints(void) { - unsigned long flags; + unsigned long index, flags; struct hfi1_devdata *dd; u64 sps_ints = 0; - spin_lock_irqsave(&hfi1_devs_lock, flags); - list_for_each_entry(dd, &hfi1_dev_list, list) { + xa_lock_irqsave(&hfi1_dev_table, flags); + xa_for_each(&hfi1_dev_table, index, dd) { sps_ints += get_all_cpu_total(dd->int_counter); } - spin_unlock_irqrestore(&hfi1_devs_lock, flags); + xa_unlock_irqrestore(&hfi1_dev_table, flags); return sps_ints; } static int get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, - u8 port, int index) + u32 port, int index) { u64 *values; int count; @@ -1618,13 +1724,20 @@ static int get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, } static const struct ib_device_ops hfi1_dev_ops = { - .alloc_hw_stats = alloc_hw_stats, + .owner = THIS_MODULE, + .driver_id = RDMA_DRIVER_HFI1, + + .alloc_hw_device_stats = hfi1_alloc_hw_device_stats, + .alloc_hw_port_stats = hfi_alloc_hw_port_stats, .alloc_rdma_netdev = hfi1_vnic_alloc_rn, + .device_group = &ib_hfi1_attr_group, .get_dev_fw_str = hfi1_get_dev_fw_str, .get_hw_stats = get_hw_stats, .modify_device = modify_device, + .port_groups = hfi1_attr_port_groups, /* keep process mad in the driver */ .process_mad = hfi1_process_mad, + .rdma_netdev_get_params = hfi1_ipoib_rn_get_params, }; /** @@ -1667,19 +1780,17 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) */ if (!ib_hfi1_sys_image_guid) ib_hfi1_sys_image_guid = ibdev->node_guid; - ibdev->owner = THIS_MODULE; ibdev->phys_port_cnt = dd->num_pports; ibdev->dev.parent = &dd->pcidev->dev; ib_set_device_ops(ibdev, &hfi1_dev_ops); - strlcpy(ibdev->node_desc, init_utsname()->nodename, + strscpy(ibdev->node_desc, init_utsname()->nodename, sizeof(ibdev->node_desc)); /* * Fill in rvt info object. */ - dd->verbs_dev.rdi.driver_f.port_callback = hfi1_create_port_files; dd->verbs_dev.rdi.driver_f.get_pci_dev = get_pci_dev; dd->verbs_dev.rdi.driver_f.check_ah = hfi1_check_ah; dd->verbs_dev.rdi.driver_f.notify_new_ah = hfi1_notify_new_ah; @@ -1697,9 +1808,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.dparms.qpn_start = 0; dd->verbs_dev.rdi.dparms.qpn_inc = 1; dd->verbs_dev.rdi.dparms.qos_shift = dd->qos_shift; - dd->verbs_dev.rdi.dparms.qpn_res_start = kdeth_qp << 16; - dd->verbs_dev.rdi.dparms.qpn_res_end = - dd->verbs_dev.rdi.dparms.qpn_res_start + 65535; + dd->verbs_dev.rdi.dparms.qpn_res_start = RVT_KDETH_QP_BASE; + dd->verbs_dev.rdi.dparms.qpn_res_end = RVT_AIP_QP_MAX; dd->verbs_dev.rdi.dparms.max_rdma_atomic = HFI1_MAX_RDMA_ATOMIC; dd->verbs_dev.rdi.dparms.psn_mask = PSN_MASK; dd->verbs_dev.rdi.dparms.psn_shift = PSN_SHIFT; @@ -1743,6 +1853,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.dparms.sge_copy_mode = sge_copy_mode; dd->verbs_dev.rdi.dparms.wss_threshold = wss_threshold; dd->verbs_dev.rdi.dparms.wss_clean_period = wss_clean_period; + dd->verbs_dev.rdi.dparms.reserved_operations = 1; + dd->verbs_dev.rdi.dparms.extra_rdma_atomic = HFI1_TID_RDMA_WRITE_CNT; /* post send table */ dd->verbs_dev.rdi.post_parms = hfi1_post_parms; @@ -1757,10 +1869,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) i, ppd->pkeys); - rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev, - &ib_hfi1_attr_group); - - ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_HFI1); + ret = rvt_register_device(&dd->verbs_dev.rdi); if (ret) goto err_verbs_txreq; @@ -1791,16 +1900,13 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd) if (!list_empty(&dev->memwait)) dd_dev_err(dd, "memwait list not empty!\n"); - del_timer_sync(&dev->mem_timer); + timer_delete_sync(&dev->mem_timer); verbs_txreq_exit(dev); - mutex_lock(&cntr_names_lock); - kfree(dev_cntr_names); - kfree(port_cntr_names); - dev_cntr_names = NULL; - port_cntr_names = NULL; - cntr_names_initialized = 0; - mutex_unlock(&cntr_names_lock); + kfree(dev_cntr_descs); + kfree(port_cntr_descs); + dev_cntr_descs = NULL; + port_cntr_descs = NULL; } void hfi1_cnp_rcv(struct hfi1_packet *packet) |
