summaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw/hfi1/verbs.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/hfi1/verbs.c')
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c462
1 files changed, 284 insertions, 178 deletions
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index ec582d86025f..3cbbfccdd8cd 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*/
#include <rdma/ib_mad.h>
@@ -54,6 +12,7 @@
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <rdma/opa_addr.h>
+#include <linux/nospec.h>
#include "hfi.h"
#include "common.h"
@@ -65,6 +24,7 @@
#include "vnic.h"
#include "fault.h"
#include "affinity.h"
+#include "ipoib.h"
static unsigned int hfi1_lkey_table_size = 16;
module_param_named(lkey_table_size, hfi1_lkey_table_size, uint,
@@ -146,9 +106,6 @@ static int pio_wait(struct rvt_qp *qp,
/* Length of buffer to create verbs txreq cache name */
#define TXREQ_NAME_LEN 24
-/* 16B trailing buffer */
-static const u8 trail_buf[MAX_16B_PADDING];
-
static uint wss_threshold = 80;
module_param(wss_threshold, uint, S_IRUGO);
MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy");
@@ -161,10 +118,12 @@ MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the
*/
const enum ib_wc_opcode ib_hfi1_wc_opcode[] = {
[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
+ [IB_WR_TID_RDMA_WRITE] = IB_WC_RDMA_WRITE,
[IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
[IB_WR_SEND] = IB_WC_SEND,
[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
+ [IB_WR_TID_RDMA_READ] = IB_WC_RDMA_READ,
[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
[IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
[IB_WR_SEND_WITH_INV] = IB_WC_SEND,
@@ -200,6 +159,14 @@ const u8 hdr_len_by_opcode[256] = {
[IB_OPCODE_RC_FETCH_ADD] = 12 + 8 + 28,
[IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = 12 + 8 + 4,
[IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = 12 + 8 + 4,
+ [IB_OPCODE_TID_RDMA_READ_REQ] = 12 + 8 + 36,
+ [IB_OPCODE_TID_RDMA_READ_RESP] = 12 + 8 + 36,
+ [IB_OPCODE_TID_RDMA_WRITE_REQ] = 12 + 8 + 36,
+ [IB_OPCODE_TID_RDMA_WRITE_RESP] = 12 + 8 + 36,
+ [IB_OPCODE_TID_RDMA_WRITE_DATA] = 12 + 8 + 36,
+ [IB_OPCODE_TID_RDMA_WRITE_DATA_LAST] = 12 + 8 + 36,
+ [IB_OPCODE_TID_RDMA_ACK] = 12 + 8 + 36,
+ [IB_OPCODE_TID_RDMA_RESYNC] = 12 + 8 + 36,
/* UC */
[IB_OPCODE_UC_SEND_FIRST] = 12 + 8,
[IB_OPCODE_UC_SEND_MIDDLE] = 12 + 8,
@@ -243,6 +210,17 @@ static const opcode_handler opcode_handler_tbl[256] = {
[IB_OPCODE_RC_FETCH_ADD] = &hfi1_rc_rcv,
[IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = &hfi1_rc_rcv,
[IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = &hfi1_rc_rcv,
+
+ /* TID RDMA has separate handlers for different opcodes.*/
+ [IB_OPCODE_TID_RDMA_WRITE_REQ] = &hfi1_rc_rcv_tid_rdma_write_req,
+ [IB_OPCODE_TID_RDMA_WRITE_RESP] = &hfi1_rc_rcv_tid_rdma_write_resp,
+ [IB_OPCODE_TID_RDMA_WRITE_DATA] = &hfi1_rc_rcv_tid_rdma_write_data,
+ [IB_OPCODE_TID_RDMA_WRITE_DATA_LAST] = &hfi1_rc_rcv_tid_rdma_write_data,
+ [IB_OPCODE_TID_RDMA_READ_REQ] = &hfi1_rc_rcv_tid_rdma_read_req,
+ [IB_OPCODE_TID_RDMA_READ_RESP] = &hfi1_rc_rcv_tid_rdma_read_resp,
+ [IB_OPCODE_TID_RDMA_RESYNC] = &hfi1_rc_rcv_tid_rdma_resync,
+ [IB_OPCODE_TID_RDMA_ACK] = &hfi1_rc_rcv_tid_rdma_ack,
+
/* UC */
[IB_OPCODE_UC_SEND_FIRST] = &hfi1_uc_rcv,
[IB_OPCODE_UC_SEND_MIDDLE] = &hfi1_uc_rcv,
@@ -308,7 +286,7 @@ static inline opcode_handler qp_ok(struct hfi1_packet *packet)
static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
{
#ifdef CONFIG_FAULT_INJECTION
- if ((opcode & IB_OPCODE_MSP) == IB_OPCODE_MSP)
+ if ((opcode & IB_OPCODE_MSP) == IB_OPCODE_MSP) {
/*
* In order to drop non-IB traffic we
* set PbcInsertHrc to NONE (0x2).
@@ -319,8 +297,9 @@ static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
* packet will not be delivered to the
* correct context.
*/
+ pbc &= ~PBC_INSERT_HCRC_SMASK;
pbc |= (u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT;
- else
+ } else {
/*
* In order to drop regular verbs
* traffic we set the PbcTestEbp
@@ -330,10 +309,129 @@ static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
* triggered and will be dropped.
*/
pbc |= PBC_TEST_EBP;
+ }
#endif
return pbc;
}
+static opcode_handler tid_qp_ok(int opcode, struct hfi1_packet *packet)
+{
+ if (packet->qp->ibqp.qp_type != IB_QPT_RC ||
+ !(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK))
+ return NULL;
+ if ((opcode & RVT_OPCODE_QP_MASK) == IB_OPCODE_TID_RDMA)
+ return opcode_handler_tbl[opcode];
+ return NULL;
+}
+
+void hfi1_kdeth_eager_rcv(struct hfi1_packet *packet)
+{
+ struct hfi1_ctxtdata *rcd = packet->rcd;
+ struct ib_header *hdr = packet->hdr;
+ u32 tlen = packet->tlen;
+ struct hfi1_pportdata *ppd = rcd->ppd;
+ struct hfi1_ibport *ibp = &ppd->ibport_data;
+ struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
+ opcode_handler opcode_handler;
+ unsigned long flags;
+ u32 qp_num;
+ int lnh;
+ u8 opcode;
+
+ /* DW == LRH (2) + BTH (3) + KDETH (9) + CRC (1) */
+ if (unlikely(tlen < 15 * sizeof(u32)))
+ goto drop;
+
+ lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+ if (lnh != HFI1_LRH_BTH)
+ goto drop;
+
+ packet->ohdr = &hdr->u.oth;
+ trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));
+
+ opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24);
+ inc_opstats(tlen, &rcd->opstats->stats[opcode]);
+
+ /* verbs_qp can be picked up from any tid_rdma header struct */
+ qp_num = be32_to_cpu(packet->ohdr->u.tid_rdma.r_req.verbs_qp) &
+ RVT_QPN_MASK;
+
+ rcu_read_lock();
+ packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
+ if (!packet->qp)
+ goto drop_rcu;
+ spin_lock_irqsave(&packet->qp->r_lock, flags);
+ opcode_handler = tid_qp_ok(opcode, packet);
+ if (likely(opcode_handler))
+ opcode_handler(packet);
+ else
+ goto drop_unlock;
+ spin_unlock_irqrestore(&packet->qp->r_lock, flags);
+ rcu_read_unlock();
+
+ return;
+drop_unlock:
+ spin_unlock_irqrestore(&packet->qp->r_lock, flags);
+drop_rcu:
+ rcu_read_unlock();
+drop:
+ ibp->rvp.n_pkt_drops++;
+}
+
+void hfi1_kdeth_expected_rcv(struct hfi1_packet *packet)
+{
+ struct hfi1_ctxtdata *rcd = packet->rcd;
+ struct ib_header *hdr = packet->hdr;
+ u32 tlen = packet->tlen;
+ struct hfi1_pportdata *ppd = rcd->ppd;
+ struct hfi1_ibport *ibp = &ppd->ibport_data;
+ struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
+ opcode_handler opcode_handler;
+ unsigned long flags;
+ u32 qp_num;
+ int lnh;
+ u8 opcode;
+
+ /* DW == LRH (2) + BTH (3) + KDETH (9) + CRC (1) */
+ if (unlikely(tlen < 15 * sizeof(u32)))
+ goto drop;
+
+ lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+ if (lnh != HFI1_LRH_BTH)
+ goto drop;
+
+ packet->ohdr = &hdr->u.oth;
+ trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));
+
+ opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24);
+ inc_opstats(tlen, &rcd->opstats->stats[opcode]);
+
+ /* verbs_qp can be picked up from any tid_rdma header struct */
+ qp_num = be32_to_cpu(packet->ohdr->u.tid_rdma.r_rsp.verbs_qp) &
+ RVT_QPN_MASK;
+
+ rcu_read_lock();
+ packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
+ if (!packet->qp)
+ goto drop_rcu;
+ spin_lock_irqsave(&packet->qp->r_lock, flags);
+ opcode_handler = tid_qp_ok(opcode, packet);
+ if (likely(opcode_handler))
+ opcode_handler(packet);
+ else
+ goto drop_unlock;
+ spin_unlock_irqrestore(&packet->qp->r_lock, flags);
+ rcu_read_unlock();
+
+ return;
+drop_unlock:
+ spin_unlock_irqrestore(&packet->qp->r_lock, flags);
+drop_rcu:
+ rcu_read_unlock();
+drop:
+ ibp->rvp.n_pkt_drops++;
+}
+
static int hfi1_do_pkey_check(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
@@ -376,10 +474,11 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet,
opa_get_lid(packet->dlid, 9B));
if (!mcast)
goto drop;
+ rcu_read_lock();
list_for_each_entry_rcu(p, &mcast->qp_list, list) {
packet->qp = p->qp;
if (hfi1_do_pkey_check(packet))
- goto drop;
+ goto unlock_drop;
spin_lock_irqsave(&packet->qp->r_lock, flags);
packet_handler = qp_ok(packet);
if (likely(packet_handler))
@@ -388,6 +487,7 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet,
ibp->rvp.n_pkt_drops++;
spin_unlock_irqrestore(&packet->qp->r_lock, flags);
}
+ rcu_read_unlock();
/*
* Notify rvt_multicast_detach() if it is waiting for us
* to finish.
@@ -454,7 +554,7 @@ void hfi1_16B_rcv(struct hfi1_packet *packet)
*/
static void mem_timer(struct timer_list *t)
{
- struct hfi1_ibdev *dev = from_timer(dev, t, mem_timer);
+ struct hfi1_ibdev *dev = timer_container_of(dev, t, mem_timer);
struct list_head *list = &dev->memwait;
struct rvt_qp *qp = NULL;
struct iowait *wait;
@@ -497,6 +597,8 @@ static void verbs_sdma_complete(
struct hfi1_opa_header *hdr;
hdr = &tx->phdr.hdr;
+ if (unlikely(status == SDMA_TXREQ_S_ABORTED))
+ hfi1_rc_verbs_aborted(qp, hdr);
hfi1_rc_send_complete(qp, hdr);
}
spin_unlock(&qp->s_lock);
@@ -504,11 +606,28 @@ static void verbs_sdma_complete(
hfi1_put_txreq(tx);
}
+void hfi1_wait_kmem(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+ struct ib_qp *ibqp = &qp->ibqp;
+ struct ib_device *ibdev = ibqp->device;
+ struct hfi1_ibdev *dev = to_idev(ibdev);
+
+ if (list_empty(&priv->s_iowait.list)) {
+ if (list_empty(&dev->memwait))
+ mod_timer(&dev->mem_timer, jiffies + 1);
+ qp->s_flags |= RVT_S_WAIT_KMEM;
+ list_add_tail(&priv->s_iowait.list, &dev->memwait);
+ priv->s_iowait.lock = &dev->iowait_lock;
+ trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
+ rvt_get_qp(qp);
+ }
+}
+
static int wait_kmem(struct hfi1_ibdev *dev,
struct rvt_qp *qp,
struct hfi1_pkt_state *ps)
{
- struct hfi1_qp_priv *priv = qp->priv;
unsigned long flags;
int ret = 0;
@@ -517,15 +636,7 @@ static int wait_kmem(struct hfi1_ibdev *dev,
write_seqlock(&dev->iowait_lock);
list_add_tail(&ps->s_txreq->txreq.list,
&ps->wait->tx_head);
- if (list_empty(&priv->s_iowait.list)) {
- if (list_empty(&dev->memwait))
- mod_timer(&dev->mem_timer, jiffies + 1);
- qp->s_flags |= RVT_S_WAIT_KMEM;
- list_add_tail(&priv->s_iowait.list, &dev->memwait);
- priv->s_iowait.lock = &dev->iowait_lock;
- trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
- rvt_get_qp(qp);
- }
+ hfi1_wait_kmem(qp);
write_sequnlock(&dev->iowait_lock);
hfi1_qp_unbusy(qp, ps->wait);
ret = -EBUSY;
@@ -553,11 +664,7 @@ static noinline int build_verbs_ulp_payload(
int ret = 0;
while (length) {
- len = ss->sge.length;
- if (len > length)
- len = length;
- if (len > ss->sge.sge_length)
- len = ss->sge.sge_length;
+ len = rvt_get_sge_length(&ss->sge, length);
WARN_ON_ONCE(len == 0);
ret = sdma_txadd_kvaddr(
sde->dd,
@@ -580,7 +687,7 @@ bail_txadd:
/**
* update_tx_opstats - record stats by opcode
- * @qp; the qp
+ * @qp: the qp
* @ps: transmit packet state
* @plen: the plen in dwords
*
@@ -671,13 +778,22 @@ static int build_verbs_tx_desc(
/* add icrc, lt byte, and padding to flit */
if (extra_bytes)
- ret = sdma_txadd_kvaddr(sde->dd, &tx->txreq,
- (void *)trail_buf, extra_bytes);
+ ret = sdma_txadd_daddr(sde->dd, &tx->txreq, sde->dd->sdma_pad_phys,
+ extra_bytes);
bail_txadd:
return ret;
}
+static u64 update_hcrc(u8 opcode, u64 pbc)
+{
+ if ((opcode & IB_OPCODE_TID_RDMA) == IB_OPCODE_TID_RDMA) {
+ pbc &= ~PBC_INSERT_HCRC_SMASK;
+ pbc |= (u64)PBC_IHCRC_LKDETH << PBC_INSERT_HCRC_SHIFT;
+ }
+ return pbc;
+}
+
int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u64 pbc)
{
@@ -716,13 +832,17 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
else
pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
- if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode)))
- pbc = hfi1_fault_tx(qp, ps->opcode, pbc);
pbc = create_pbc(ppd,
pbc,
qp->srate_mbps,
vl,
plen);
+
+ if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode)))
+ pbc = hfi1_fault_tx(qp, ps->opcode, pbc);
+ else
+ /* Update HCRC based on packet opcode */
+ pbc = update_hcrc(ps->opcode, pbc);
}
tx->wqe = qp->s_wqe;
ret = build_verbs_tx_desc(tx->sde, len, tx, ahg_info, pbc);
@@ -787,6 +907,7 @@ static int pio_wait(struct rvt_qp *qp,
dev->n_piodrain += !!(flag & HFI1_S_WAIT_PIO_DRAIN);
qp->s_flags |= flag;
was_empty = list_empty(&sc->piowait);
+ iowait_get_priority(&priv->s_iowait);
iowait_queue(ps->pkts_sent, &priv->s_iowait,
&sc->piowait);
priv->s_iowait.lock = &sc->waitlock;
@@ -868,17 +989,20 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
else
pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
+ pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode)))
pbc = hfi1_fault_tx(qp, ps->opcode, pbc);
- pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
+ else
+ /* Update HCRC based on packet opcode */
+ pbc = update_hcrc(ps->opcode, pbc);
}
if (cb)
iowait_pio_inc(&priv->s_iowait);
pbuf = sc_buffer_alloc(sc, plen, cb, qp);
- if (unlikely(!pbuf)) {
+ if (IS_ERR_OR_NULL(pbuf)) {
if (cb)
verbs_pio_complete(qp, 0);
- if (ppd->host_link_state != HLS_UP_ACTIVE) {
+ if (IS_ERR(pbuf)) {
/*
* If we have filled the PIO buffers to capacity and are
* not in an active state this request is not going to
@@ -914,12 +1038,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (ss) {
while (len) {
void *addr = ss->sge.vaddr;
- u32 slen = ss->sge.length;
+ u32 slen = rvt_get_sge_length(&ss->sge, len);
- if (slen > len)
- slen = len;
- if (slen > ss->sge.sge_length)
- slen = ss->sge.sge_length;
rvt_update_sge(ss, slen, false);
seg_pio_copy_mid(pbuf, addr, slen);
len -= slen;
@@ -927,7 +1047,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
}
/* add icrc, lt byte, and padding to flit */
if (extra_bytes)
- seg_pio_copy_mid(pbuf, trail_buf, extra_bytes);
+ seg_pio_copy_mid(pbuf, ppd->dd->sdma_pad_dma,
+ extra_bytes);
seg_pio_copy_end(pbuf);
}
@@ -937,15 +1058,15 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
&ps->s_txreq->phdr.hdr, ib_is_sc5(sc5));
pio_bail:
+ spin_lock_irqsave(&qp->s_lock, flags);
if (qp->s_wqe) {
- spin_lock_irqsave(&qp->s_lock, flags);
rvt_send_complete(qp, qp->s_wqe, wc_status);
- spin_unlock_irqrestore(&qp->s_lock, flags);
} else if (qp->ibqp.qp_type == IB_QPT_RC) {
- spin_lock_irqsave(&qp->s_lock, flags);
+ if (unlikely(wc_status == IB_WC_GENERAL_ERR))
+ hfi1_rc_verbs_aborted(qp, &ps->s_txreq->phdr.hdr);
hfi1_rc_send_complete(qp, &ps->s_txreq->phdr.hdr);
- spin_unlock_irqrestore(&qp->s_lock, flags);
}
+ spin_unlock_irqrestore(&qp->s_lock, flags);
ret = 0;
@@ -982,7 +1103,7 @@ static inline int egress_pkey_matches_entry(u16 pkey, u16 ent)
* egress_pkey_check - check P_KEY of a packet
* @ppd: Physical IB port data
* @slid: SLID for packet
- * @bkey: PKEY for header
+ * @pkey: PKEY for header
* @sc5: SC for packet
* @s_pkey_index: It will be used for look up optimization for kernel contexts
* only. If it is negative value, then it means user contexts is calling this
@@ -1043,7 +1164,7 @@ bad:
return 1;
}
-/**
+/*
* get_send_routine - choose an egress routine
*
* Choose an egress routine based on QP type
@@ -1065,15 +1186,16 @@ static inline send_routine get_send_routine(struct rvt_qp *qp,
case IB_QPT_UD:
break;
case IB_QPT_UC:
- case IB_QPT_RC: {
+ case IB_QPT_RC:
+ priv->s_running_pkt_size =
+ (tx->s_cur_size + priv->s_running_pkt_size) / 2;
if (piothreshold &&
- tx->s_cur_size <= min(piothreshold, qp->pmtu) &&
+ priv->s_running_pkt_size <= min(piothreshold, qp->pmtu) &&
(BIT(ps->opcode & OPMASK) & pio_opmask[ps->opcode >> 5]) &&
iowait_sdma_pending(&priv->s_iowait) == 0 &&
!sdma_txreq_built(&tx->txreq))
return dd->process_pio_send;
break;
- }
default:
break;
}
@@ -1178,8 +1300,8 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE |
- IB_DEVICE_MEM_MGT_EXTENSIONS |
- IB_DEVICE_RDMA_NETDEV_OPA_VNIC;
+ IB_DEVICE_MEM_MGT_EXTENSIONS;
+ rdi->dparms.props.kernel_cap_flags = IBK_RDMA_NETDEV_OPA;
rdi->dparms.props.page_size_cap = PAGE_SIZE;
rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3;
rdi->dparms.props.vendor_part_id = dd->pcidev->device;
@@ -1188,16 +1310,15 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
rdi->dparms.props.max_mr_size = U64_MAX;
rdi->dparms.props.max_fast_reg_page_list_len = UINT_MAX;
rdi->dparms.props.max_qp = hfi1_max_qps;
- rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs;
+ rdi->dparms.props.max_qp_wr =
+ (hfi1_max_qp_wrs >= HFI1_QP_WQE_INVALID ?
+ HFI1_QP_WQE_INVALID - 1 : hfi1_max_qp_wrs);
rdi->dparms.props.max_send_sge = hfi1_max_sges;
rdi->dparms.props.max_recv_sge = hfi1_max_sges;
rdi->dparms.props.max_sge_rd = hfi1_max_sges;
rdi->dparms.props.max_cq = hfi1_max_cqs;
rdi->dparms.props.max_ah = hfi1_max_ahs;
rdi->dparms.props.max_cqe = hfi1_max_cqes;
- rdi->dparms.props.max_mr = rdi->lkey_table.max;
- rdi->dparms.props.max_fmr = rdi->lkey_table.max;
- rdi->dparms.props.max_map_per_fmr = 32767;
rdi->dparms.props.max_pd = hfi1_max_pds;
rdi->dparms.props.max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC;
rdi->dparms.props.max_qp_init_rd_atom = 255;
@@ -1244,7 +1365,7 @@ static inline u16 opa_width_to_ib(u16 in)
}
}
-static int query_port(struct rvt_dev_info *rdi, u8 port_num,
+static int query_port(struct rvt_dev_info *rdi, u32 port_num,
struct ib_port_attr *props)
{
struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
@@ -1261,7 +1382,7 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num,
props->gid_tbl_len = HFI1_GUIDS_PER_PORT;
props->active_width = (u8)opa_width_to_ib(ppd->link_width_active);
/* see rate_show() in ib core/sysfs.c */
- props->active_speed = (u8)opa_speed_to_ib(ppd->link_speed_active);
+ props->active_speed = opa_speed_to_ib(ppd->link_speed_active);
props->max_vl_num = ppd->vls_supported;
/* Once we are a "first class" citizen and have added the OPA MTUs to
@@ -1276,6 +1397,7 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num,
4096 : hfi1_max_mtu), IB_MTU_4096);
props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu :
mtu_to_enum(ppd->ibmtu, IB_MTU_4096);
+ props->phys_mtu = hfi1_max_mtu;
return 0;
}
@@ -1320,17 +1442,15 @@ bail:
return ret;
}
-static int shut_down_port(struct rvt_dev_info *rdi, u8 port_num)
+static int shut_down_port(struct rvt_dev_info *rdi, u32 port_num)
{
struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
- int ret;
set_link_down_reason(ppd, OPA_LINKDOWN_REASON_UNKNOWN, 0,
OPA_LINKDOWN_REASON_UNKNOWN);
- ret = set_link_state(ppd, HLS_DN_DOWNDEF);
- return ret;
+ return set_link_state(ppd, HLS_DN_DOWNDEF);
}
static int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
@@ -1375,6 +1495,7 @@ static int hfi1_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr)
sl = rdma_ah_get_sl(ah_attr);
if (sl >= ARRAY_SIZE(ibp->sl_to_sc))
return -EINVAL;
+ sl = array_index_nospec(sl, ARRAY_SIZE(ibp->sl_to_sc));
sc5 = ibp->sl_to_sc[sl];
if (sc_to_vlt(dd, sc5) > num_vls && sc_to_vlt(dd, sc5) != 0xf)
@@ -1477,26 +1598,23 @@ static const char * const driver_cntr_names[] = {
"DRIVER_EgrHdrFull"
};
-static DEFINE_MUTEX(cntr_names_lock); /* protects the *_cntr_names bufers */
-static const char **dev_cntr_names;
-static const char **port_cntr_names;
+static struct rdma_stat_desc *dev_cntr_descs;
+static struct rdma_stat_desc *port_cntr_descs;
int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names);
static int num_dev_cntrs;
static int num_port_cntrs;
-static int cntr_names_initialized;
/*
* Convert a list of names separated by '\n' into an array of NULL terminated
* strings. Optionally some entries can be reserved in the array to hold extra
* external strings.
*/
-static int init_cntr_names(const char *names_in,
- const size_t names_len,
- int num_extra_names,
- int *num_cntrs,
- const char ***cntr_names)
+static int init_cntr_names(const char *names_in, const size_t names_len,
+ int num_extra_names, int *num_cntrs,
+ struct rdma_stat_desc **cntr_descs)
{
- char *names_out, *p, **q;
+ struct rdma_stat_desc *names_out;
+ char *p;
int i, n;
n = 0;
@@ -1504,95 +1622,83 @@ static int init_cntr_names(const char *names_in,
if (names_in[i] == '\n')
n++;
- names_out = kmalloc((n + num_extra_names) * sizeof(char *) + names_len,
+ names_out = kzalloc((n + num_extra_names) * sizeof(*names_out)
+ + names_len,
GFP_KERNEL);
if (!names_out) {
*num_cntrs = 0;
- *cntr_names = NULL;
+ *cntr_descs = NULL;
return -ENOMEM;
}
- p = names_out + (n + num_extra_names) * sizeof(char *);
+ p = (char *)&names_out[n + num_extra_names];
memcpy(p, names_in, names_len);
- q = (char **)names_out;
for (i = 0; i < n; i++) {
- q[i] = p;
+ names_out[i].name = p;
p = strchr(p, '\n');
*p++ = '\0';
}
*num_cntrs = n;
- *cntr_names = (const char **)names_out;
+ *cntr_descs = names_out;
return 0;
}
-static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev,
- u8 port_num)
+static struct rdma_hw_stats *hfi1_alloc_hw_device_stats(struct ib_device *ibdev)
{
- int i, err;
-
- mutex_lock(&cntr_names_lock);
- if (!cntr_names_initialized) {
+ if (!dev_cntr_descs) {
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+ int i, err;
- err = init_cntr_names(dd->cntrnames,
- dd->cntrnameslen,
+ err = init_cntr_names(dd->cntrnames, dd->cntrnameslen,
num_driver_cntrs,
- &num_dev_cntrs,
- &dev_cntr_names);
- if (err) {
- mutex_unlock(&cntr_names_lock);
+ &num_dev_cntrs, &dev_cntr_descs);
+ if (err)
return NULL;
- }
for (i = 0; i < num_driver_cntrs; i++)
- dev_cntr_names[num_dev_cntrs + i] =
- driver_cntr_names[i];
+ dev_cntr_descs[num_dev_cntrs + i].name =
+ driver_cntr_names[i];
+ }
+ return rdma_alloc_hw_stats_struct(dev_cntr_descs,
+ num_dev_cntrs + num_driver_cntrs,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static struct rdma_hw_stats *hfi_alloc_hw_port_stats(struct ib_device *ibdev,
+ u32 port_num)
+{
+ if (!port_cntr_descs) {
+ struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+ int err;
- err = init_cntr_names(dd->portcntrnames,
- dd->portcntrnameslen,
+ err = init_cntr_names(dd->portcntrnames, dd->portcntrnameslen,
0,
- &num_port_cntrs,
- &port_cntr_names);
- if (err) {
- kfree(dev_cntr_names);
- dev_cntr_names = NULL;
- mutex_unlock(&cntr_names_lock);
+ &num_port_cntrs, &port_cntr_descs);
+ if (err)
return NULL;
- }
- cntr_names_initialized = 1;
}
- mutex_unlock(&cntr_names_lock);
-
- if (!port_num)
- return rdma_alloc_hw_stats_struct(
- dev_cntr_names,
- num_dev_cntrs + num_driver_cntrs,
- RDMA_HW_STATS_DEFAULT_LIFESPAN);
- else
- return rdma_alloc_hw_stats_struct(
- port_cntr_names,
- num_port_cntrs,
- RDMA_HW_STATS_DEFAULT_LIFESPAN);
+ return rdma_alloc_hw_stats_struct(port_cntr_descs, num_port_cntrs,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
static u64 hfi1_sps_ints(void)
{
- unsigned long flags;
+ unsigned long index, flags;
struct hfi1_devdata *dd;
u64 sps_ints = 0;
- spin_lock_irqsave(&hfi1_devs_lock, flags);
- list_for_each_entry(dd, &hfi1_dev_list, list) {
+ xa_lock_irqsave(&hfi1_dev_table, flags);
+ xa_for_each(&hfi1_dev_table, index, dd) {
sps_ints += get_all_cpu_total(dd->int_counter);
}
- spin_unlock_irqrestore(&hfi1_devs_lock, flags);
+ xa_unlock_irqrestore(&hfi1_dev_table, flags);
return sps_ints;
}
static int get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
- u8 port, int index)
+ u32 port, int index)
{
u64 *values;
int count;
@@ -1618,13 +1724,20 @@ static int get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
}
static const struct ib_device_ops hfi1_dev_ops = {
- .alloc_hw_stats = alloc_hw_stats,
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_HFI1,
+
+ .alloc_hw_device_stats = hfi1_alloc_hw_device_stats,
+ .alloc_hw_port_stats = hfi_alloc_hw_port_stats,
.alloc_rdma_netdev = hfi1_vnic_alloc_rn,
+ .device_group = &ib_hfi1_attr_group,
.get_dev_fw_str = hfi1_get_dev_fw_str,
.get_hw_stats = get_hw_stats,
.modify_device = modify_device,
+ .port_groups = hfi1_attr_port_groups,
/* keep process mad in the driver */
.process_mad = hfi1_process_mad,
+ .rdma_netdev_get_params = hfi1_ipoib_rn_get_params,
};
/**
@@ -1667,19 +1780,17 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
*/
if (!ib_hfi1_sys_image_guid)
ib_hfi1_sys_image_guid = ibdev->node_guid;
- ibdev->owner = THIS_MODULE;
ibdev->phys_port_cnt = dd->num_pports;
ibdev->dev.parent = &dd->pcidev->dev;
ib_set_device_ops(ibdev, &hfi1_dev_ops);
- strlcpy(ibdev->node_desc, init_utsname()->nodename,
+ strscpy(ibdev->node_desc, init_utsname()->nodename,
sizeof(ibdev->node_desc));
/*
* Fill in rvt info object.
*/
- dd->verbs_dev.rdi.driver_f.port_callback = hfi1_create_port_files;
dd->verbs_dev.rdi.driver_f.get_pci_dev = get_pci_dev;
dd->verbs_dev.rdi.driver_f.check_ah = hfi1_check_ah;
dd->verbs_dev.rdi.driver_f.notify_new_ah = hfi1_notify_new_ah;
@@ -1697,9 +1808,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.dparms.qpn_start = 0;
dd->verbs_dev.rdi.dparms.qpn_inc = 1;
dd->verbs_dev.rdi.dparms.qos_shift = dd->qos_shift;
- dd->verbs_dev.rdi.dparms.qpn_res_start = kdeth_qp << 16;
- dd->verbs_dev.rdi.dparms.qpn_res_end =
- dd->verbs_dev.rdi.dparms.qpn_res_start + 65535;
+ dd->verbs_dev.rdi.dparms.qpn_res_start = RVT_KDETH_QP_BASE;
+ dd->verbs_dev.rdi.dparms.qpn_res_end = RVT_AIP_QP_MAX;
dd->verbs_dev.rdi.dparms.max_rdma_atomic = HFI1_MAX_RDMA_ATOMIC;
dd->verbs_dev.rdi.dparms.psn_mask = PSN_MASK;
dd->verbs_dev.rdi.dparms.psn_shift = PSN_SHIFT;
@@ -1743,6 +1853,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.dparms.sge_copy_mode = sge_copy_mode;
dd->verbs_dev.rdi.dparms.wss_threshold = wss_threshold;
dd->verbs_dev.rdi.dparms.wss_clean_period = wss_clean_period;
+ dd->verbs_dev.rdi.dparms.reserved_operations = 1;
+ dd->verbs_dev.rdi.dparms.extra_rdma_atomic = HFI1_TID_RDMA_WRITE_CNT;
/* post send table */
dd->verbs_dev.rdi.post_parms = hfi1_post_parms;
@@ -1757,10 +1869,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
i,
ppd->pkeys);
- rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev,
- &ib_hfi1_attr_group);
-
- ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_HFI1);
+ ret = rvt_register_device(&dd->verbs_dev.rdi);
if (ret)
goto err_verbs_txreq;
@@ -1791,16 +1900,13 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd)
if (!list_empty(&dev->memwait))
dd_dev_err(dd, "memwait list not empty!\n");
- del_timer_sync(&dev->mem_timer);
+ timer_delete_sync(&dev->mem_timer);
verbs_txreq_exit(dev);
- mutex_lock(&cntr_names_lock);
- kfree(dev_cntr_names);
- kfree(port_cntr_names);
- dev_cntr_names = NULL;
- port_cntr_names = NULL;
- cntr_names_initialized = 0;
- mutex_unlock(&cntr_names_lock);
+ kfree(dev_cntr_descs);
+ kfree(port_cntr_descs);
+ dev_cntr_descs = NULL;
+ port_cntr_descs = NULL;
}
void hfi1_cnp_rcv(struct hfi1_packet *packet)