summaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw/hfi1/init.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/hfi1/init.c')
-rw-r--r--drivers/infiniband/hw/hfi1/init.c411
1 files changed, 140 insertions, 271 deletions
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 7835eb52e7c5..e4aef102dac0 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1,59 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2020 Intel Corporation.
+ * Copyright(c) 2021 Cornelis Networks.
*/
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>
#include <linux/delay.h>
-#include <linux/idr.h>
+#include <linux/xarray.h>
#include <linux/module.h>
#include <linux/printk.h>
#include <linux/hrtimer.h>
#include <linux/bitmap.h>
+#include <linux/numa.h>
#include <rdma/rdma_vt.h>
#include "hfi.h"
@@ -68,18 +28,16 @@
#include "affinity.h"
#include "vnic.h"
#include "exp_rcv.h"
+#include "netdev.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
-#define HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES 5
/*
* min buffers we want to have per context, after driver
*/
#define HFI1_MIN_USER_CTXT_BUFCNT 7
-#define HFI1_MIN_HDRQ_EGRBUF_CNT 2
-#define HFI1_MAX_HDRQ_EGRBUF_CNT 16352
#define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */
#define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */
@@ -122,9 +80,7 @@ unsigned int user_credit_return_threshold = 33; /* default is 33% */
module_param(user_credit_return_threshold, uint, S_IRUGO);
MODULE_PARM_DESC(user_credit_return_threshold, "Credit return threshold for user send contexts, return when unreturned credits passes this many blocks (in percent of allocated blocks, 0 is off)");
-static inline u64 encode_rcv_header_entry_size(u16 size);
-
-static struct idr hfi1_unit_table;
+DEFINE_XARRAY_FLAGS(hfi1_dev_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
static int hfi1_create_kctxt(struct hfi1_devdata *dd,
struct hfi1_pportdata *ppd)
@@ -154,7 +110,11 @@ static int hfi1_create_kctxt(struct hfi1_devdata *dd,
/* Control context must use DMA_RTAIL */
if (rcd->ctxt == HFI1_CTRL_CTXT)
rcd->flags |= HFI1_CAP_DMA_RTAIL;
- rcd->seq_cnt = 1;
+ rcd->fast_handler = get_dma_rtail_setting(rcd) ?
+ handle_receive_interrupt_dma_rtail :
+ handle_receive_interrupt_nodma_rtail;
+
+ hfi1_set_seq_cnt(rcd, 1);
rcd->sc = sc_alloc(dd, SC_ACK, rcd->rcvhdrqentsize, dd->node);
if (!rcd->sc) {
@@ -215,12 +175,12 @@ static void hfi1_rcd_free(struct kref *kref)
struct hfi1_ctxtdata *rcd =
container_of(kref, struct hfi1_ctxtdata, kref);
- hfi1_free_ctxtdata(rcd->dd, rcd);
-
spin_lock_irqsave(&rcd->dd->uctxt_lock, flags);
rcd->dd->rcd[rcd->ctxt] = NULL;
spin_unlock_irqrestore(&rcd->dd->uctxt_lock, flags);
+ hfi1_free_ctxtdata(rcd->dd, rcd);
+
kfree(rcd);
}
@@ -243,10 +203,13 @@ int hfi1_rcd_put(struct hfi1_ctxtdata *rcd)
* @rcd: pointer to an initialized rcd data structure
*
* Use this to get a reference after the init.
+ *
+ * Return : reflect kref_get_unless_zero(), which returns non-zero on
+ * increment, otherwise 0.
*/
-void hfi1_rcd_get(struct hfi1_ctxtdata *rcd)
+int hfi1_rcd_get(struct hfi1_ctxtdata *rcd)
{
- kref_get(&rcd->kref);
+ return kref_get_unless_zero(&rcd->kref);
}
/**
@@ -307,7 +270,7 @@ struct hfi1_ctxtdata *hfi1_rcd_get_by_index_safe(struct hfi1_devdata *dd,
}
/**
- * hfi1_rcd_get_by_index
+ * hfi1_rcd_get_by_index - get by index
* @dd: pointer to a valid devdata structure
* @ctxt: the index of an possilbe rcd
*
@@ -326,7 +289,8 @@ struct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt)
spin_lock_irqsave(&dd->uctxt_lock, flags);
if (dd->rcd[ctxt]) {
rcd = dd->rcd[ctxt];
- hfi1_rcd_get(rcd);
+ if (!hfi1_rcd_get(rcd))
+ rcd = NULL;
}
spin_unlock_irqrestore(&dd->uctxt_lock, flags);
@@ -369,10 +333,16 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
rcd->numa_id = numa;
rcd->rcv_array_groups = dd->rcv_entries.ngroups;
rcd->rhf_rcv_function_map = normal_rhf_rcv_functions;
+ rcd->slow_handler = handle_receive_interrupt;
+ rcd->do_interrupt = rcd->slow_handler;
+ rcd->msix_intr = CCE_NUM_MSIX_VECTORS;
mutex_init(&rcd->exp_mutex);
+ spin_lock_init(&rcd->exp_lock);
+ INIT_LIST_HEAD(&rcd->flow_queue.queue_head);
+ INIT_LIST_HEAD(&rcd->rarr_queue.queue_head);
- hfi1_cdbg(PROC, "setting up context %u\n", rcd->ctxt);
+ hfi1_cdbg(PROC, "setting up context %u", rcd->ctxt);
/*
* Calculate the context's RcvArray entry starting point.
@@ -430,7 +400,7 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
rcd->egrbufs.count = MAX_EAGER_ENTRIES;
}
hfi1_cdbg(PROC,
- "ctxt%u: max Eager buffer RcvArray entries: %u\n",
+ "ctxt%u: max Eager buffer RcvArray entries: %u",
rcd->ctxt, rcd->egrbufs.count);
/*
@@ -462,7 +432,7 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
if (rcd->egrbufs.size < hfi1_max_mtu) {
rcd->egrbufs.size = __roundup_pow_of_two(hfi1_max_mtu);
hfi1_cdbg(PROC,
- "ctxt%u: eager bufs size too small. Adjusting to %zu\n",
+ "ctxt%u: eager bufs size too small. Adjusting to %u",
rcd->ctxt, rcd->egrbufs.size);
}
rcd->egrbufs.rcvtid_size = HFI1_MAX_EAGER_BUFFER_SIZE;
@@ -473,6 +443,9 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
GFP_KERNEL, numa);
if (!rcd->opstats)
goto bail;
+
+ /* Initialize TID flow generations for the context */
+ hfi1_kern_init_ctxt_generations(rcd);
}
*context = rcd;
@@ -486,12 +459,12 @@ bail:
}
/**
- * hfi1_free_ctxt
+ * hfi1_free_ctxt - free context
* @rcd: pointer to an initialized rcd data structure
*
* This wrapper is the free function that matches hfi1_create_ctxtdata().
* When a context is done being used (kernel or user), this function is called
- * for the "final" put to match the kref init from hf1i_create_ctxtdata().
+ * for the "final" put to match the kref init from hfi1_create_ctxtdata().
* Other users of the context do a get/put sequence to make sure that the
* structure isn't removed while in use.
*/
@@ -501,23 +474,6 @@ void hfi1_free_ctxt(struct hfi1_ctxtdata *rcd)
}
/*
- * Convert a receive header entry size that to the encoding used in the CSR.
- *
- * Return a zero if the given size is invalid.
- */
-static inline u64 encode_rcv_header_entry_size(u16 size)
-{
- /* there are only 3 valid receive header entry sizes */
- if (size == 2)
- return 1;
- if (size == 16)
- return 2;
- else if (size == 32)
- return 4;
- return 0; /* invalid */
-}
-
-/*
* Select the largest ccti value over all SLs to determine the intra-
* packet gap for the link.
*
@@ -533,7 +489,7 @@ void set_link_ipg(struct hfi1_pportdata *ppd)
u16 shift, mult;
u64 src;
u32 current_egress_rate; /* Mbits /sec */
- u32 max_pkt_time;
+ u64 max_pkt_time;
/*
* max_pkt_time is the maximum packet egress time in units
* of the fabric clock period 1/(805 MHz).
@@ -631,7 +587,7 @@ static enum hrtimer_restart cca_timer_fn(struct hrtimer *t)
* Common code for initializing the physical port structure.
*/
void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
- struct hfi1_devdata *dd, u8 hw_pidx, u8 port)
+ struct hfi1_devdata *dd, u8 hw_pidx, u32 port)
{
int i;
uint default_pkey_idx;
@@ -654,12 +610,7 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
ppd->pkeys[default_pkey_idx] = DEFAULT_P_KEY;
ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
-
- if (loopback) {
- dd_dev_err(dd, "Faking data partition 0x8001 in idx %u\n",
- !default_pkey_idx);
- ppd->pkeys[!default_pkey_idx] = 0x8001;
- }
+ ppd->pkeys[0] = 0x8001;
INIT_WORK(&ppd->link_vc_work, handle_verify_cap);
INIT_WORK(&ppd->link_up_work, handle_link_up);
@@ -684,12 +635,11 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
spin_lock_init(&ppd->cca_timer_lock);
for (i = 0; i < OPA_MAX_SLS; i++) {
- hrtimer_init(&ppd->cca_timer[i].hrtimer, CLOCK_MONOTONIC,
- HRTIMER_MODE_REL);
ppd->cca_timer[i].ppd = ppd;
ppd->cca_timer[i].sl = i;
ppd->cca_timer[i].ccti = 0;
- ppd->cca_timer[i].hrtimer.function = cca_timer_fn;
+ hrtimer_setup(&ppd->cca_timer[i].hrtimer, cca_timer_fn, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
}
ppd->cc_max_table_entries = IB_CC_TABLE_CAP_DEFAULT;
@@ -772,6 +722,8 @@ static void enable_chip(struct hfi1_devdata *dd)
rcvmask |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
if (HFI1_CAP_KGET_MASK(rcd->flags, NODROP_EGR_FULL))
rcvmask |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
+ if (HFI1_CAP_IS_KSET(TID_RDMA))
+ rcvmask |= HFI1_RCVCTRL_TIDFLOW_ENB;
hfi1_rcvctrl(dd, rcvmask, rcd);
sc_enable(rcd->sc);
hfi1_rcd_put(rcd);
@@ -793,7 +745,8 @@ static int create_workqueues(struct hfi1_devdata *dd)
ppd->hfi1_wq =
alloc_workqueue(
"hfi%d_%d",
- WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE,
+ WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM |
+ WQ_PERCPU,
HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES,
dd->unit, pidx);
if (!ppd->hfi1_wq)
@@ -832,6 +785,29 @@ wq_error:
}
/**
+ * destroy_workqueues - destroy per port workqueues
+ * @dd: the hfi1_ib device
+ */
+static void destroy_workqueues(struct hfi1_devdata *dd)
+{
+ int pidx;
+ struct hfi1_pportdata *ppd;
+
+ for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+ ppd = dd->pport + pidx;
+
+ if (ppd->hfi1_wq) {
+ destroy_workqueue(ppd->hfi1_wq);
+ ppd->hfi1_wq = NULL;
+ }
+ if (ppd->link_wq) {
+ destroy_workqueue(ppd->link_wq);
+ ppd->link_wq = NULL;
+ }
+ }
+}
+
+/**
* enable_general_intr() - Enable the IRQs that will be handled by the
* general interrupt handler.
* @dd: valid devdata
@@ -879,10 +855,10 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
if (is_ax(dd)) {
atomic_set(&dd->drop_packet, DROP_PACKET_ON);
- dd->do_drop = 1;
+ dd->do_drop = true;
} else {
atomic_set(&dd->drop_packet, DROP_PACKET_OFF);
- dd->do_drop = 0;
+ dd->do_drop = false;
}
/* make sure the link is not "up" */
@@ -898,18 +874,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
if (ret)
goto done;
- /* allocate dummy tail memory for all receive contexts */
- dd->rcvhdrtail_dummy_kvaddr = dma_alloc_coherent(&dd->pcidev->dev,
- sizeof(u64),
- &dd->rcvhdrtail_dummy_dma,
- GFP_KERNEL);
-
- if (!dd->rcvhdrtail_dummy_kvaddr) {
- dd_dev_err(dd, "cannot allocate dummy tail memory\n");
- ret = -ENOMEM;
- goto done;
- }
-
/* dd->rcd can be NULL if early initialization failed */
for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) {
/*
@@ -922,11 +886,11 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
if (!rcd)
continue;
- rcd->do_interrupt = &handle_receive_interrupt;
-
lastfail = hfi1_create_rcvhdrq(dd, rcd);
if (!lastfail)
lastfail = hfi1_setup_eagerbufs(rcd);
+ if (!lastfail)
+ lastfail = hfi1_kern_exp_rcv_init(rcd, reinit);
if (lastfail) {
dd_dev_err(dd,
"failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
@@ -1004,21 +968,9 @@ done:
return ret;
}
-static inline struct hfi1_devdata *__hfi1_lookup(int unit)
-{
- return idr_find(&hfi1_unit_table, unit);
-}
-
struct hfi1_devdata *hfi1_lookup(int unit)
{
- struct hfi1_devdata *dd;
- unsigned long flags;
-
- spin_lock_irqsave(&hfi1_devs_lock, flags);
- dd = __hfi1_lookup(unit);
- spin_unlock_irqrestore(&hfi1_devs_lock, flags);
-
- return dd;
+ return xa_load(&hfi1_dev_table, unit);
}
/*
@@ -1033,7 +985,7 @@ static void stop_timers(struct hfi1_devdata *dd)
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
ppd = dd->pport + pidx;
if (ppd->led_override_timer.function) {
- del_timer_sync(&ppd->led_override_timer);
+ timer_delete_sync(&ppd->led_override_timer);
atomic_set(&ppd->led_override_timer_active, 0);
}
}
@@ -1074,7 +1026,6 @@ static void shutdown_device(struct hfi1_devdata *dd)
msix_clean_up_interrupts(dd);
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
- ppd = dd->pport + pidx;
for (i = 0; i < dd->num_rcv_contexts; i++) {
rcd = hfi1_rcd_get_by_index(dd, i);
hfi1_rcvctrl(dd, HFI1_RCVCTRL_TAILUPD_DIS |
@@ -1114,15 +1065,10 @@ static void shutdown_device(struct hfi1_devdata *dd)
* We can't count on interrupts since we are stopping.
*/
hfi1_quiet_serdes(ppd);
-
- if (ppd->hfi1_wq) {
- destroy_workqueue(ppd->hfi1_wq);
- ppd->hfi1_wq = NULL;
- }
- if (ppd->link_wq) {
- destroy_workqueue(ppd->link_wq);
- ppd->link_wq = NULL;
- }
+ if (ppd->hfi1_wq)
+ flush_workqueue(ppd->hfi1_wq);
+ if (ppd->link_wq)
+ flush_workqueue(ppd->link_wq);
}
sdma_exit(dd);
}
@@ -1146,9 +1092,9 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
dma_free_coherent(&dd->pcidev->dev, rcvhdrq_size(rcd),
rcd->rcvhdrq, rcd->rcvhdrq_dma);
rcd->rcvhdrq = NULL;
- if (rcd->rcvhdrtail_kvaddr) {
+ if (hfi1_rcvhdrtail_kvaddr(rcd)) {
dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
- (void *)rcd->rcvhdrtail_kvaddr,
+ (void *)hfi1_rcvhdrtail_kvaddr(rcd),
rcd->rcvhdrqtailaddr_dma);
rcd->rcvhdrtail_kvaddr = NULL;
}
@@ -1159,7 +1105,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
rcd->egrbufs.rcvtids = NULL;
for (e = 0; e < rcd->egrbufs.alloced; e++) {
- if (rcd->egrbufs.buffers[e].dma)
+ if (rcd->egrbufs.buffers[e].addr)
dma_free_coherent(&dd->pcidev->dev,
rcd->egrbufs.buffers[e].len,
rcd->egrbufs.buffers[e].addr,
@@ -1186,7 +1132,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
/*
* Release our hold on the shared asic data. If we are the last one,
* return the structure to be finalized outside the lock. Must be
- * holding hfi1_devs_lock.
+ * holding hfi1_dev_table lock.
*/
static struct hfi1_asic_data *release_asic_data(struct hfi1_devdata *dd)
{
@@ -1211,24 +1157,21 @@ static void finalize_asic_data(struct hfi1_devdata *dd,
}
/**
- * hfi1_clean_devdata - cleans up per-unit data structure
+ * hfi1_free_devdata - cleans up and frees per-unit data structure
* @dd: pointer to a valid devdata structure
*
- * It cleans up all data structures set up by
+ * It cleans up and frees all data structures set up by
* by hfi1_alloc_devdata().
*/
-static void hfi1_clean_devdata(struct hfi1_devdata *dd)
+void hfi1_free_devdata(struct hfi1_devdata *dd)
{
struct hfi1_asic_data *ad;
unsigned long flags;
- spin_lock_irqsave(&hfi1_devs_lock, flags);
- if (!list_empty(&dd->list)) {
- idr_remove(&hfi1_unit_table, dd->unit);
- list_del_init(&dd->list);
- }
+ xa_lock_irqsave(&hfi1_dev_table, flags);
+ __xa_erase(&hfi1_dev_table, dd->unit);
ad = release_asic_data(dd);
- spin_unlock_irqrestore(&hfi1_devs_lock, flags);
+ xa_unlock_irqrestore(&hfi1_dev_table, flags);
finalize_asic_data(dd, ad);
free_platform_config(dd);
@@ -1243,27 +1186,15 @@ static void hfi1_clean_devdata(struct hfi1_devdata *dd)
dd->tx_opstats = NULL;
kfree(dd->comp_vect);
dd->comp_vect = NULL;
+ if (dd->rcvhdrtail_dummy_kvaddr)
+ dma_free_coherent(&dd->pcidev->dev, sizeof(u64),
+ (void *)dd->rcvhdrtail_dummy_kvaddr,
+ dd->rcvhdrtail_dummy_dma);
+ dd->rcvhdrtail_dummy_kvaddr = NULL;
sdma_clean(dd, dd->num_sdma);
rvt_dealloc_device(&dd->verbs_dev.rdi);
}
-static void __hfi1_free_devdata(struct kobject *kobj)
-{
- struct hfi1_devdata *dd =
- container_of(kobj, struct hfi1_devdata, kobj);
-
- hfi1_clean_devdata(dd);
-}
-
-static struct kobj_type hfi1_devdata_type = {
- .release = __hfi1_free_devdata,
-};
-
-void hfi1_free_devdata(struct hfi1_devdata *dd)
-{
- kobject_put(&dd->kobj);
-}
-
/**
* hfi1_alloc_devdata - Allocate our primary per-unit data structure.
* @pdev: Valid PCI device
@@ -1272,13 +1203,10 @@ void hfi1_free_devdata(struct hfi1_devdata *dd)
* Must be done via verbs allocator, because the verbs cleanup process
* both does cleanup and free of the data structure.
* "extra" is for chip-specific data.
- *
- * Use the idr mechanism to get a unit number for this unit.
*/
static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev,
size_t extra)
{
- unsigned long flags;
struct hfi1_devdata *dd;
int ret, nports;
@@ -1294,26 +1222,23 @@ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev,
dd->pcidev = pdev;
pci_set_drvdata(pdev, dd);
- INIT_LIST_HEAD(&dd->list);
- idr_preload(GFP_KERNEL);
- spin_lock_irqsave(&hfi1_devs_lock, flags);
-
- ret = idr_alloc(&hfi1_unit_table, dd, 0, 0, GFP_NOWAIT);
- if (ret >= 0) {
- dd->unit = ret;
- list_add(&dd->list, &hfi1_dev_list);
- }
- dd->node = -1;
-
- spin_unlock_irqrestore(&hfi1_devs_lock, flags);
- idr_preload_end();
-
+ ret = xa_alloc_irq(&hfi1_dev_table, &dd->unit, dd, xa_limit_32b,
+ GFP_KERNEL);
if (ret < 0) {
dev_err(&pdev->dev,
"Could not allocate unit ID: error %d\n", -ret);
goto bail;
}
rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s_%d", class_name(), dd->unit);
+ /*
+ * If the BIOS does not have the NUMA node information set, select
+ * NUMA 0 so we get consistent performance.
+ */
+ dd->node = pcibus_to_node(pdev->bus);
+ if (dd->node == NUMA_NO_NODE) {
+ dd_dev_err(dd, "Invalid PCI NUMA node. Performance may be affected\n");
+ dd->node = 0;
+ }
/*
* Initialize all locks for the device. This needs to be as early as
@@ -1363,11 +1288,20 @@ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev,
goto bail;
}
- kobject_init(&dd->kobj, &hfi1_devdata_type);
+ /* allocate dummy tail memory for all receive contexts */
+ dd->rcvhdrtail_dummy_kvaddr =
+ dma_alloc_coherent(&dd->pcidev->dev, sizeof(u64),
+ &dd->rcvhdrtail_dummy_dma, GFP_KERNEL);
+ if (!dd->rcvhdrtail_dummy_kvaddr) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ atomic_set(&dd->ipoib_rsm_usr_num, 0);
return dd;
bail:
- hfi1_clean_devdata(dd);
+ hfi1_free_devdata(dd);
return ERR_PTR(ret);
}
@@ -1408,7 +1342,7 @@ static void remove_one(struct pci_dev *);
static int init_one(struct pci_dev *, const struct pci_device_id *);
static void shutdown_one(struct pci_dev *);
-#define DRIVER_LOAD_MSG "Intel " DRIVER_NAME " loaded: "
+#define DRIVER_LOAD_MSG "Cornelis " DRIVER_NAME " loaded: "
#define PFX DRIVER_NAME ": "
const struct pci_device_id hfi1_pci_tbl[] = {
@@ -1497,12 +1431,16 @@ static int __init hfi1_mod_init(void)
/* sanitize link CRC options */
link_crc_mask &= SUPPORTED_CRCS;
+ ret = opfn_init();
+ if (ret < 0) {
+ pr_err("Failed to allocate opfn_wq");
+ goto bail_dev;
+ }
+
/*
* These must be called before the driver is registered with
* the PCI subsystem.
*/
- idr_init(&hfi1_unit_table);
-
hfi1_dbg_init();
ret = pci_register_driver(&hfi1_pci_driver);
if (ret < 0) {
@@ -1513,7 +1451,6 @@ static int __init hfi1_mod_init(void)
bail_dev:
hfi1_dbg_exit();
- idr_destroy(&hfi1_unit_table);
dev_cleanup();
bail:
return ret;
@@ -1527,10 +1464,11 @@ module_init(hfi1_mod_init);
static void __exit hfi1_mod_cleanup(void)
{
pci_unregister_driver(&hfi1_pci_driver);
+ opfn_exit();
node_affinity_destroy_all();
hfi1_dbg_exit();
- idr_destroy(&hfi1_unit_table);
+ WARN_ON(!xa_empty(&hfi1_dev_table));
dispose_firmware(); /* asymmetric with obtain_firmware() */
dev_cleanup();
}
@@ -1566,13 +1504,6 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
free_credit_return(dd);
- if (dd->rcvhdrtail_dummy_kvaddr) {
- dma_free_coherent(&dd->pcidev->dev, sizeof(u64),
- (void *)dd->rcvhdrtail_dummy_kvaddr,
- dd->rcvhdrtail_dummy_dma);
- dd->rcvhdrtail_dummy_kvaddr = NULL;
- }
-
/*
* Free any resources still in use (usually just kernel contexts)
* at unload; we do for ctxtcnt, because that's what we allocate.
@@ -1581,7 +1512,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
if (rcd) {
- hfi1_clear_tids(rcd);
+ hfi1_free_ctxt_rcv_groups(rcd);
hfi1_free_ctxt(rcd);
}
}
@@ -1621,29 +1552,6 @@ static void postinit_cleanup(struct hfi1_devdata *dd)
hfi1_free_devdata(dd);
}
-static int init_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt)
-{
- if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
- dd_dev_err(dd, "Receive header queue count too small\n");
- return -EINVAL;
- }
-
- if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
- dd_dev_err(dd,
- "Receive header queue count cannot be greater than %u\n",
- HFI1_MAX_HDRQ_EGRBUF_CNT);
- return -EINVAL;
- }
-
- if (thecnt % HDRQ_INCREMENT) {
- dd_dev_err(dd, "Receive header queue count %d must be divisible by %lu\n",
- thecnt, HDRQ_INCREMENT);
- return -EINVAL;
- }
-
- return 0;
-}
-
static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
int ret = 0, j, pidx, initfail;
@@ -1671,7 +1579,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
}
/* Validate some global module parameters */
- ret = init_validate_rcvhdrcnt(dd, rcvhdrcnt);
+ ret = hfi1_validate_rcvhdrcnt(dd, rcvhdrcnt);
if (ret)
goto bail;
@@ -1730,9 +1638,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* do the generic initialization */
initfail = hfi1_init(dd, 0);
- /* setup vnic */
- hfi1_vnic_setup(dd);
-
ret = hfi1_register_ib_device(dd);
/*
@@ -1771,7 +1676,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
hfi1_device_remove(dd);
if (!ret)
hfi1_unregister_ib_device(dd);
- hfi1_vnic_cleanup(dd);
postinit_cleanup(dd);
if (initfail)
ret = initfail;
@@ -1794,7 +1698,7 @@ static void wait_for_clients(struct hfi1_devdata *dd)
* Remove the device init value and complete the device if there is
* no clients or wait for active clients to finish.
*/
- if (atomic_dec_and_test(&dd->user_refcount))
+ if (refcount_dec_and_test(&dd->user_refcount))
complete(&dd->user_comp);
wait_for_completion(&dd->user_comp);
@@ -1816,14 +1720,15 @@ static void remove_one(struct pci_dev *pdev)
/* unregister from IB core */
hfi1_unregister_ib_device(dd);
- /* cleanup vnic */
- hfi1_vnic_cleanup(dd);
+ /* free netdev data */
+ hfi1_free_rx(dd);
/*
* Disable the IB link, disable interrupts on the device,
* clear dma engines, etc.
*/
shutdown_device(dd);
+ destroy_workqueues(dd);
stop_timers(dd);
@@ -1852,20 +1757,13 @@ static void shutdown_one(struct pci_dev *pdev)
int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
{
unsigned amt;
- u64 reg;
if (!rcd->rcvhdrq) {
- gfp_t gfp_flags;
-
amt = rcvhdrq_size(rcd);
- if (rcd->ctxt < dd->first_dyn_alloc_ctxt || rcd->is_vnic)
- gfp_flags = GFP_KERNEL;
- else
- gfp_flags = GFP_USER;
rcd->rcvhdrq = dma_alloc_coherent(&dd->pcidev->dev, amt,
&rcd->rcvhdrq_dma,
- gfp_flags | __GFP_COMP);
+ GFP_KERNEL);
if (!rcd->rcvhdrq) {
dd_dev_err(dd,
@@ -1879,35 +1777,14 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
rcd->rcvhdrtail_kvaddr = dma_alloc_coherent(&dd->pcidev->dev,
PAGE_SIZE,
&rcd->rcvhdrqtailaddr_dma,
- gfp_flags);
+ GFP_KERNEL);
if (!rcd->rcvhdrtail_kvaddr)
goto bail_free;
}
}
- /*
- * These values are per-context:
- * RcvHdrCnt
- * RcvHdrEntSize
- * RcvHdrSize
- */
- reg = ((u64)(rcd->rcvhdrq_cnt >> HDRQ_SIZE_SHIFT)
- & RCV_HDR_CNT_CNT_MASK)
- << RCV_HDR_CNT_CNT_SHIFT;
- write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_CNT, reg);
- reg = (encode_rcv_header_entry_size(rcd->rcvhdrqentsize)
- & RCV_HDR_ENT_SIZE_ENT_SIZE_MASK)
- << RCV_HDR_ENT_SIZE_ENT_SIZE_SHIFT;
- write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_ENT_SIZE, reg);
- reg = ((u64)DEFAULT_RCVHDRSIZE & RCV_HDR_SIZE_HDR_SIZE_MASK)
- << RCV_HDR_SIZE_HDR_SIZE_SHIFT;
- write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_SIZE, reg);
- /*
- * Program dummy tail address for every receive context
- * before enabling any receive context
- */
- write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_TAIL_ADDR,
- dd->rcvhdrtail_dummy_dma);
+ set_hdrq_regs(rcd->dd, rcd->ctxt, rcd->rcvhdrqentsize,
+ rcd->rcvhdrq_cnt);
return 0;
@@ -1923,7 +1800,8 @@ bail:
}
/**
- * allocate eager buffers, both kernel and user contexts.
+ * hfi1_setup_eagerbufs - llocate eager buffers, both kernel and user
+ * contexts.
* @rcd: the context we are setting up.
*
* Allocate the eager TID buffers and program them into hip.
@@ -1935,20 +1813,11 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
{
struct hfi1_devdata *dd = rcd->dd;
u32 max_entries, egrtop, alloced_bytes = 0;
- gfp_t gfp_flags;
u16 order, idx = 0;
int ret = 0;
u16 round_mtu = roundup_pow_of_two(hfi1_max_mtu);
/*
- * GFP_USER, but without GFP_FS, so buffer cache can be
- * coalesced (we hope); otherwise, even at order 4,
- * heavy filesystem activity makes these fail, and we can
- * use compound pages.
- */
- gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP;
-
- /*
* The minimum size of the eager buffers is a groups of MTU-sized
* buffers.
* The global eager_buffer_size parameter is checked against the
@@ -1978,7 +1847,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
dma_alloc_coherent(&dd->pcidev->dev,
rcd->egrbufs.rcvtid_size,
&rcd->egrbufs.buffers[idx].dma,
- gfp_flags);
+ GFP_KERNEL);
if (rcd->egrbufs.buffers[idx].addr) {
rcd->egrbufs.buffers[idx].len =
rcd->egrbufs.rcvtid_size;
@@ -2049,7 +1918,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
rcd->egrbufs.size = alloced_bytes;
hfi1_cdbg(PROC,
- "ctxt%u: Alloced %u rcv tid entries @ %uKB, total %zuKB\n",
+ "ctxt%u: Alloced %u rcv tid entries @ %uKB, total %uKB",
rcd->ctxt, rcd->egrbufs.alloced,
rcd->egrbufs.rcvtid_size / 1024, rcd->egrbufs.size / 1024);
@@ -2072,13 +1941,13 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
rcd->expected_count = MAX_TID_PAIR_ENTRIES * 2;
rcd->expected_base = rcd->eager_base + egrtop;
- hfi1_cdbg(PROC, "ctxt%u: eager:%u, exp:%u, egrbase:%u, expbase:%u\n",
+ hfi1_cdbg(PROC, "ctxt%u: eager:%u, exp:%u, egrbase:%u, expbase:%u",
rcd->ctxt, rcd->egrbufs.alloced, rcd->expected_count,
rcd->eager_base, rcd->expected_base);
if (!hfi1_rcvbuf_validate(rcd->egrbufs.rcvtid_size, PT_EAGER, &order)) {
hfi1_cdbg(PROC,
- "ctxt%u: current Eager buffer size is invalid %u\n",
+ "ctxt%u: current Eager buffer size is invalid %u",
rcd->ctxt, rcd->egrbufs.rcvtid_size);
ret = -EINVAL;
goto bail_rcvegrbuf_phys;