diff options
Diffstat (limited to 'drivers/infiniband/hw/hfi1/init.c')
| -rw-r--r-- | drivers/infiniband/hw/hfi1/init.c | 411 |
1 files changed, 140 insertions, 271 deletions
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 7835eb52e7c5..e4aef102dac0 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1,59 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright(c) 2015 - 2018 Intel Corporation. - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * BSD LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * + * Copyright(c) 2015 - 2020 Intel Corporation. + * Copyright(c) 2021 Cornelis Networks. */ #include <linux/pci.h> #include <linux/netdevice.h> #include <linux/vmalloc.h> #include <linux/delay.h> -#include <linux/idr.h> +#include <linux/xarray.h> #include <linux/module.h> #include <linux/printk.h> #include <linux/hrtimer.h> #include <linux/bitmap.h> +#include <linux/numa.h> #include <rdma/rdma_vt.h> #include "hfi.h" @@ -68,18 +28,16 @@ #include "affinity.h" #include "vnic.h" #include "exp_rcv.h" +#include "netdev.h" #undef pr_fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt -#define HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES 5 /* * min buffers we want to have per context, after driver */ #define HFI1_MIN_USER_CTXT_BUFCNT 7 -#define HFI1_MIN_HDRQ_EGRBUF_CNT 2 -#define HFI1_MAX_HDRQ_EGRBUF_CNT 16352 #define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */ #define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */ @@ -122,9 +80,7 @@ unsigned int user_credit_return_threshold = 33; /* default is 33% */ module_param(user_credit_return_threshold, uint, S_IRUGO); MODULE_PARM_DESC(user_credit_return_threshold, "Credit return threshold for user send contexts, return when unreturned credits passes this many blocks (in percent of allocated blocks, 0 is off)"); -static inline u64 encode_rcv_header_entry_size(u16 size); - -static struct idr hfi1_unit_table; +DEFINE_XARRAY_FLAGS(hfi1_dev_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ); static int hfi1_create_kctxt(struct hfi1_devdata *dd, struct hfi1_pportdata *ppd) @@ -154,7 +110,11 @@ static int hfi1_create_kctxt(struct hfi1_devdata *dd, /* Control context must use DMA_RTAIL */ if (rcd->ctxt == HFI1_CTRL_CTXT) rcd->flags |= HFI1_CAP_DMA_RTAIL; - rcd->seq_cnt = 1; + rcd->fast_handler = get_dma_rtail_setting(rcd) ? + handle_receive_interrupt_dma_rtail : + handle_receive_interrupt_nodma_rtail; + + hfi1_set_seq_cnt(rcd, 1); rcd->sc = sc_alloc(dd, SC_ACK, rcd->rcvhdrqentsize, dd->node); if (!rcd->sc) { @@ -215,12 +175,12 @@ static void hfi1_rcd_free(struct kref *kref) struct hfi1_ctxtdata *rcd = container_of(kref, struct hfi1_ctxtdata, kref); - hfi1_free_ctxtdata(rcd->dd, rcd); - spin_lock_irqsave(&rcd->dd->uctxt_lock, flags); rcd->dd->rcd[rcd->ctxt] = NULL; spin_unlock_irqrestore(&rcd->dd->uctxt_lock, flags); + hfi1_free_ctxtdata(rcd->dd, rcd); + kfree(rcd); } @@ -243,10 +203,13 @@ int hfi1_rcd_put(struct hfi1_ctxtdata *rcd) * @rcd: pointer to an initialized rcd data structure * * Use this to get a reference after the init. + * + * Return : reflect kref_get_unless_zero(), which returns non-zero on + * increment, otherwise 0. */ -void hfi1_rcd_get(struct hfi1_ctxtdata *rcd) +int hfi1_rcd_get(struct hfi1_ctxtdata *rcd) { - kref_get(&rcd->kref); + return kref_get_unless_zero(&rcd->kref); } /** @@ -307,7 +270,7 @@ struct hfi1_ctxtdata *hfi1_rcd_get_by_index_safe(struct hfi1_devdata *dd, } /** - * hfi1_rcd_get_by_index + * hfi1_rcd_get_by_index - get by index * @dd: pointer to a valid devdata structure * @ctxt: the index of an possilbe rcd * @@ -326,7 +289,8 @@ struct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt) spin_lock_irqsave(&dd->uctxt_lock, flags); if (dd->rcd[ctxt]) { rcd = dd->rcd[ctxt]; - hfi1_rcd_get(rcd); + if (!hfi1_rcd_get(rcd)) + rcd = NULL; } spin_unlock_irqrestore(&dd->uctxt_lock, flags); @@ -369,10 +333,16 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, rcd->numa_id = numa; rcd->rcv_array_groups = dd->rcv_entries.ngroups; rcd->rhf_rcv_function_map = normal_rhf_rcv_functions; + rcd->slow_handler = handle_receive_interrupt; + rcd->do_interrupt = rcd->slow_handler; + rcd->msix_intr = CCE_NUM_MSIX_VECTORS; mutex_init(&rcd->exp_mutex); + spin_lock_init(&rcd->exp_lock); + INIT_LIST_HEAD(&rcd->flow_queue.queue_head); + INIT_LIST_HEAD(&rcd->rarr_queue.queue_head); - hfi1_cdbg(PROC, "setting up context %u\n", rcd->ctxt); + hfi1_cdbg(PROC, "setting up context %u", rcd->ctxt); /* * Calculate the context's RcvArray entry starting point. @@ -430,7 +400,7 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, rcd->egrbufs.count = MAX_EAGER_ENTRIES; } hfi1_cdbg(PROC, - "ctxt%u: max Eager buffer RcvArray entries: %u\n", + "ctxt%u: max Eager buffer RcvArray entries: %u", rcd->ctxt, rcd->egrbufs.count); /* @@ -462,7 +432,7 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, if (rcd->egrbufs.size < hfi1_max_mtu) { rcd->egrbufs.size = __roundup_pow_of_two(hfi1_max_mtu); hfi1_cdbg(PROC, - "ctxt%u: eager bufs size too small. Adjusting to %zu\n", + "ctxt%u: eager bufs size too small. Adjusting to %u", rcd->ctxt, rcd->egrbufs.size); } rcd->egrbufs.rcvtid_size = HFI1_MAX_EAGER_BUFFER_SIZE; @@ -473,6 +443,9 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, GFP_KERNEL, numa); if (!rcd->opstats) goto bail; + + /* Initialize TID flow generations for the context */ + hfi1_kern_init_ctxt_generations(rcd); } *context = rcd; @@ -486,12 +459,12 @@ bail: } /** - * hfi1_free_ctxt + * hfi1_free_ctxt - free context * @rcd: pointer to an initialized rcd data structure * * This wrapper is the free function that matches hfi1_create_ctxtdata(). * When a context is done being used (kernel or user), this function is called - * for the "final" put to match the kref init from hf1i_create_ctxtdata(). + * for the "final" put to match the kref init from hfi1_create_ctxtdata(). * Other users of the context do a get/put sequence to make sure that the * structure isn't removed while in use. */ @@ -501,23 +474,6 @@ void hfi1_free_ctxt(struct hfi1_ctxtdata *rcd) } /* - * Convert a receive header entry size that to the encoding used in the CSR. - * - * Return a zero if the given size is invalid. - */ -static inline u64 encode_rcv_header_entry_size(u16 size) -{ - /* there are only 3 valid receive header entry sizes */ - if (size == 2) - return 1; - if (size == 16) - return 2; - else if (size == 32) - return 4; - return 0; /* invalid */ -} - -/* * Select the largest ccti value over all SLs to determine the intra- * packet gap for the link. * @@ -533,7 +489,7 @@ void set_link_ipg(struct hfi1_pportdata *ppd) u16 shift, mult; u64 src; u32 current_egress_rate; /* Mbits /sec */ - u32 max_pkt_time; + u64 max_pkt_time; /* * max_pkt_time is the maximum packet egress time in units * of the fabric clock period 1/(805 MHz). @@ -631,7 +587,7 @@ static enum hrtimer_restart cca_timer_fn(struct hrtimer *t) * Common code for initializing the physical port structure. */ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, - struct hfi1_devdata *dd, u8 hw_pidx, u8 port) + struct hfi1_devdata *dd, u8 hw_pidx, u32 port) { int i; uint default_pkey_idx; @@ -654,12 +610,7 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, ppd->pkeys[default_pkey_idx] = DEFAULT_P_KEY; ppd->part_enforce |= HFI1_PART_ENFORCE_IN; - - if (loopback) { - dd_dev_err(dd, "Faking data partition 0x8001 in idx %u\n", - !default_pkey_idx); - ppd->pkeys[!default_pkey_idx] = 0x8001; - } + ppd->pkeys[0] = 0x8001; INIT_WORK(&ppd->link_vc_work, handle_verify_cap); INIT_WORK(&ppd->link_up_work, handle_link_up); @@ -684,12 +635,11 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, spin_lock_init(&ppd->cca_timer_lock); for (i = 0; i < OPA_MAX_SLS; i++) { - hrtimer_init(&ppd->cca_timer[i].hrtimer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL); ppd->cca_timer[i].ppd = ppd; ppd->cca_timer[i].sl = i; ppd->cca_timer[i].ccti = 0; - ppd->cca_timer[i].hrtimer.function = cca_timer_fn; + hrtimer_setup(&ppd->cca_timer[i].hrtimer, cca_timer_fn, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); } ppd->cc_max_table_entries = IB_CC_TABLE_CAP_DEFAULT; @@ -772,6 +722,8 @@ static void enable_chip(struct hfi1_devdata *dd) rcvmask |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB; if (HFI1_CAP_KGET_MASK(rcd->flags, NODROP_EGR_FULL)) rcvmask |= HFI1_RCVCTRL_NO_EGR_DROP_ENB; + if (HFI1_CAP_IS_KSET(TID_RDMA)) + rcvmask |= HFI1_RCVCTRL_TIDFLOW_ENB; hfi1_rcvctrl(dd, rcvmask, rcd); sc_enable(rcd->sc); hfi1_rcd_put(rcd); @@ -793,7 +745,8 @@ static int create_workqueues(struct hfi1_devdata *dd) ppd->hfi1_wq = alloc_workqueue( "hfi%d_%d", - WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE, + WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | + WQ_PERCPU, HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES, dd->unit, pidx); if (!ppd->hfi1_wq) @@ -832,6 +785,29 @@ wq_error: } /** + * destroy_workqueues - destroy per port workqueues + * @dd: the hfi1_ib device + */ +static void destroy_workqueues(struct hfi1_devdata *dd) +{ + int pidx; + struct hfi1_pportdata *ppd; + + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + + if (ppd->hfi1_wq) { + destroy_workqueue(ppd->hfi1_wq); + ppd->hfi1_wq = NULL; + } + if (ppd->link_wq) { + destroy_workqueue(ppd->link_wq); + ppd->link_wq = NULL; + } + } +} + +/** * enable_general_intr() - Enable the IRQs that will be handled by the * general interrupt handler. * @dd: valid devdata @@ -879,10 +855,10 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) if (is_ax(dd)) { atomic_set(&dd->drop_packet, DROP_PACKET_ON); - dd->do_drop = 1; + dd->do_drop = true; } else { atomic_set(&dd->drop_packet, DROP_PACKET_OFF); - dd->do_drop = 0; + dd->do_drop = false; } /* make sure the link is not "up" */ @@ -898,18 +874,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) if (ret) goto done; - /* allocate dummy tail memory for all receive contexts */ - dd->rcvhdrtail_dummy_kvaddr = dma_alloc_coherent(&dd->pcidev->dev, - sizeof(u64), - &dd->rcvhdrtail_dummy_dma, - GFP_KERNEL); - - if (!dd->rcvhdrtail_dummy_kvaddr) { - dd_dev_err(dd, "cannot allocate dummy tail memory\n"); - ret = -ENOMEM; - goto done; - } - /* dd->rcd can be NULL if early initialization failed */ for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) { /* @@ -922,11 +886,11 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) if (!rcd) continue; - rcd->do_interrupt = &handle_receive_interrupt; - lastfail = hfi1_create_rcvhdrq(dd, rcd); if (!lastfail) lastfail = hfi1_setup_eagerbufs(rcd); + if (!lastfail) + lastfail = hfi1_kern_exp_rcv_init(rcd, reinit); if (lastfail) { dd_dev_err(dd, "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n"); @@ -1004,21 +968,9 @@ done: return ret; } -static inline struct hfi1_devdata *__hfi1_lookup(int unit) -{ - return idr_find(&hfi1_unit_table, unit); -} - struct hfi1_devdata *hfi1_lookup(int unit) { - struct hfi1_devdata *dd; - unsigned long flags; - - spin_lock_irqsave(&hfi1_devs_lock, flags); - dd = __hfi1_lookup(unit); - spin_unlock_irqrestore(&hfi1_devs_lock, flags); - - return dd; + return xa_load(&hfi1_dev_table, unit); } /* @@ -1033,7 +985,7 @@ static void stop_timers(struct hfi1_devdata *dd) for (pidx = 0; pidx < dd->num_pports; ++pidx) { ppd = dd->pport + pidx; if (ppd->led_override_timer.function) { - del_timer_sync(&ppd->led_override_timer); + timer_delete_sync(&ppd->led_override_timer); atomic_set(&ppd->led_override_timer_active, 0); } } @@ -1074,7 +1026,6 @@ static void shutdown_device(struct hfi1_devdata *dd) msix_clean_up_interrupts(dd); for (pidx = 0; pidx < dd->num_pports; ++pidx) { - ppd = dd->pport + pidx; for (i = 0; i < dd->num_rcv_contexts; i++) { rcd = hfi1_rcd_get_by_index(dd, i); hfi1_rcvctrl(dd, HFI1_RCVCTRL_TAILUPD_DIS | @@ -1114,15 +1065,10 @@ static void shutdown_device(struct hfi1_devdata *dd) * We can't count on interrupts since we are stopping. */ hfi1_quiet_serdes(ppd); - - if (ppd->hfi1_wq) { - destroy_workqueue(ppd->hfi1_wq); - ppd->hfi1_wq = NULL; - } - if (ppd->link_wq) { - destroy_workqueue(ppd->link_wq); - ppd->link_wq = NULL; - } + if (ppd->hfi1_wq) + flush_workqueue(ppd->hfi1_wq); + if (ppd->link_wq) + flush_workqueue(ppd->link_wq); } sdma_exit(dd); } @@ -1146,9 +1092,9 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) dma_free_coherent(&dd->pcidev->dev, rcvhdrq_size(rcd), rcd->rcvhdrq, rcd->rcvhdrq_dma); rcd->rcvhdrq = NULL; - if (rcd->rcvhdrtail_kvaddr) { + if (hfi1_rcvhdrtail_kvaddr(rcd)) { dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, - (void *)rcd->rcvhdrtail_kvaddr, + (void *)hfi1_rcvhdrtail_kvaddr(rcd), rcd->rcvhdrqtailaddr_dma); rcd->rcvhdrtail_kvaddr = NULL; } @@ -1159,7 +1105,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) rcd->egrbufs.rcvtids = NULL; for (e = 0; e < rcd->egrbufs.alloced; e++) { - if (rcd->egrbufs.buffers[e].dma) + if (rcd->egrbufs.buffers[e].addr) dma_free_coherent(&dd->pcidev->dev, rcd->egrbufs.buffers[e].len, rcd->egrbufs.buffers[e].addr, @@ -1186,7 +1132,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) /* * Release our hold on the shared asic data. If we are the last one, * return the structure to be finalized outside the lock. Must be - * holding hfi1_devs_lock. + * holding hfi1_dev_table lock. */ static struct hfi1_asic_data *release_asic_data(struct hfi1_devdata *dd) { @@ -1211,24 +1157,21 @@ static void finalize_asic_data(struct hfi1_devdata *dd, } /** - * hfi1_clean_devdata - cleans up per-unit data structure + * hfi1_free_devdata - cleans up and frees per-unit data structure * @dd: pointer to a valid devdata structure * - * It cleans up all data structures set up by + * It cleans up and frees all data structures set up by * by hfi1_alloc_devdata(). */ -static void hfi1_clean_devdata(struct hfi1_devdata *dd) +void hfi1_free_devdata(struct hfi1_devdata *dd) { struct hfi1_asic_data *ad; unsigned long flags; - spin_lock_irqsave(&hfi1_devs_lock, flags); - if (!list_empty(&dd->list)) { - idr_remove(&hfi1_unit_table, dd->unit); - list_del_init(&dd->list); - } + xa_lock_irqsave(&hfi1_dev_table, flags); + __xa_erase(&hfi1_dev_table, dd->unit); ad = release_asic_data(dd); - spin_unlock_irqrestore(&hfi1_devs_lock, flags); + xa_unlock_irqrestore(&hfi1_dev_table, flags); finalize_asic_data(dd, ad); free_platform_config(dd); @@ -1243,27 +1186,15 @@ static void hfi1_clean_devdata(struct hfi1_devdata *dd) dd->tx_opstats = NULL; kfree(dd->comp_vect); dd->comp_vect = NULL; + if (dd->rcvhdrtail_dummy_kvaddr) + dma_free_coherent(&dd->pcidev->dev, sizeof(u64), + (void *)dd->rcvhdrtail_dummy_kvaddr, + dd->rcvhdrtail_dummy_dma); + dd->rcvhdrtail_dummy_kvaddr = NULL; sdma_clean(dd, dd->num_sdma); rvt_dealloc_device(&dd->verbs_dev.rdi); } -static void __hfi1_free_devdata(struct kobject *kobj) -{ - struct hfi1_devdata *dd = - container_of(kobj, struct hfi1_devdata, kobj); - - hfi1_clean_devdata(dd); -} - -static struct kobj_type hfi1_devdata_type = { - .release = __hfi1_free_devdata, -}; - -void hfi1_free_devdata(struct hfi1_devdata *dd) -{ - kobject_put(&dd->kobj); -} - /** * hfi1_alloc_devdata - Allocate our primary per-unit data structure. * @pdev: Valid PCI device @@ -1272,13 +1203,10 @@ void hfi1_free_devdata(struct hfi1_devdata *dd) * Must be done via verbs allocator, because the verbs cleanup process * both does cleanup and free of the data structure. * "extra" is for chip-specific data. - * - * Use the idr mechanism to get a unit number for this unit. */ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) { - unsigned long flags; struct hfi1_devdata *dd; int ret, nports; @@ -1294,26 +1222,23 @@ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, dd->pcidev = pdev; pci_set_drvdata(pdev, dd); - INIT_LIST_HEAD(&dd->list); - idr_preload(GFP_KERNEL); - spin_lock_irqsave(&hfi1_devs_lock, flags); - - ret = idr_alloc(&hfi1_unit_table, dd, 0, 0, GFP_NOWAIT); - if (ret >= 0) { - dd->unit = ret; - list_add(&dd->list, &hfi1_dev_list); - } - dd->node = -1; - - spin_unlock_irqrestore(&hfi1_devs_lock, flags); - idr_preload_end(); - + ret = xa_alloc_irq(&hfi1_dev_table, &dd->unit, dd, xa_limit_32b, + GFP_KERNEL); if (ret < 0) { dev_err(&pdev->dev, "Could not allocate unit ID: error %d\n", -ret); goto bail; } rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s_%d", class_name(), dd->unit); + /* + * If the BIOS does not have the NUMA node information set, select + * NUMA 0 so we get consistent performance. + */ + dd->node = pcibus_to_node(pdev->bus); + if (dd->node == NUMA_NO_NODE) { + dd_dev_err(dd, "Invalid PCI NUMA node. Performance may be affected\n"); + dd->node = 0; + } /* * Initialize all locks for the device. This needs to be as early as @@ -1363,11 +1288,20 @@ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, goto bail; } - kobject_init(&dd->kobj, &hfi1_devdata_type); + /* allocate dummy tail memory for all receive contexts */ + dd->rcvhdrtail_dummy_kvaddr = + dma_alloc_coherent(&dd->pcidev->dev, sizeof(u64), + &dd->rcvhdrtail_dummy_dma, GFP_KERNEL); + if (!dd->rcvhdrtail_dummy_kvaddr) { + ret = -ENOMEM; + goto bail; + } + + atomic_set(&dd->ipoib_rsm_usr_num, 0); return dd; bail: - hfi1_clean_devdata(dd); + hfi1_free_devdata(dd); return ERR_PTR(ret); } @@ -1408,7 +1342,7 @@ static void remove_one(struct pci_dev *); static int init_one(struct pci_dev *, const struct pci_device_id *); static void shutdown_one(struct pci_dev *); -#define DRIVER_LOAD_MSG "Intel " DRIVER_NAME " loaded: " +#define DRIVER_LOAD_MSG "Cornelis " DRIVER_NAME " loaded: " #define PFX DRIVER_NAME ": " const struct pci_device_id hfi1_pci_tbl[] = { @@ -1497,12 +1431,16 @@ static int __init hfi1_mod_init(void) /* sanitize link CRC options */ link_crc_mask &= SUPPORTED_CRCS; + ret = opfn_init(); + if (ret < 0) { + pr_err("Failed to allocate opfn_wq"); + goto bail_dev; + } + /* * These must be called before the driver is registered with * the PCI subsystem. */ - idr_init(&hfi1_unit_table); - hfi1_dbg_init(); ret = pci_register_driver(&hfi1_pci_driver); if (ret < 0) { @@ -1513,7 +1451,6 @@ static int __init hfi1_mod_init(void) bail_dev: hfi1_dbg_exit(); - idr_destroy(&hfi1_unit_table); dev_cleanup(); bail: return ret; @@ -1527,10 +1464,11 @@ module_init(hfi1_mod_init); static void __exit hfi1_mod_cleanup(void) { pci_unregister_driver(&hfi1_pci_driver); + opfn_exit(); node_affinity_destroy_all(); hfi1_dbg_exit(); - idr_destroy(&hfi1_unit_table); + WARN_ON(!xa_empty(&hfi1_dev_table)); dispose_firmware(); /* asymmetric with obtain_firmware() */ dev_cleanup(); } @@ -1566,13 +1504,6 @@ static void cleanup_device_data(struct hfi1_devdata *dd) free_credit_return(dd); - if (dd->rcvhdrtail_dummy_kvaddr) { - dma_free_coherent(&dd->pcidev->dev, sizeof(u64), - (void *)dd->rcvhdrtail_dummy_kvaddr, - dd->rcvhdrtail_dummy_dma); - dd->rcvhdrtail_dummy_kvaddr = NULL; - } - /* * Free any resources still in use (usually just kernel contexts) * at unload; we do for ctxtcnt, because that's what we allocate. @@ -1581,7 +1512,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd) struct hfi1_ctxtdata *rcd = dd->rcd[ctxt]; if (rcd) { - hfi1_clear_tids(rcd); + hfi1_free_ctxt_rcv_groups(rcd); hfi1_free_ctxt(rcd); } } @@ -1621,29 +1552,6 @@ static void postinit_cleanup(struct hfi1_devdata *dd) hfi1_free_devdata(dd); } -static int init_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt) -{ - if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) { - dd_dev_err(dd, "Receive header queue count too small\n"); - return -EINVAL; - } - - if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) { - dd_dev_err(dd, - "Receive header queue count cannot be greater than %u\n", - HFI1_MAX_HDRQ_EGRBUF_CNT); - return -EINVAL; - } - - if (thecnt % HDRQ_INCREMENT) { - dd_dev_err(dd, "Receive header queue count %d must be divisible by %lu\n", - thecnt, HDRQ_INCREMENT); - return -EINVAL; - } - - return 0; -} - static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { int ret = 0, j, pidx, initfail; @@ -1671,7 +1579,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } /* Validate some global module parameters */ - ret = init_validate_rcvhdrcnt(dd, rcvhdrcnt); + ret = hfi1_validate_rcvhdrcnt(dd, rcvhdrcnt); if (ret) goto bail; @@ -1730,9 +1638,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* do the generic initialization */ initfail = hfi1_init(dd, 0); - /* setup vnic */ - hfi1_vnic_setup(dd); - ret = hfi1_register_ib_device(dd); /* @@ -1771,7 +1676,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) hfi1_device_remove(dd); if (!ret) hfi1_unregister_ib_device(dd); - hfi1_vnic_cleanup(dd); postinit_cleanup(dd); if (initfail) ret = initfail; @@ -1794,7 +1698,7 @@ static void wait_for_clients(struct hfi1_devdata *dd) * Remove the device init value and complete the device if there is * no clients or wait for active clients to finish. */ - if (atomic_dec_and_test(&dd->user_refcount)) + if (refcount_dec_and_test(&dd->user_refcount)) complete(&dd->user_comp); wait_for_completion(&dd->user_comp); @@ -1816,14 +1720,15 @@ static void remove_one(struct pci_dev *pdev) /* unregister from IB core */ hfi1_unregister_ib_device(dd); - /* cleanup vnic */ - hfi1_vnic_cleanup(dd); + /* free netdev data */ + hfi1_free_rx(dd); /* * Disable the IB link, disable interrupts on the device, * clear dma engines, etc. */ shutdown_device(dd); + destroy_workqueues(dd); stop_timers(dd); @@ -1852,20 +1757,13 @@ static void shutdown_one(struct pci_dev *pdev) int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) { unsigned amt; - u64 reg; if (!rcd->rcvhdrq) { - gfp_t gfp_flags; - amt = rcvhdrq_size(rcd); - if (rcd->ctxt < dd->first_dyn_alloc_ctxt || rcd->is_vnic) - gfp_flags = GFP_KERNEL; - else - gfp_flags = GFP_USER; rcd->rcvhdrq = dma_alloc_coherent(&dd->pcidev->dev, amt, &rcd->rcvhdrq_dma, - gfp_flags | __GFP_COMP); + GFP_KERNEL); if (!rcd->rcvhdrq) { dd_dev_err(dd, @@ -1879,35 +1777,14 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) rcd->rcvhdrtail_kvaddr = dma_alloc_coherent(&dd->pcidev->dev, PAGE_SIZE, &rcd->rcvhdrqtailaddr_dma, - gfp_flags); + GFP_KERNEL); if (!rcd->rcvhdrtail_kvaddr) goto bail_free; } } - /* - * These values are per-context: - * RcvHdrCnt - * RcvHdrEntSize - * RcvHdrSize - */ - reg = ((u64)(rcd->rcvhdrq_cnt >> HDRQ_SIZE_SHIFT) - & RCV_HDR_CNT_CNT_MASK) - << RCV_HDR_CNT_CNT_SHIFT; - write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_CNT, reg); - reg = (encode_rcv_header_entry_size(rcd->rcvhdrqentsize) - & RCV_HDR_ENT_SIZE_ENT_SIZE_MASK) - << RCV_HDR_ENT_SIZE_ENT_SIZE_SHIFT; - write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_ENT_SIZE, reg); - reg = ((u64)DEFAULT_RCVHDRSIZE & RCV_HDR_SIZE_HDR_SIZE_MASK) - << RCV_HDR_SIZE_HDR_SIZE_SHIFT; - write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_SIZE, reg); - /* - * Program dummy tail address for every receive context - * before enabling any receive context - */ - write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_TAIL_ADDR, - dd->rcvhdrtail_dummy_dma); + set_hdrq_regs(rcd->dd, rcd->ctxt, rcd->rcvhdrqentsize, + rcd->rcvhdrq_cnt); return 0; @@ -1923,7 +1800,8 @@ bail: } /** - * allocate eager buffers, both kernel and user contexts. + * hfi1_setup_eagerbufs - llocate eager buffers, both kernel and user + * contexts. * @rcd: the context we are setting up. * * Allocate the eager TID buffers and program them into hip. @@ -1935,20 +1813,11 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) { struct hfi1_devdata *dd = rcd->dd; u32 max_entries, egrtop, alloced_bytes = 0; - gfp_t gfp_flags; u16 order, idx = 0; int ret = 0; u16 round_mtu = roundup_pow_of_two(hfi1_max_mtu); /* - * GFP_USER, but without GFP_FS, so buffer cache can be - * coalesced (we hope); otherwise, even at order 4, - * heavy filesystem activity makes these fail, and we can - * use compound pages. - */ - gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP; - - /* * The minimum size of the eager buffers is a groups of MTU-sized * buffers. * The global eager_buffer_size parameter is checked against the @@ -1978,7 +1847,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) dma_alloc_coherent(&dd->pcidev->dev, rcd->egrbufs.rcvtid_size, &rcd->egrbufs.buffers[idx].dma, - gfp_flags); + GFP_KERNEL); if (rcd->egrbufs.buffers[idx].addr) { rcd->egrbufs.buffers[idx].len = rcd->egrbufs.rcvtid_size; @@ -2049,7 +1918,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) rcd->egrbufs.size = alloced_bytes; hfi1_cdbg(PROC, - "ctxt%u: Alloced %u rcv tid entries @ %uKB, total %zuKB\n", + "ctxt%u: Alloced %u rcv tid entries @ %uKB, total %uKB", rcd->ctxt, rcd->egrbufs.alloced, rcd->egrbufs.rcvtid_size / 1024, rcd->egrbufs.size / 1024); @@ -2072,13 +1941,13 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) rcd->expected_count = MAX_TID_PAIR_ENTRIES * 2; rcd->expected_base = rcd->eager_base + egrtop; - hfi1_cdbg(PROC, "ctxt%u: eager:%u, exp:%u, egrbase:%u, expbase:%u\n", + hfi1_cdbg(PROC, "ctxt%u: eager:%u, exp:%u, egrbase:%u, expbase:%u", rcd->ctxt, rcd->egrbufs.alloced, rcd->expected_count, rcd->eager_base, rcd->expected_base); if (!hfi1_rcvbuf_validate(rcd->egrbufs.rcvtid_size, PT_EAGER, &order)) { hfi1_cdbg(PROC, - "ctxt%u: current Eager buffer size is invalid %u\n", + "ctxt%u: current Eager buffer size is invalid %u", rcd->ctxt, rcd->egrbufs.rcvtid_size); ret = -EINVAL; goto bail_rcvegrbuf_phys; |
