summaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw/hfi1/chip.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-11-15 14:54:53 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2017-11-15 14:54:53 -0800
commitad0835a93008e5901415a0a27847d6a27649aa3a (patch)
treee48be396ebfbb4f1fb02e7ca76461bdb1427490d /drivers/infiniband/hw/hfi1/chip.c
parent22714a2ba4b55737cd7d5299db7aaf1fa8287354 (diff)
parent4190b4e96954e2c3597021d29435c3f8db8d3129 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
Pull rdma updates from Doug Ledford: "This is a fairly plain pull request. Lots of driver updates across the stack, a huge number of static analysis cleanups including a close to 50 patch series from Bart Van Assche, and a number of new features inside the stack such as general CQ moderation support. Nothing really stands out, but there might be a few conflicts as you take things in. In particular, the cleanups touched some of the same lines as the new timer_setup changes. Everything in this pull request has been through 0day and at least two days of linux-next (since Stephen doesn't necessarily flag new errors/warnings until day2). A few more items (about 30 patches) from Intel and Mellanox showed up on the list on Tuesday. I've excluded those from this pull request, and I'm sure some of them qualify as fixes suitable to send any time, but I still have to review them fully. If they contain mostly fixes and little or no new development, then I will probably send them through by the end of the week just to get them out of the way. There was a break in my acceptance of patches which coincides with the computer problems I had, and then when I got things mostly back under control I had a backlog of patches to process, which I did mostly last Friday and Monday. So there is a larger number of patches processed in that timeframe than I was striving for. Summary: - Add iWARP support to qedr driver - Lots of misc fixes across subsystem - Multiple update series to hns roce driver - Multiple update series to hfi1 driver - Updates to vnic driver - Add kref to wait struct in cxgb4 driver - Updates to i40iw driver - Mellanox shared pull request - timer_setup changes - massive cleanup series from Bart Van Assche - Two series of SRP/SRPT changes from Bart Van Assche - Core updates from Mellanox - i40iw updates - IPoIB updates - mlx5 updates - mlx4 updates - hns updates - bnxt_re fixes - PCI write padding support - Sparse/Smatch/warning cleanups/fixes - CQ moderation support - SRQ support in vmw_pvrdma" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (296 commits) RDMA/core: Rename kernel modify_cq to better describe its usage IB/mlx5: Add CQ moderation capability to query_device IB/mlx4: Add CQ moderation capability to query_device IB/uverbs: Add CQ moderation capability to query_device IB/mlx5: Exposing modify CQ callback to uverbs layer IB/mlx4: Exposing modify CQ callback to uverbs layer IB/uverbs: Allow CQ moderation with modify CQ iw_cxgb4: atomically flush the qp iw_cxgb4: only call the cq comp_handler when the cq is armed iw_cxgb4: Fix possible circular dependency locking warning RDMA/bnxt_re: report vlan_id and sl in qp1 recv completion IB/core: Only maintain real QPs in the security lists IB/ocrdma_hw: remove unnecessary code in ocrdma_mbx_dealloc_lkey RDMA/core: Make function rdma_copy_addr return void RDMA/vmw_pvrdma: Add shared receive queue support RDMA/core: avoid uninitialized variable warning in create_udata RDMA/bnxt_re: synchronize poll_cq and req_notify_cq verbs RDMA/bnxt_re: Flush CQ notification Work Queue before destroying QP RDMA/bnxt_re: Set QP state in case of response completion errors RDMA/bnxt_re: Add memory barriers when processing CQ/EQ entries ...
Diffstat (limited to 'drivers/infiniband/hw/hfi1/chip.c')
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c385
1 files changed, 196 insertions, 189 deletions
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 0be42787759f..4f057e8ffe50 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1036,7 +1036,6 @@ static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits,
u8 *flag_bits, u16 *link_widths);
static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
u8 *device_rev);
-static void read_mgmt_allowed(struct hfi1_devdata *dd, u8 *mgmt_allowed);
static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx);
static int read_tx_settings(struct hfi1_devdata *dd, u8 *enable_lane_tx,
u8 *tx_polarity_inversion,
@@ -5538,9 +5537,9 @@ static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
* associated with them.
*/
#define RCVERR_CHECK_TIME 10
-static void update_rcverr_timer(unsigned long opaque)
+static void update_rcverr_timer(struct timer_list *t)
{
- struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
+ struct hfi1_devdata *dd = from_timer(dd, t, rcverr_timer);
struct hfi1_pportdata *ppd = dd->pport;
u32 cur_ovfl_cnt = read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
@@ -5559,7 +5558,7 @@ static void update_rcverr_timer(unsigned long opaque)
static int init_rcverr(struct hfi1_devdata *dd)
{
- setup_timer(&dd->rcverr_timer, update_rcverr_timer, (unsigned long)dd);
+ timer_setup(&dd->rcverr_timer, update_rcverr_timer, 0);
/* Assume the hardware counter has been reset */
dd->rcv_ovfl_cnt = 0;
return mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME);
@@ -5567,9 +5566,8 @@ static int init_rcverr(struct hfi1_devdata *dd)
static void free_rcverr(struct hfi1_devdata *dd)
{
- if (dd->rcverr_timer.data)
+ if (dd->rcverr_timer.function)
del_timer_sync(&dd->rcverr_timer);
- dd->rcverr_timer.data = 0;
}
static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
@@ -6520,12 +6518,11 @@ static void _dc_start(struct hfi1_devdata *dd)
if (!dd->dc_shutdown)
return;
- /* Take the 8051 out of reset */
- write_csr(dd, DC_DC8051_CFG_RST, 0ull);
- /* Wait until 8051 is ready */
- if (wait_fm_ready(dd, TIMEOUT_8051_START))
- dd_dev_err(dd, "%s: timeout starting 8051 firmware\n",
- __func__);
+ /*
+ * Take the 8051 out of reset, wait until 8051 is ready, and set host
+ * version bit.
+ */
+ release_and_wait_ready_8051_firmware(dd);
/* Take away reset for LCB and RX FPE (set in lcb_shutdown). */
write_csr(dd, DCC_CFG_RESET, 0x10);
@@ -6819,7 +6816,8 @@ static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
rcd = hfi1_rcd_get_by_index(dd, i);
/* Ensure all non-user contexts(including vnic) are enabled */
- if (!rcd || !rcd->sc || (rcd->sc->type == SC_USER)) {
+ if (!rcd ||
+ (i >= dd->first_dyn_alloc_ctxt && !rcd->is_vnic)) {
hfi1_rcd_put(rcd);
continue;
}
@@ -7199,27 +7197,6 @@ static int lcb_to_port_ltp(int lcb_crc)
return port_ltp;
}
-/*
- * Our neighbor has indicated that we are allowed to act as a fabric
- * manager, so place the full management partition key in the second
- * (0-based) pkey array position (see OPAv1, section 20.2.2.6.8). Note
- * that we should already have the limited management partition key in
- * array element 1, and also that the port is not yet up when
- * add_full_mgmt_pkey() is invoked.
- */
-static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd)
-{
- struct hfi1_devdata *dd = ppd->dd;
-
- /* Sanity check - ppd->pkeys[2] should be 0, or already initialized */
- if (!((ppd->pkeys[2] == 0) || (ppd->pkeys[2] == FULL_MGMT_P_KEY)))
- dd_dev_warn(dd, "%s pkey[2] already set to 0x%x, resetting it to 0x%x\n",
- __func__, ppd->pkeys[2], FULL_MGMT_P_KEY);
- ppd->pkeys[2] = FULL_MGMT_P_KEY;
- (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
- hfi1_event_pkey_change(ppd->dd, ppd->port);
-}
-
static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd)
{
if (ppd->pkeys[2] != 0) {
@@ -7416,11 +7393,7 @@ void handle_verify_cap(struct work_struct *work)
&partner_supported_crc);
read_vc_remote_link_width(dd, &remote_tx_rate, &link_widths);
read_remote_device_id(dd, &device_id, &device_rev);
- /*
- * And the 'MgmtAllowed' information, which is exchanged during
- * LNI, is also be available at this point.
- */
- read_mgmt_allowed(dd, &ppd->mgmt_allowed);
+
/* print the active widths */
get_link_widths(dd, &active_tx, &active_rx);
dd_dev_info(dd,
@@ -7548,9 +7521,6 @@ void handle_verify_cap(struct work_struct *work)
write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
set_8051_lcb_access(dd);
- if (ppd->mgmt_allowed)
- add_full_mgmt_pkey(ppd);
-
/* tell the 8051 to go to LinkUp */
set_link_state(ppd, HLS_GOING_UP);
}
@@ -8124,8 +8094,7 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
rcd = hfi1_rcd_get_by_index(dd, source);
if (rcd) {
/* Check for non-user contexts, including vnic */
- if ((source < dd->first_dyn_alloc_ctxt) ||
- (rcd->sc && (rcd->sc->type == SC_KERNEL)))
+ if (source < dd->first_dyn_alloc_ctxt || rcd->is_vnic)
rcd->do_interrupt(rcd, 0);
else
handle_user_interrupt(rcd);
@@ -8155,8 +8124,8 @@ static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source)
rcd = hfi1_rcd_get_by_index(dd, source);
if (rcd) {
/* only pay attention to user urgent interrupts */
- if ((source >= dd->first_dyn_alloc_ctxt) &&
- (!rcd->sc || (rcd->sc->type == SC_USER)))
+ if (source >= dd->first_dyn_alloc_ctxt &&
+ !rcd->is_vnic)
handle_user_interrupt(rcd);
hfi1_rcd_put(rcd);
@@ -8595,30 +8564,23 @@ int write_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 data)
}
/*
+ * If the 8051 is in reset mode (dd->dc_shutdown == 1), this function
+ * will still continue executing.
+ *
* Returns:
* < 0 = Linux error, not able to get access
* > 0 = 8051 command RETURN_CODE
*/
-static int do_8051_command(
- struct hfi1_devdata *dd,
- u32 type,
- u64 in_data,
- u64 *out_data)
+static int _do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
+ u64 *out_data)
{
u64 reg, completed;
int return_code;
unsigned long timeout;
+ lockdep_assert_held(&dd->dc8051_lock);
hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data);
- mutex_lock(&dd->dc8051_lock);
-
- /* We can't send any commands to the 8051 if it's in reset */
- if (dd->dc_shutdown) {
- return_code = -ENODEV;
- goto fail;
- }
-
/*
* If an 8051 host command timed out previously, then the 8051 is
* stuck.
@@ -8719,6 +8681,29 @@ static int do_8051_command(
write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0);
fail:
+ return return_code;
+}
+
+/*
+ * Returns:
+ * < 0 = Linux error, not able to get access
+ * > 0 = 8051 command RETURN_CODE
+ */
+static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
+ u64 *out_data)
+{
+ int return_code;
+
+ mutex_lock(&dd->dc8051_lock);
+ /* We can't send any commands to the 8051 if it's in reset */
+ if (dd->dc_shutdown) {
+ return_code = -ENODEV;
+ goto fail;
+ }
+
+ return_code = _do_8051_command(dd, type, in_data, out_data);
+
+fail:
mutex_unlock(&dd->dc8051_lock);
return return_code;
}
@@ -8728,16 +8713,17 @@ static int set_physical_link_state(struct hfi1_devdata *dd, u64 state)
return do_8051_command(dd, HCMD_CHANGE_PHY_STATE, state, NULL);
}
-int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
- u8 lane_id, u32 config_data)
+static int _load_8051_config(struct hfi1_devdata *dd, u8 field_id,
+ u8 lane_id, u32 config_data)
{
u64 data;
int ret;
+ lockdep_assert_held(&dd->dc8051_lock);
data = (u64)field_id << LOAD_DATA_FIELD_ID_SHIFT
| (u64)lane_id << LOAD_DATA_LANE_ID_SHIFT
| (u64)config_data << LOAD_DATA_DATA_SHIFT;
- ret = do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL);
+ ret = _do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL);
if (ret != HCMD_SUCCESS) {
dd_dev_err(dd,
"load 8051 config: field id %d, lane %d, err %d\n",
@@ -8746,6 +8732,18 @@ int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
return ret;
}
+int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
+ u8 lane_id, u32 config_data)
+{
+ int return_code;
+
+ mutex_lock(&dd->dc8051_lock);
+ return_code = _load_8051_config(dd, field_id, lane_id, config_data);
+ mutex_unlock(&dd->dc8051_lock);
+
+ return return_code;
+}
+
/*
* Read the 8051 firmware "registers". Use the RAM directly. Always
* set the result, even on error.
@@ -8861,13 +8859,14 @@ int write_host_interface_version(struct hfi1_devdata *dd, u8 version)
u32 frame;
u32 mask;
+ lockdep_assert_held(&dd->dc8051_lock);
mask = (HOST_INTERFACE_VERSION_MASK << HOST_INTERFACE_VERSION_SHIFT);
read_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG, &frame);
/* Clear, then set field */
frame &= ~mask;
frame |= ((u32)version << HOST_INTERFACE_VERSION_SHIFT);
- return load_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG,
- frame);
+ return _load_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG,
+ frame);
}
void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor,
@@ -8932,14 +8931,6 @@ static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx)
*enable_lane_rx = (frame >> ENABLE_LANE_RX_SHIFT) & ENABLE_LANE_RX_MASK;
}
-static void read_mgmt_allowed(struct hfi1_devdata *dd, u8 *mgmt_allowed)
-{
- u32 frame;
-
- read_8051_config(dd, REMOTE_LNI_INFO, GENERAL_CONFIG, &frame);
- *mgmt_allowed = (frame >> MGMT_ALLOWED_SHIFT) & MGMT_ALLOWED_MASK;
-}
-
static void read_last_local_state(struct hfi1_devdata *dd, u32 *lls)
{
read_8051_config(dd, LAST_LOCAL_STATE_COMPLETE, GENERAL_CONFIG, lls);
@@ -9161,25 +9152,6 @@ static int do_quick_linkup(struct hfi1_devdata *dd)
}
/*
- * Set the SerDes to internal loopback mode.
- * Returns 0 on success, -errno on error.
- */
-static int set_serdes_loopback_mode(struct hfi1_devdata *dd)
-{
- int ret;
-
- ret = set_physical_link_state(dd, PLS_INTERNAL_SERDES_LOOPBACK);
- if (ret == HCMD_SUCCESS)
- return 0;
- dd_dev_err(dd,
- "Set physical link state to SerDes Loopback failed with return %d\n",
- ret);
- if (ret >= 0)
- ret = -EINVAL;
- return ret;
-}
-
-/*
* Do all special steps to set up loopback.
*/
static int init_loopback(struct hfi1_devdata *dd)
@@ -9204,13 +9176,11 @@ static int init_loopback(struct hfi1_devdata *dd)
return 0;
}
- /* handle serdes loopback */
- if (loopback == LOOPBACK_SERDES) {
- /* internal serdes loopack needs quick linkup on RTL */
- if (dd->icode == ICODE_RTL_SILICON)
- quick_linkup = 1;
- return set_serdes_loopback_mode(dd);
- }
+ /*
+ * SerDes loopback init sequence is handled in set_local_link_attributes
+ */
+ if (loopback == LOOPBACK_SERDES)
+ return 0;
/* LCB loopback - handled at poll time */
if (loopback == LOOPBACK_LCB) {
@@ -9269,7 +9239,7 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
u8 tx_polarity_inversion;
u8 rx_polarity_inversion;
int ret;
-
+ u32 misc_bits = 0;
/* reset our fabric serdes to clear any lingering problems */
fabric_serdes_reset(dd);
@@ -9315,7 +9285,14 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
if (ret != HCMD_SUCCESS)
goto set_local_link_attributes_fail;
- ret = write_vc_local_link_width(dd, 0, 0,
+ /*
+ * SerDes loopback init sequence requires
+ * setting bit 0 of MISC_CONFIG_BITS
+ */
+ if (loopback == LOOPBACK_SERDES)
+ misc_bits |= 1 << LOOPBACK_SERDES_CONFIG_BIT_MASK_SHIFT;
+
+ ret = write_vc_local_link_width(dd, misc_bits, 0,
opa_to_vc_link_widths(
ppd->link_width_enabled));
if (ret != HCMD_SUCCESS)
@@ -9809,9 +9786,9 @@ void hfi1_quiet_serdes(struct hfi1_pportdata *ppd)
cancel_delayed_work_sync(&ppd->start_link_work);
ppd->offline_disabled_reason =
- HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
- set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0,
- OPA_LINKDOWN_REASON_SMA_DISABLED);
+ HFI1_ODR_MASK(OPA_LINKDOWN_REASON_REBOOT);
+ set_link_down_reason(ppd, OPA_LINKDOWN_REASON_REBOOT, 0,
+ OPA_LINKDOWN_REASON_REBOOT);
set_link_state(ppd, HLS_DN_OFFLINE);
/* disable the port */
@@ -9952,7 +9929,7 @@ int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which)
goto unimplemented;
case HFI1_IB_CFG_OP_VLS:
- val = ppd->vls_operational;
+ val = ppd->actual_vls_operational;
break;
case HFI1_IB_CFG_VL_HIGH_CAP: /* VL arb high priority table size */
val = VL_ARB_HIGH_PRIO_TABLE_SIZE;
@@ -9967,7 +9944,7 @@ int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which)
val = ppd->phy_error_threshold;
break;
case HFI1_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */
- val = dd->link_default;
+ val = HLS_DEFAULT;
break;
case HFI1_IB_CFG_HRTBT: /* Heartbeat off/enable/auto */
@@ -10170,6 +10147,10 @@ static const char * const state_complete_reasons[] = {
[0x33] =
"Link partner completed the VerifyCap state, but the passing lanes do not meet the local link width policy",
[0x34] = tx_out_of_policy,
+ [0x35] = "Negotiated link width is mutually exclusive",
+ [0x36] =
+ "Timed out before receiving verifycap frames in VerifyCap.Exchange",
+ [0x37] = "Unable to resolve secure data exchange",
};
static const char *state_complete_reason_code_string(struct hfi1_pportdata *ppd,
@@ -10298,9 +10279,6 @@ static void force_logical_link_state_down(struct hfi1_pportdata *ppd)
write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, 0);
write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK, 0);
- /* adjust ppd->statusp, if needed */
- update_statusp(ppd, IB_PORT_DOWN);
-
dd_dev_info(ppd->dd, "logical state forced to LINK_DOWN\n");
}
@@ -10382,6 +10360,7 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
force_logical_link_state_down(ppd);
ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
+ update_statusp(ppd, IB_PORT_DOWN);
/*
* The LNI has a mandatory wait time after the physical state
@@ -10569,7 +10548,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
orig_new_state = state;
if (state == HLS_DN_DOWNDEF)
- state = dd->link_default;
+ state = HLS_DEFAULT;
/* interpret poll -> poll as a link bounce */
poll_bounce = ppd->host_link_state == HLS_DN_POLL &&
@@ -10643,6 +10622,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
handle_linkup_change(dd, 1);
ppd->host_link_state = HLS_UP_INIT;
+ update_statusp(ppd, IB_PORT_INIT);
break;
case HLS_UP_ARMED:
if (ppd->host_link_state != HLS_UP_INIT)
@@ -10664,6 +10644,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
break;
}
ppd->host_link_state = HLS_UP_ARMED;
+ update_statusp(ppd, IB_PORT_ARMED);
/*
* The simulator does not currently implement SMA messages,
* so neighbor_normal is not set. Set it here when we first
@@ -10686,6 +10667,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
/* tell all engines to go running */
sdma_all_running(dd);
ppd->host_link_state = HLS_UP_ACTIVE;
+ update_statusp(ppd, IB_PORT_ACTIVE);
/* Signal the IB layer that the port has went active */
event.device = &dd->verbs_dev.rdi.ibdev;
@@ -12089,9 +12071,8 @@ static void free_cntrs(struct hfi1_devdata *dd)
struct hfi1_pportdata *ppd;
int i;
- if (dd->synth_stats_timer.data)
+ if (dd->synth_stats_timer.function)
del_timer_sync(&dd->synth_stats_timer);
- dd->synth_stats_timer.data = 0;
ppd = (struct hfi1_pportdata *)(dd + 1);
for (i = 0; i < dd->num_pports; i++, ppd++) {
kfree(ppd->cntrs);
@@ -12367,9 +12348,9 @@ static void do_update_synth_timer(struct work_struct *work)
}
}
-static void update_synth_timer(unsigned long opaque)
+static void update_synth_timer(struct timer_list *t)
{
- struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
+ struct hfi1_devdata *dd = from_timer(dd, t, synth_stats_timer);
queue_work(dd->update_cntr_wq, &dd->update_cntr_work);
mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
@@ -12387,8 +12368,7 @@ static int init_cntrs(struct hfi1_devdata *dd)
const int bit_type_32_sz = strlen(bit_type_32);
/* set up the stats timer; the add_timer is done at the end */
- setup_timer(&dd->synth_stats_timer, update_synth_timer,
- (unsigned long)dd);
+ timer_setup(&dd->synth_stats_timer, update_synth_timer, 0);
/***********************/
/* per device counters */
@@ -12701,6 +12681,17 @@ const char *opa_pstate_name(u32 pstate)
return "unknown";
}
+/**
+ * update_statusp - Update userspace status flag
+ * @ppd: Port data structure
+ * @state: port state information
+ *
+ * Actual port status is determined by the host_link_state value
+ * in the ppd.
+ *
+ * host_link_state MUST be updated before updating the user space
+ * statusp.
+ */
static void update_statusp(struct hfi1_pportdata *ppd, u32 state)
{
/*
@@ -12726,9 +12717,11 @@ static void update_statusp(struct hfi1_pportdata *ppd, u32 state)
break;
}
}
+ dd_dev_info(ppd->dd, "logical state changed to %s (0x%x)\n",
+ opa_lstate_name(state), state);
}
-/*
+/**
* wait_logical_linkstate - wait for an IB link state change to occur
* @ppd: port device
* @state: the state to wait for
@@ -12759,11 +12752,6 @@ static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
msleep(20);
}
- update_statusp(ppd, state);
- dd_dev_info(ppd->dd,
- "logical state changed to %s (0x%x)\n",
- opa_lstate_name(state),
- state);
return 0;
}
@@ -12910,6 +12898,32 @@ int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp)
return ret;
}
+/**
+ * get_int_mask - get 64 bit int mask
+ * @dd - the devdata
+ * @i - the csr (relative to CCE_INT_MASK)
+ *
+ * Returns the mask with the urgent interrupt mask
+ * bit clear for kernel receive contexts.
+ */
+static u64 get_int_mask(struct hfi1_devdata *dd, u32 i)
+{
+ u64 mask = U64_MAX; /* default to no change */
+
+ if (i >= (IS_RCVURGENT_START / 64) && i < (IS_RCVURGENT_END / 64)) {
+ int j = (i - (IS_RCVURGENT_START / 64)) * 64;
+ int k = !j ? IS_RCVURGENT_START % 64 : 0;
+
+ if (j)
+ j -= IS_RCVURGENT_START % 64;
+ /* j = 0..dd->first_dyn_alloc_ctxt - 1,k = 0..63 */
+ for (; j < dd->first_dyn_alloc_ctxt && k < 64; j++, k++)
+ /* convert to bit in mask and clear */
+ mask &= ~BIT_ULL(k);
+ }
+ return mask;
+}
+
/* ========================================================================= */
/*
@@ -12923,9 +12937,12 @@ void set_intr_state(struct hfi1_devdata *dd, u32 enable)
* In HFI, the mask needs to be 1 to allow interrupts.
*/
if (enable) {
- /* enable all interrupts */
- for (i = 0; i < CCE_NUM_INT_CSRS; i++)
- write_csr(dd, CCE_INT_MASK + (8 * i), ~(u64)0);
+ /* enable all interrupts but urgent on kernel contexts */
+ for (i = 0; i < CCE_NUM_INT_CSRS; i++) {
+ u64 mask = get_int_mask(dd, i);
+
+ write_csr(dd, CCE_INT_MASK + (8 * i), mask);
+ }
init_qsfp_int(dd);
} else {
@@ -12980,7 +12997,7 @@ static void clean_up_interrupts(struct hfi1_devdata *dd)
if (!me->arg) /* => no irq, no affinity */
continue;
hfi1_put_irq_affinity(dd, me);
- free_irq(me->irq, me->arg);
+ pci_free_irq(dd->pcidev, i, me->arg);
}
/* clean structures */
@@ -12990,7 +13007,7 @@ static void clean_up_interrupts(struct hfi1_devdata *dd)
} else {
/* INTx */
if (dd->requested_intx_irq) {
- free_irq(dd->pcidev->irq, dd);
+ pci_free_irq(dd->pcidev, 0, dd);
dd->requested_intx_irq = 0;
}
disable_intx(dd->pcidev);
@@ -13049,10 +13066,8 @@ static int request_intx_irq(struct hfi1_devdata *dd)
{
int ret;
- snprintf(dd->intx_name, sizeof(dd->intx_name), DRIVER_NAME "_%d",
- dd->unit);
- ret = request_irq(dd->pcidev->irq, general_interrupt,
- IRQF_SHARED, dd->intx_name, dd);
+ ret = pci_request_irq(dd->pcidev, 0, general_interrupt, NULL, dd,
+ DRIVER_NAME "_%d", dd->unit);
if (ret)
dd_dev_err(dd, "unable to request INTx interrupt, err %d\n",
ret);
@@ -13074,7 +13089,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
first_sdma = last_general;
last_sdma = first_sdma + dd->num_sdma;
first_rx = last_sdma;
- last_rx = first_rx + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT;
+ last_rx = first_rx + dd->n_krcv_queues + dd->num_vnic_contexts;
/* VNIC MSIx interrupts get mapped when VNIC contexts are created */
dd->first_dyn_msix_idx = first_rx + dd->n_krcv_queues;
@@ -13095,13 +13110,14 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
int idx;
struct hfi1_ctxtdata *rcd = NULL;
struct sdma_engine *sde = NULL;
+ char name[MAX_NAME_SIZE];
- /* obtain the arguments to request_irq */
+ /* obtain the arguments to pci_request_irq */
if (first_general <= i && i < last_general) {
idx = i - first_general;
handler = general_interrupt;
arg = dd;
- snprintf(me->name, sizeof(me->name),
+ snprintf(name, sizeof(name),
DRIVER_NAME "_%d", dd->unit);
err_info = "general";
me->type = IRQ_GENERAL;
@@ -13110,14 +13126,14 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
sde = &dd->per_sdma[idx];
handler = sdma_interrupt;
arg = sde;
- snprintf(me->name, sizeof(me->name),
+ snprintf(name, sizeof(name),
DRIVER_NAME "_%d sdma%d", dd->unit, idx);
err_info = "sdma";
remap_sdma_interrupts(dd, idx, i);
me->type = IRQ_SDMA;
} else if (first_rx <= i && i < last_rx) {
idx = i - first_rx;
- rcd = hfi1_rcd_get_by_index(dd, idx);
+ rcd = hfi1_rcd_get_by_index_safe(dd, idx);
if (rcd) {
/*
* Set the interrupt register and mask for this
@@ -13129,7 +13145,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
handler = receive_context_interrupt;
thread = receive_context_thread;
arg = rcd;
- snprintf(me->name, sizeof(me->name),
+ snprintf(name, sizeof(name),
DRIVER_NAME "_%d kctxt%d",
dd->unit, idx);
err_info = "receive context";
@@ -13150,18 +13166,10 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
if (!arg)
continue;
/* make sure the name is terminated */
- me->name[sizeof(me->name) - 1] = 0;
+ name[sizeof(name) - 1] = 0;
me->irq = pci_irq_vector(dd->pcidev, i);
- /*
- * On err return me->irq. Don't need to clear this
- * because 'arg' has not been set, and cleanup will
- * do the right thing.
- */
- if (me->irq < 0)
- return me->irq;
-
- ret = request_threaded_irq(me->irq, handler, thread, 0,
- me->name, arg);
+ ret = pci_request_irq(dd->pcidev, i, handler, thread, arg,
+ name);
if (ret) {
dd_dev_err(dd,
"unable to allocate %s interrupt, irq %d, index %d, err %d\n",
@@ -13169,7 +13177,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
return ret;
}
/*
- * assign arg after request_irq call, so it will be
+ * assign arg after pci_request_irq call, so it will be
* cleaned up
*/
me->arg = arg;
@@ -13187,7 +13195,7 @@ void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd)
int i;
if (!dd->num_msix_entries) {
- synchronize_irq(dd->pcidev->irq);
+ synchronize_irq(pci_irq_vector(dd->pcidev, 0));
return;
}
@@ -13208,7 +13216,7 @@ void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd)
return;
hfi1_put_irq_affinity(dd, me);
- free_irq(me->irq, me->arg);
+ pci_free_irq(dd->pcidev, rcd->msix_intr, me->arg);
me->arg = NULL;
}
@@ -13231,28 +13239,21 @@ void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd)
rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
rcd->imask = ((u64)1) <<
((IS_RCVAVAIL_START + idx) % 64);
-
- snprintf(me->name, sizeof(me->name),
- DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
- me->name[sizeof(me->name) - 1] = 0;
me->type = IRQ_RCVCTXT;
me->irq = pci_irq_vector(dd->pcidev, rcd->msix_intr);
- if (me->irq < 0) {
- dd_dev_err(dd, "vnic irq vector request (idx %d) fail %d\n",
- idx, me->irq);
- return;
- }
remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr);
- ret = request_threaded_irq(me->irq, receive_context_interrupt,
- receive_context_thread, 0, me->name, arg);
+ ret = pci_request_irq(dd->pcidev, rcd->msix_intr,
+ receive_context_interrupt,
+ receive_context_thread, arg,
+ DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
if (ret) {
dd_dev_err(dd, "vnic irq request (irq %d, idx %d) fail %d\n",
me->irq, idx, ret);
return;
}
/*
- * assign arg after request_irq call, so it will be
+ * assign arg after pci_request_irq call, so it will be
* cleaned up
*/
me->arg = arg;
@@ -13261,7 +13262,7 @@ void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd)
if (ret) {
dd_dev_err(dd,
"unable to pin IRQ %d\n", ret);
- free_irq(me->irq, me->arg);
+ pci_free_irq(dd->pcidev, rcd->msix_intr, me->arg);
}
}
@@ -13294,8 +13295,9 @@ static int set_up_interrupts(struct hfi1_devdata *dd)
* slow source, SDMACleanupDone)
* N interrupts - one per used SDMA engine
* M interrupt - one per kernel receive context
+ * V interrupt - one for each VNIC context
*/
- total = 1 + dd->num_sdma + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT;
+ total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_vnic_contexts;
/* ask for MSI-X interrupts */
request = request_msix(dd, total);
@@ -13356,15 +13358,18 @@ fail:
* in array of contexts
* freectxts - number of free user contexts
* num_send_contexts - number of PIO send contexts being used
+ * num_vnic_contexts - number of contexts reserved for VNIC
*/
static int set_up_context_variables(struct hfi1_devdata *dd)
{
unsigned long num_kernel_contexts;
+ u16 num_vnic_contexts = HFI1_NUM_VNIC_CTXT;
int total_contexts;
int ret;
unsigned ngroups;
int qos_rmt_count;
int user_rmt_reduced;
+ u32 n_usr_ctxts;
/*
* Kernel receive contexts:
@@ -13393,59 +13398,63 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
num_kernel_contexts);
num_kernel_contexts = dd->chip_send_contexts - num_vls - 1;
}
+
+ /* Accommodate VNIC contexts if possible */
+ if ((num_kernel_contexts + num_vnic_contexts) > dd->chip_rcv_contexts) {
+ dd_dev_err(dd, "No receive contexts available for VNIC\n");
+ num_vnic_contexts = 0;
+ }
+ total_contexts = num_kernel_contexts + num_vnic_contexts;
+
/*
* User contexts:
* - default to 1 user context per real (non-HT) CPU core if
* num_user_contexts is negative
*/
if (num_user_contexts < 0)
- num_user_contexts =
- cpumask_weight(&node_affinity.real_cpu_mask);
-
- total_contexts = num_kernel_contexts + num_user_contexts;
-
+ n_usr_ctxts = cpumask_weight(&node_affinity.real_cpu_mask);
+ else
+ n_usr_ctxts = num_user_contexts;
/*
* Adjust the counts given a global max.
*/
- if (total_contexts > dd->chip_rcv_contexts) {
+ if (total_contexts + n_usr_ctxts > dd->chip_rcv_contexts) {
dd_dev_err(dd,
- "Reducing # user receive contexts to: %d, from %d\n",
- (int)(dd->chip_rcv_contexts - num_kernel_contexts),
- (int)num_user_contexts);
- num_user_contexts = dd->chip_rcv_contexts - num_kernel_contexts;
+ "Reducing # user receive contexts to: %d, from %u\n",
+ (int)(dd->chip_rcv_contexts - total_contexts),
+ n_usr_ctxts);
/* recalculate */
- total_contexts = num_kernel_contexts + num_user_contexts;
+ n_usr_ctxts = dd->chip_rcv_contexts - total_contexts;
}
/* each user context requires an entry in the RMT */
qos_rmt_count = qos_rmt_entries(dd, NULL, NULL);
- if (qos_rmt_count + num_user_contexts > NUM_MAP_ENTRIES) {
+ if (qos_rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) {
user_rmt_reduced = NUM_MAP_ENTRIES - qos_rmt_count;
dd_dev_err(dd,
- "RMT size is reducing the number of user receive contexts from %d to %d\n",
- (int)num_user_contexts,
+ "RMT size is reducing the number of user receive contexts from %u to %d\n",
+ n_usr_ctxts,
user_rmt_reduced);
/* recalculate */
- num_user_contexts = user_rmt_reduced;
- total_contexts = num_kernel_contexts + num_user_contexts;
+ n_usr_ctxts = user_rmt_reduced;
}
- /* Accommodate VNIC contexts */
- if ((total_contexts + HFI1_NUM_VNIC_CTXT) <= dd->chip_rcv_contexts)
- total_contexts += HFI1_NUM_VNIC_CTXT;
+ total_contexts += n_usr_ctxts;
/* the first N are kernel contexts, the rest are user/vnic contexts */
dd->num_rcv_contexts = total_contexts;
dd->n_krcv_queues = num_kernel_contexts;
dd->first_dyn_alloc_ctxt = num_kernel_contexts;
- dd->num_user_contexts = num_user_contexts;
- dd->freectxts = num_user_contexts;
+ dd->num_vnic_contexts = num_vnic_contexts;
+ dd->num_user_contexts = n_usr_ctxts;
+ dd->freectxts = n_usr_ctxts;
dd_dev_info(dd,
- "rcv contexts: chip %d, used %d (kernel %d, user %d)\n",
+ "rcv contexts: chip %d, used %d (kernel %d, vnic %u, user %u)\n",
(int)dd->chip_rcv_contexts,
(int)dd->num_rcv_contexts,
(int)dd->n_krcv_queues,
- (int)dd->num_rcv_contexts - dd->n_krcv_queues);
+ dd->num_vnic_contexts,
+ dd->num_user_contexts);
/*
* Receive array allocation:
@@ -14962,8 +14971,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
init_vl_arb_caches(ppd);
}
- dd->link_default = HLS_DN_POLL;
-
/*
* Do remaining PCIe setup and save PCIe values in dd.
* Any error printing is already done by the init code.