summaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw/hfi1/chip.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/hfi1/chip.c')
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c808
1 files changed, 466 insertions, 342 deletions
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 94b54850ec75..b2ed4b9cda6e 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1012,14 +1012,15 @@ static struct flag_table dc8051_info_err_flags[] = {
*/
static struct flag_table dc8051_info_host_msg_flags[] = {
FLAG_ENTRY0("Host request done", 0x0001),
- FLAG_ENTRY0("BC SMA message", 0x0002),
- FLAG_ENTRY0("BC PWR_MGM message", 0x0004),
+ FLAG_ENTRY0("BC PWR_MGM message", 0x0002),
+ FLAG_ENTRY0("BC SMA message", 0x0004),
FLAG_ENTRY0("BC Unknown message (BCC)", 0x0008),
FLAG_ENTRY0("BC Unknown message (LCB)", 0x0010),
FLAG_ENTRY0("External device config request", 0x0020),
FLAG_ENTRY0("VerifyCap all frames received", 0x0040),
FLAG_ENTRY0("LinkUp achieved", 0x0080),
FLAG_ENTRY0("Link going down", 0x0100),
+ FLAG_ENTRY0("Link width downgraded", 0x0200),
};
static u32 encoded_size(u32 size);
@@ -1064,8 +1065,13 @@ static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
static int read_idle_sma(struct hfi1_devdata *dd, u64 *data);
static int thermal_init(struct hfi1_devdata *dd);
+static void update_statusp(struct hfi1_pportdata *ppd, u32 state);
static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
int msecs);
+static void log_state_transition(struct hfi1_pportdata *ppd, u32 state);
+static void log_physical_state(struct hfi1_pportdata *ppd, u32 state);
+static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state,
+ int msecs);
static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc);
static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr);
static void handle_temp_err(struct hfi1_devdata *dd);
@@ -1294,25 +1300,71 @@ CNTR_ELEM(#name, \
CNTR_SYNTH, \
access_ibp_##cntr)
+/**
+ * hfi1_addr_from_offset - return addr for readq/writeq
+ * @dd: the dd device
+ * @offset: the offset of the CSR within bar0
+ *
+ * This routine selects the appropriate base address
+ * based on the indicated offset.
+ */
+static inline void __iomem *hfi1_addr_from_offset(
+ const struct hfi1_devdata *dd,
+ u32 offset)
+{
+ if (offset >= dd->base2_start)
+ return dd->kregbase2 + (offset - dd->base2_start);
+ return dd->kregbase1 + offset;
+}
+
+/**
+ * read_csr - read CSR at the indicated offset
+ * @dd: the dd device
+ * @offset: the offset of the CSR within bar0
+ *
+ * Return: the value read or all FF's if there
+ * is no mapping
+ */
u64 read_csr(const struct hfi1_devdata *dd, u32 offset)
{
- if (dd->flags & HFI1_PRESENT) {
- return readq((void __iomem *)dd->kregbase + offset);
- }
+ if (dd->flags & HFI1_PRESENT)
+ return readq(hfi1_addr_from_offset(dd, offset));
return -1;
}
+/**
+ * write_csr - write CSR at the indicated offset
+ * @dd: the dd device
+ * @offset: the offset of the CSR within bar0
+ * @value: value to write
+ */
void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value)
{
- if (dd->flags & HFI1_PRESENT)
- writeq(value, (void __iomem *)dd->kregbase + offset);
+ if (dd->flags & HFI1_PRESENT) {
+ void __iomem *base = hfi1_addr_from_offset(dd, offset);
+
+ /* avoid write to RcvArray */
+ if (WARN_ON(offset >= RCV_ARRAY && offset < dd->base2_start))
+ return;
+ writeq(value, base);
+ }
}
+/**
+ * get_csr_addr - return the iomem address for offset
+ * @dd: the dd device
+ * @offset: the offset of the CSR within bar0
+ *
+ * Return: The iomem address to use in subsequent
+ * writeq/readq operations.
+ */
void __iomem *get_csr_addr(
- struct hfi1_devdata *dd,
+ const struct hfi1_devdata *dd,
u32 offset)
{
- return (void __iomem *)dd->kregbase + offset;
+ if (dd->flags & HFI1_PRESENT)
+ return hfi1_addr_from_offset(dd, offset);
+ return NULL;
}
static inline u64 read_write_csr(const struct hfi1_devdata *dd, u32 csr,
@@ -5496,7 +5548,7 @@ static void update_rcverr_timer(unsigned long opaque)
set_link_down_reason(
ppd, OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN, 0,
OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN);
- queue_work(ppd->hfi1_wq, &ppd->link_bounce_work);
+ queue_work(ppd->link_wq, &ppd->link_bounce_work);
}
dd->rcv_ovfl_cnt = (u32)cur_ovfl_cnt;
@@ -6051,7 +6103,7 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg)
* will not happen. We have to do it here
* before turning the DC off.
*/
- queue_work(ppd->hfi1_wq, &ppd->link_down_work);
+ queue_work(ppd->link_wq, &ppd->link_down_work);
}
} else {
dd_dev_info(dd, "%s: QSFP module inserted\n",
@@ -6086,7 +6138,7 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg)
/* Schedule the QSFP work only if there is a cable attached. */
if (qsfp_mod_present(ppd))
- queue_work(ppd->hfi1_wq, &ppd->qsfp_info.qsfp_work);
+ queue_work(ppd->link_wq, &ppd->qsfp_info.qsfp_work);
}
static int request_host_lcb_access(struct hfi1_devdata *dd)
@@ -6735,13 +6787,17 @@ static void wait_for_freeze_status(struct hfi1_devdata *dd, int freeze)
static void rxe_freeze(struct hfi1_devdata *dd)
{
int i;
+ struct hfi1_ctxtdata *rcd;
/* disable port */
clear_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
/* disable all receive contexts */
- for (i = 0; i < dd->num_rcv_contexts; i++)
- hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS, i);
+ for (i = 0; i < dd->num_rcv_contexts; i++) {
+ rcd = hfi1_rcd_get_by_index(dd, i);
+ hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS, rcd);
+ hfi1_rcd_put(rcd);
+ }
}
/*
@@ -6753,21 +6809,24 @@ static void rxe_freeze(struct hfi1_devdata *dd)
static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
{
u32 rcvmask;
- int i;
+ u16 i;
+ struct hfi1_ctxtdata *rcd;
/* enable all kernel contexts */
for (i = 0; i < dd->num_rcv_contexts; i++) {
- struct hfi1_ctxtdata *rcd = dd->rcd[i];
+ rcd = hfi1_rcd_get_by_index(dd, i);
/* Ensure all non-user contexts(including vnic) are enabled */
- if (!rcd || !rcd->sc || (rcd->sc->type == SC_USER))
+ if (!rcd || !rcd->sc || (rcd->sc->type == SC_USER)) {
+ hfi1_rcd_put(rcd);
continue;
-
+ }
rcvmask = HFI1_RCVCTRL_CTXT_ENB;
/* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */
- rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ?
+ rcvmask |= HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ?
HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
- hfi1_rcvctrl(dd, rcvmask, i);
+ hfi1_rcvctrl(dd, rcvmask, rcd);
+ hfi1_rcd_put(rcd);
}
/* enable port */
@@ -6906,7 +6965,7 @@ static void reset_neighbor_info(struct hfi1_pportdata *ppd)
static const char * const link_down_reason_strs[] = {
[OPA_LINKDOWN_REASON_NONE] = "None",
- [OPA_LINKDOWN_REASON_RCV_ERROR_0] = "Recive error 0",
+ [OPA_LINKDOWN_REASON_RCV_ERROR_0] = "Receive error 0",
[OPA_LINKDOWN_REASON_BAD_PKT_LEN] = "Bad packet length",
[OPA_LINKDOWN_REASON_PKT_TOO_LONG] = "Packet too long",
[OPA_LINKDOWN_REASON_PKT_TOO_SHORT] = "Packet too short",
@@ -6996,6 +7055,7 @@ void handle_link_down(struct work_struct *work)
/* Go offline first, then deal with reading/writing through 8051 */
was_up = !!(ppd->host_link_state & HLS_UP);
set_link_state(ppd, HLS_DN_OFFLINE);
+ xchg(&ppd->is_link_down_queued, 0);
if (was_up) {
lcl_reason = 0;
@@ -7330,7 +7390,7 @@ void handle_verify_cap(struct work_struct *work)
struct hfi1_devdata *dd = ppd->dd;
u64 reg;
u8 power_management;
- u8 continious;
+ u8 continuous;
u8 vcu;
u8 vau;
u8 z;
@@ -7349,7 +7409,7 @@ void handle_verify_cap(struct work_struct *work)
lcb_shutdown(dd, 0);
adjust_lcb_for_fpga_serdes(dd);
- read_vc_remote_phy(dd, &power_management, &continious);
+ read_vc_remote_phy(dd, &power_management, &continuous);
read_vc_remote_fabric(dd, &vau, &z, &vcu, &vl15buf,
&partner_supported_crc);
read_vc_remote_link_width(dd, &remote_tx_rate, &link_widths);
@@ -7363,7 +7423,7 @@ void handle_verify_cap(struct work_struct *work)
get_link_widths(dd, &active_tx, &active_rx);
dd_dev_info(dd,
"Peer PHY: power management 0x%x, continuous updates 0x%x\n",
- (int)power_management, (int)continious);
+ (int)power_management, (int)continuous);
dd_dev_info(dd,
"Peer Fabric: vAU %d, Z %d, vCU %d, vl15 credits 0x%x, CRC sizes 0x%x\n",
(int)vau, (int)z, (int)vcu, (int)vl15buf,
@@ -7689,12 +7749,12 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
host_msg &= ~(u64)HOST_REQ_DONE;
}
if (host_msg & BC_SMA_MSG) {
- queue_work(ppd->hfi1_wq, &ppd->sma_message_work);
+ queue_work(ppd->link_wq, &ppd->sma_message_work);
host_msg &= ~(u64)BC_SMA_MSG;
}
if (host_msg & LINKUP_ACHIEVED) {
dd_dev_info(dd, "8051: Link up\n");
- queue_work(ppd->hfi1_wq, &ppd->link_up_work);
+ queue_work(ppd->link_wq, &ppd->link_up_work);
host_msg &= ~(u64)LINKUP_ACHIEVED;
}
if (host_msg & EXT_DEVICE_CFG_REQ) {
@@ -7702,7 +7762,7 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
host_msg &= ~(u64)EXT_DEVICE_CFG_REQ;
}
if (host_msg & VERIFY_CAP_FRAME) {
- queue_work(ppd->hfi1_wq, &ppd->link_vc_work);
+ queue_work(ppd->link_wq, &ppd->link_vc_work);
host_msg &= ~(u64)VERIFY_CAP_FRAME;
}
if (host_msg & LINK_GOING_DOWN) {
@@ -7717,7 +7777,7 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
host_msg &= ~(u64)LINK_GOING_DOWN;
}
if (host_msg & LINK_WIDTH_DOWNGRADED) {
- queue_work(ppd->hfi1_wq, &ppd->link_downgrade_work);
+ queue_work(ppd->link_wq, &ppd->link_downgrade_work);
host_msg &= ~(u64)LINK_WIDTH_DOWNGRADED;
}
if (host_msg) {
@@ -7752,15 +7812,22 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
if (queue_link_down) {
/*
* if the link is already going down or disabled, do not
- * queue another
+ * queue another. If there's a link down entry already
+ * queued, don't queue another one.
*/
if ((ppd->host_link_state &
(HLS_GOING_OFFLINE | HLS_LINK_COOLDOWN)) ||
ppd->link_enabled == 0) {
- dd_dev_info(dd, "%s: not queuing link down\n",
- __func__);
+ dd_dev_info(dd, "%s: not queuing link down. host_link_state %x, link_enabled %x\n",
+ __func__, ppd->host_link_state,
+ ppd->link_enabled);
} else {
- queue_work(ppd->hfi1_wq, &ppd->link_down_work);
+ if (xchg(&ppd->is_link_down_queued, 1) == 1)
+ dd_dev_info(dd,
+ "%s: link down request already queued\n",
+ __func__);
+ else
+ queue_work(ppd->link_wq, &ppd->link_down_work);
}
}
}
@@ -7968,7 +8035,7 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
dd_dev_info_ratelimited(dd, "%s: PortErrorAction bounce\n",
__func__);
set_link_down_reason(ppd, lcl_reason, 0, lcl_reason);
- queue_work(ppd->hfi1_wq, &ppd->link_bounce_work);
+ queue_work(ppd->link_wq, &ppd->link_bounce_work);
}
}
@@ -8052,7 +8119,7 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
char *err_detail;
if (likely(source < dd->num_rcv_contexts)) {
- rcd = dd->rcd[source];
+ rcd = hfi1_rcd_get_by_index(dd, source);
if (rcd) {
/* Check for non-user contexts, including vnic */
if ((source < dd->first_dyn_alloc_ctxt) ||
@@ -8060,6 +8127,8 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
rcd->do_interrupt(rcd, 0);
else
handle_user_interrupt(rcd);
+
+ hfi1_rcd_put(rcd);
return; /* OK */
}
/* received an interrupt, but no rcd */
@@ -8081,12 +8150,14 @@ static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source)
char *err_detail;
if (likely(source < dd->num_rcv_contexts)) {
- rcd = dd->rcd[source];
+ rcd = hfi1_rcd_get_by_index(dd, source);
if (rcd) {
/* only pay attention to user urgent interrupts */
if ((source >= dd->first_dyn_alloc_ctxt) &&
(!rcd->sc || (rcd->sc->type == SC_USER)))
handle_user_interrupt(rcd);
+
+ hfi1_rcd_put(rcd);
return; /* OK */
}
/* received an interrupt, but no rcd */
@@ -8219,8 +8290,8 @@ static irqreturn_t sdma_interrupt(int irq, void *data)
/* handle the interrupt(s) */
sdma_engine_interrupt(sde, status);
} else {
- dd_dev_err(dd, "SDMA engine %u interrupt, but no status bits set\n",
- sde->this_idx);
+ dd_dev_err_ratelimited(dd, "SDMA engine %u interrupt, but no status bits set\n",
+ sde->this_idx);
}
return IRQ_HANDLED;
}
@@ -8291,7 +8362,7 @@ static irqreturn_t receive_context_interrupt(int irq, void *data)
int disposition;
int present;
- trace_hfi1_receive_interrupt(dd, rcd->ctxt);
+ trace_hfi1_receive_interrupt(dd, rcd);
this_cpu_inc(*dd->int_counter);
aspm_ctx_disable(rcd);
@@ -8781,6 +8852,20 @@ static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
& REMOTE_DEVICE_REV_MASK;
}
+int write_host_interface_version(struct hfi1_devdata *dd, u8 version)
+{
+ u32 frame;
+ u32 mask;
+
+ mask = (HOST_INTERFACE_VERSION_MASK << HOST_INTERFACE_VERSION_SHIFT);
+ read_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG, &frame);
+ /* Clear, then set field */
+ frame &= ~mask;
+ frame |= ((u32)version << HOST_INTERFACE_VERSION_SHIFT);
+ return load_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG,
+ frame);
+}
+
void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor,
u8 *ver_patch)
{
@@ -9257,12 +9342,6 @@ int start_link(struct hfi1_pportdata *ppd)
*/
tune_serdes(ppd);
- if (!ppd->link_enabled) {
- dd_dev_info(ppd->dd,
- "%s: stopping link start because link is disabled\n",
- __func__);
- return 0;
- }
if (!ppd->driver_link_ready) {
dd_dev_info(ppd->dd,
"%s: stopping link start because driver is not ready\n",
@@ -9373,13 +9452,13 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) ||
(qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING))
- dd_dev_info(dd, "%s: QSFP cable temperature too high\n",
- __func__);
+ dd_dev_err(dd, "%s: QSFP cable temperature too high\n",
+ __func__);
if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) ||
(qsfp_interrupt_status[0] & QSFP_LOW_TEMP_WARNING))
- dd_dev_info(dd, "%s: QSFP cable temperature too low\n",
- __func__);
+ dd_dev_err(dd, "%s: QSFP cable temperature too low\n",
+ __func__);
/*
* The remaining alarms/warnings don't matter if the link is down.
@@ -9389,75 +9468,75 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) ||
(qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING))
- dd_dev_info(dd, "%s: QSFP supply voltage too high\n",
- __func__);
+ dd_dev_err(dd, "%s: QSFP supply voltage too high\n",
+ __func__);
if ((qsfp_interrupt_status[1] & QSFP_LOW_VCC_ALARM) ||
(qsfp_interrupt_status[1] & QSFP_LOW_VCC_WARNING))
- dd_dev_info(dd, "%s: QSFP supply voltage too low\n",
- __func__);
+ dd_dev_err(dd, "%s: QSFP supply voltage too low\n",
+ __func__);
/* Byte 2 is vendor specific */
if ((qsfp_interrupt_status[3] & QSFP_HIGH_POWER_ALARM) ||
(qsfp_interrupt_status[3] & QSFP_HIGH_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable RX channel 1/2 power too high\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable RX channel 1/2 power too high\n",
+ __func__);
if ((qsfp_interrupt_status[3] & QSFP_LOW_POWER_ALARM) ||
(qsfp_interrupt_status[3] & QSFP_LOW_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable RX channel 1/2 power too low\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable RX channel 1/2 power too low\n",
+ __func__);
if ((qsfp_interrupt_status[4] & QSFP_HIGH_POWER_ALARM) ||
(qsfp_interrupt_status[4] & QSFP_HIGH_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable RX channel 3/4 power too high\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable RX channel 3/4 power too high\n",
+ __func__);
if ((qsfp_interrupt_status[4] & QSFP_LOW_POWER_ALARM) ||
(qsfp_interrupt_status[4] & QSFP_LOW_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable RX channel 3/4 power too low\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable RX channel 3/4 power too low\n",
+ __func__);
if ((qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_ALARM) ||
(qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 1/2 bias too high\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 1/2 bias too high\n",
+ __func__);
if ((qsfp_interrupt_status[5] & QSFP_LOW_BIAS_ALARM) ||
(qsfp_interrupt_status[5] & QSFP_LOW_BIAS_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 1/2 bias too low\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 1/2 bias too low\n",
+ __func__);
if ((qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_ALARM) ||
(qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 3/4 bias too high\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 3/4 bias too high\n",
+ __func__);
if ((qsfp_interrupt_status[6] & QSFP_LOW_BIAS_ALARM) ||
(qsfp_interrupt_status[6] & QSFP_LOW_BIAS_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 3/4 bias too low\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 3/4 bias too low\n",
+ __func__);
if ((qsfp_interrupt_status[7] & QSFP_HIGH_POWER_ALARM) ||
(qsfp_interrupt_status[7] & QSFP_HIGH_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 1/2 power too high\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 1/2 power too high\n",
+ __func__);
if ((qsfp_interrupt_status[7] & QSFP_LOW_POWER_ALARM) ||
(qsfp_interrupt_status[7] & QSFP_LOW_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 1/2 power too low\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 1/2 power too low\n",
+ __func__);
if ((qsfp_interrupt_status[8] & QSFP_HIGH_POWER_ALARM) ||
(qsfp_interrupt_status[8] & QSFP_HIGH_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 3/4 power too high\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 3/4 power too high\n",
+ __func__);
if ((qsfp_interrupt_status[8] & QSFP_LOW_POWER_ALARM) ||
(qsfp_interrupt_status[8] & QSFP_LOW_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 3/4 power too low\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 3/4 power too low\n",
+ __func__);
/* Bytes 9-10 and 11-12 are reserved */
/* Bytes 13-15 are vendor specific */
@@ -9480,6 +9559,13 @@ void qsfp_event(struct work_struct *work)
if (!qsfp_mod_present(ppd))
return;
+ if (ppd->host_link_state == HLS_DN_DISABLE) {
+ dd_dev_info(ppd->dd,
+ "%s: stopping link start because link is disabled\n",
+ __func__);
+ return;
+ }
+
/*
* Turn DC back on after cable has been re-inserted. Up until
* now, the DC has been in reset to save power.
@@ -9635,7 +9721,7 @@ static void try_start_link(struct hfi1_pportdata *ppd)
"QSFP not responding, waiting and retrying %d\n",
(int)ppd->qsfp_retry_count);
ppd->qsfp_retry_count++;
- queue_delayed_work(ppd->hfi1_wq, &ppd->start_link_work,
+ queue_delayed_work(ppd->link_wq, &ppd->start_link_work,
msecs_to_jiffies(QSFP_RETRY_WAIT));
return;
}
@@ -9742,17 +9828,6 @@ static inline int init_cpu_counters(struct hfi1_devdata *dd)
return 0;
}
-static const char * const pt_names[] = {
- "expected",
- "eager",
- "invalid"
-};
-
-static const char *pt_name(u32 type)
-{
- return type >= ARRAY_SIZE(pt_names) ? "unknown" : pt_names[type];
-}
-
/*
* index is the index into the receive array
*/
@@ -9760,35 +9835,34 @@ void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
u32 type, unsigned long pa, u16 order)
{
u64 reg;
- void __iomem *base = (dd->rcvarray_wc ? dd->rcvarray_wc :
- (dd->kregbase + RCV_ARRAY));
if (!(dd->flags & HFI1_PRESENT))
goto done;
- if (type == PT_INVALID) {
+ if (type == PT_INVALID || type == PT_INVALID_FLUSH) {
pa = 0;
+ order = 0;
} else if (type > PT_INVALID) {
dd_dev_err(dd,
"unexpected receive array type %u for index %u, not handled\n",
type, index);
goto done;
}
-
- hfi1_cdbg(TID, "type %s, index 0x%x, pa 0x%lx, bsize 0x%lx",
- pt_name(type), index, pa, (unsigned long)order);
+ trace_hfi1_put_tid(dd, index, type, pa, order);
#define RT_ADDR_SHIFT 12 /* 4KB kernel address boundary */
reg = RCV_ARRAY_RT_WRITE_ENABLE_SMASK
| (u64)order << RCV_ARRAY_RT_BUF_SIZE_SHIFT
| ((pa >> RT_ADDR_SHIFT) & RCV_ARRAY_RT_ADDR_MASK)
<< RCV_ARRAY_RT_ADDR_SHIFT;
- writeq(reg, base + (index * 8));
+ trace_hfi1_write_rcvarray(dd->rcvarray_wc + (index * 8), reg);
+ writeq(reg, dd->rcvarray_wc + (index * 8));
- if (type == PT_EAGER)
+ if (type == PT_EAGER || type == PT_INVALID_FLUSH || (index & 3) == 3)
/*
- * Eager entries are written one-by-one so we have to push them
- * after we write the entry.
+ * Eager entries are written and flushed
+ *
+ * Expected entries are flushed every 4 writes
*/
flush_wc();
done:
@@ -9810,15 +9884,6 @@ void hfi1_clear_tids(struct hfi1_ctxtdata *rcd)
hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
}
-struct ib_header *hfi1_get_msgheader(
- struct hfi1_devdata *dd, __le32 *rhf_addr)
-{
- u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr));
-
- return (struct ib_header *)
- (rhf_addr - dd->rhf_offset + offset);
-}
-
static const char * const ib_cfg_name_strings[] = {
"HFI1_IB_CFG_LIDLMC",
"HFI1_IB_CFG_LWID_DG_ENB",
@@ -10010,10 +10075,16 @@ static void set_lidlmc(struct hfi1_pportdata *ppd)
struct hfi1_devdata *dd = ppd->dd;
u32 mask = ~((1U << ppd->lmc) - 1);
u64 c1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG1);
+ u32 lid;
+ /*
+ * Program 0 in CSR if port lid is extended. This prevents
+ * 9B packets being sent out for large lids.
+ */
+ lid = (ppd->lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) ? 0 : ppd->lid;
c1 &= ~(DCC_CFG_PORT_CONFIG1_TARGET_DLID_SMASK
| DCC_CFG_PORT_CONFIG1_DLID_MASK_SMASK);
- c1 |= ((ppd->lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK)
+ c1 |= ((lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK)
<< DCC_CFG_PORT_CONFIG1_TARGET_DLID_SHIFT) |
((mask & DCC_CFG_PORT_CONFIG1_DLID_MASK_MASK)
<< DCC_CFG_PORT_CONFIG1_DLID_MASK_SHIFT);
@@ -10024,7 +10095,7 @@ static void set_lidlmc(struct hfi1_pportdata *ppd)
*/
sreg = ((mask & SEND_CTXT_CHECK_SLID_MASK_MASK) <<
SEND_CTXT_CHECK_SLID_MASK_SHIFT) |
- (((ppd->lid & mask) & SEND_CTXT_CHECK_SLID_VALUE_MASK) <<
+ (((lid & mask) & SEND_CTXT_CHECK_SLID_VALUE_MASK) <<
SEND_CTXT_CHECK_SLID_VALUE_SHIFT);
for (i = 0; i < dd->chip_send_contexts; i++) {
@@ -10034,29 +10105,7 @@ static void set_lidlmc(struct hfi1_pportdata *ppd)
}
/* Now we have to do the same thing for the sdma engines */
- sdma_update_lmc(dd, mask, ppd->lid);
-}
-
-static int wait_phy_linkstate(struct hfi1_devdata *dd, u32 state, u32 msecs)
-{
- unsigned long timeout;
- u32 curr_state;
-
- timeout = jiffies + msecs_to_jiffies(msecs);
- while (1) {
- curr_state = read_physical_state(dd);
- if (curr_state == state)
- break;
- if (time_after(jiffies, timeout)) {
- dd_dev_err(dd,
- "timeout waiting for phy link state 0x%x, current state is 0x%x\n",
- state, curr_state);
- return -ETIMEDOUT;
- }
- usleep_range(1950, 2050); /* sleep 2ms-ish */
- }
-
- return 0;
+ sdma_update_lmc(dd, mask, lid);
}
static const char *state_completed_string(u32 completed)
@@ -10238,8 +10287,10 @@ static void force_logical_link_state_down(struct hfi1_pportdata *ppd)
write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, 0);
write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK, 0);
- /* call again to adjust ppd->statusp, if needed */
- get_logical_state(ppd);
+ /* adjust ppd->statusp, if needed */
+ update_statusp(ppd, IB_PORT_DOWN);
+
+ dd_dev_info(ppd->dd, "logical state forced to LINK_DOWN\n");
}
/*
@@ -10253,49 +10304,35 @@ static void force_logical_link_state_down(struct hfi1_pportdata *ppd)
static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
{
struct hfi1_devdata *dd = ppd->dd;
- u32 pstate, previous_state;
+ u32 previous_state;
int ret;
- int do_transition;
- int do_wait;
update_lcb_cache(dd);
previous_state = ppd->host_link_state;
ppd->host_link_state = HLS_GOING_OFFLINE;
- pstate = read_physical_state(dd);
- if (pstate == PLS_OFFLINE) {
- do_transition = 0; /* in right state */
- do_wait = 0; /* ...no need to wait */
- } else if ((pstate & 0xf0) == PLS_OFFLINE) {
- do_transition = 0; /* in an offline transient state */
- do_wait = 1; /* ...wait for it to settle */
- } else {
- do_transition = 1; /* need to move to offline */
- do_wait = 1; /* ...will need to wait */
- }
- if (do_transition) {
- ret = set_physical_link_state(dd,
- (rem_reason << 8) | PLS_OFFLINE);
+ /* start offline transition */
+ ret = set_physical_link_state(dd, (rem_reason << 8) | PLS_OFFLINE);
- if (ret != HCMD_SUCCESS) {
- dd_dev_err(dd,
- "Failed to transition to Offline link state, return %d\n",
- ret);
- return -EINVAL;
- }
- if (ppd->offline_disabled_reason ==
- HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE))
- ppd->offline_disabled_reason =
- HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT);
+ if (ret != HCMD_SUCCESS) {
+ dd_dev_err(dd,
+ "Failed to transition to Offline link state, return %d\n",
+ ret);
+ return -EINVAL;
}
+ if (ppd->offline_disabled_reason ==
+ HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE))
+ ppd->offline_disabled_reason =
+ HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT);
- if (do_wait) {
- /* it can take a while for the link to go down */
- ret = wait_phy_linkstate(dd, PLS_OFFLINE, 10000);
- if (ret < 0)
- return ret;
- }
+ /*
+ * Wait for offline transition. It can take a while for
+ * the link to go down.
+ */
+ ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 10000);
+ if (ret < 0)
+ return ret;
/*
* Now in charge of LCB - must be after the physical state is
@@ -10415,11 +10452,11 @@ static const char *link_state_reason_name(struct hfi1_pportdata *ppd, u32 state)
}
/*
- * driver_physical_state - convert the driver's notion of a port's
+ * driver_pstate - convert the driver's notion of a port's
* state (an HLS_*) into a physical state (a {IB,OPA}_PORTPHYSSTATE_*).
* Return -1 (converted to a u32) to indicate error.
*/
-u32 driver_physical_state(struct hfi1_pportdata *ppd)
+u32 driver_pstate(struct hfi1_pportdata *ppd)
{
switch (ppd->host_link_state) {
case HLS_UP_INIT:
@@ -10449,11 +10486,11 @@ u32 driver_physical_state(struct hfi1_pportdata *ppd)
}
/*
- * driver_logical_state - convert the driver's notion of a port's
+ * driver_lstate - convert the driver's notion of a port's
* state (an HLS_*) into a logical state (a IB_PORT_*). Return -1
* (converted to a u32) to indicate error.
*/
-u32 driver_logical_state(struct hfi1_pportdata *ppd)
+u32 driver_lstate(struct hfi1_pportdata *ppd)
{
if (ppd->host_link_state && (ppd->host_link_state & HLS_DOWN))
return IB_PORT_DOWN;
@@ -10484,6 +10521,14 @@ void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
}
/*
+ * Verify if BCT for data VLs is non-zero.
+ */
+static inline bool data_vls_operational(struct hfi1_pportdata *ppd)
+{
+ return !!ppd->actual_vls_operational;
+}
+
+/*
* Change the physical and/or logical link state.
*
* Do not call this routine while inside an interrupt. It contains
@@ -10545,38 +10590,58 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
goto unexpected;
}
+ /*
+ * Wait for Link_Up physical state.
+ * Physical and Logical states should already
+ * be transitioned to LinkUp and LinkInit respectively.
+ */
+ ret = wait_physical_linkstate(ppd, PLS_LINKUP, 1000);
+ if (ret) {
+ dd_dev_err(dd,
+ "%s: physical state did not change to LINK-UP\n",
+ __func__);
+ break;
+ }
+
ret = wait_logical_linkstate(ppd, IB_PORT_INIT, 1000);
if (ret) {
dd_dev_err(dd,
"%s: logical state did not change to INIT\n",
__func__);
- } else {
- /* clear old transient LINKINIT_REASON code */
- if (ppd->linkinit_reason >= OPA_LINKINIT_REASON_CLEAR)
- ppd->linkinit_reason =
- OPA_LINKINIT_REASON_LINKUP;
+ break;
+ }
- /* enable the port */
- add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
+ /* clear old transient LINKINIT_REASON code */
+ if (ppd->linkinit_reason >= OPA_LINKINIT_REASON_CLEAR)
+ ppd->linkinit_reason =
+ OPA_LINKINIT_REASON_LINKUP;
- handle_linkup_change(dd, 1);
- ppd->host_link_state = HLS_UP_INIT;
- }
+ /* enable the port */
+ add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
+
+ handle_linkup_change(dd, 1);
+ ppd->host_link_state = HLS_UP_INIT;
break;
case HLS_UP_ARMED:
if (ppd->host_link_state != HLS_UP_INIT)
goto unexpected;
- ppd->host_link_state = HLS_UP_ARMED;
+ if (!data_vls_operational(ppd)) {
+ dd_dev_err(dd,
+ "%s: data VLs not operational\n", __func__);
+ ret = -EINVAL;
+ break;
+ }
+
set_logical_state(dd, LSTATE_ARMED);
ret = wait_logical_linkstate(ppd, IB_PORT_ARMED, 1000);
if (ret) {
- /* logical state didn't change, stay at init */
- ppd->host_link_state = HLS_UP_INIT;
dd_dev_err(dd,
"%s: logical state did not change to ARMED\n",
__func__);
+ break;
}
+ ppd->host_link_state = HLS_UP_ARMED;
/*
* The simulator does not currently implement SMA messages,
* so neighbor_normal is not set. Set it here when we first
@@ -10589,18 +10654,16 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
if (ppd->host_link_state != HLS_UP_ARMED)
goto unexpected;
- ppd->host_link_state = HLS_UP_ACTIVE;
set_logical_state(dd, LSTATE_ACTIVE);
ret = wait_logical_linkstate(ppd, IB_PORT_ACTIVE, 1000);
if (ret) {
- /* logical state didn't change, stay at armed */
- ppd->host_link_state = HLS_UP_ARMED;
dd_dev_err(dd,
"%s: logical state did not change to ACTIVE\n",
__func__);
} else {
/* tell all engines to go running */
sdma_all_running(dd);
+ ppd->host_link_state = HLS_UP_ACTIVE;
/* Signal the IB layer that the port has went active */
event.device = &dd->verbs_dev.rdi.ibdev;
@@ -10658,6 +10721,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
*/
if (ret)
goto_offline(ppd, 0);
+ else
+ log_physical_state(ppd, PLS_POLLING);
break;
case HLS_DN_DISABLE:
/* link is disabled */
@@ -10682,6 +10747,13 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
ret = -EINVAL;
break;
}
+ ret = wait_physical_linkstate(ppd, PLS_DISABLED, 10000);
+ if (ret) {
+ dd_dev_err(dd,
+ "%s: physical state did not change to DISABLED\n",
+ __func__);
+ break;
+ }
dc_shutdown(dd);
}
ppd->host_link_state = HLS_DN_DISABLE;
@@ -10699,6 +10771,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
if (ppd->host_link_state != HLS_DN_POLL)
goto unexpected;
ppd->host_link_state = HLS_VERIFY_CAP;
+ log_physical_state(ppd, PLS_CONFIGPHY_VERIFYCAP);
break;
case HLS_GOING_UP:
if (ppd->host_link_state != HLS_VERIFY_CAP)
@@ -11693,16 +11766,18 @@ static u32 encoded_size(u32 size)
return 0x1; /* if invalid, go with the minimum size */
}
-void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
+void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
+ struct hfi1_ctxtdata *rcd)
{
- struct hfi1_ctxtdata *rcd;
u64 rcvctrl, reg;
int did_enable = 0;
+ u16 ctxt;
- rcd = dd->rcd[ctxt];
if (!rcd)
return;
+ ctxt = rcd->ctxt;
+
hfi1_cdbg(RCVCTRL, "ctxt %d op 0x%x", ctxt, op);
rcvctrl = read_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL);
@@ -12604,20 +12679,8 @@ const char *opa_pstate_name(u32 pstate)
return "unknown";
}
-/*
- * Read the hardware link state and set the driver's cached value of it.
- * Return the (new) current value.
- */
-u32 get_logical_state(struct hfi1_pportdata *ppd)
+static void update_statusp(struct hfi1_pportdata *ppd, u32 state)
{
- u32 new_state;
-
- new_state = chip_to_opa_lstate(ppd->dd, read_logical_state(ppd->dd));
- if (new_state != ppd->lstate) {
- dd_dev_info(ppd->dd, "logical state changed to %s (0x%x)\n",
- opa_lstate_name(new_state), new_state);
- ppd->lstate = new_state;
- }
/*
* Set port status flags in the page mapped into userspace
* memory. Do it here to ensure a reliable state - this is
@@ -12627,7 +12690,7 @@ u32 get_logical_state(struct hfi1_pportdata *ppd)
* function.
*/
if (ppd->statusp) {
- switch (ppd->lstate) {
+ switch (state) {
case IB_PORT_DOWN:
case IB_PORT_INIT:
*ppd->statusp &= ~(HFI1_STATUS_IB_CONF |
@@ -12641,10 +12704,9 @@ u32 get_logical_state(struct hfi1_pportdata *ppd)
break;
}
}
- return ppd->lstate;
}
-/**
+/*
* wait_logical_linkstate - wait for an IB link state change to occur
* @ppd: port device
* @state: the state to wait for
@@ -12658,35 +12720,88 @@ static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
int msecs)
{
unsigned long timeout;
+ u32 new_state;
timeout = jiffies + msecs_to_jiffies(msecs);
while (1) {
- if (get_logical_state(ppd) == state)
- return 0;
- if (time_after(jiffies, timeout))
+ new_state = chip_to_opa_lstate(ppd->dd,
+ read_logical_state(ppd->dd));
+ if (new_state == state)
break;
+ if (time_after(jiffies, timeout)) {
+ dd_dev_err(ppd->dd,
+ "timeout waiting for link state 0x%x\n",
+ state);
+ return -ETIMEDOUT;
+ }
msleep(20);
}
- dd_dev_err(ppd->dd, "timeout waiting for link state 0x%x\n", state);
- return -ETIMEDOUT;
+ update_statusp(ppd, state);
+ dd_dev_info(ppd->dd,
+ "logical state changed to %s (0x%x)\n",
+ opa_lstate_name(state),
+ state);
+ return 0;
}
-u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd)
+static void log_state_transition(struct hfi1_pportdata *ppd, u32 state)
{
- u32 pstate;
- u32 ib_pstate;
+ u32 ib_pstate = chip_to_opa_pstate(ppd->dd, state);
- pstate = read_physical_state(ppd->dd);
- ib_pstate = chip_to_opa_pstate(ppd->dd, pstate);
- if (ppd->last_pstate != ib_pstate) {
- dd_dev_info(ppd->dd,
- "%s: physical state changed to %s (0x%x), phy 0x%x\n",
- __func__, opa_pstate_name(ib_pstate), ib_pstate,
- pstate);
- ppd->last_pstate = ib_pstate;
+ dd_dev_info(ppd->dd,
+ "physical state changed to %s (0x%x), phy 0x%x\n",
+ opa_pstate_name(ib_pstate), ib_pstate, state);
+}
+
+/*
+ * Read the physical hardware link state and check if it matches the host
+ * driver's anticipated state.
+ */
+static void log_physical_state(struct hfi1_pportdata *ppd, u32 state)
+{
+ u32 read_state = read_physical_state(ppd->dd);
+
+ if (read_state == state) {
+ log_state_transition(ppd, state);
+ } else {
+ dd_dev_err(ppd->dd,
+ "anticipated phy link state 0x%x, read 0x%x\n",
+ state, read_state);
}
- return ib_pstate;
+}
+
+/*
+ * wait_physical_linkstate - wait for a physical link state change to occur
+ * @ppd: port device
+ * @state: the state to wait for
+ * @msecs: the number of milliseconds to wait
+ *
+ * Wait up to msecs milliseconds for a physical link state change to occur.
+ * Returns 0 if state reached, otherwise -ETIMEDOUT.
+ */
+static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state,
+ int msecs)
+{
+ u32 read_state;
+ unsigned long timeout;
+
+ timeout = jiffies + msecs_to_jiffies(msecs);
+ while (1) {
+ read_state = read_physical_state(ppd->dd);
+ if (read_state == state)
+ break;
+ if (time_after(jiffies, timeout)) {
+ dd_dev_err(ppd->dd,
+ "timeout waiting for phy link state 0x%x\n",
+ state);
+ return -ETIMEDOUT;
+ }
+ usleep_range(1950, 2050); /* sleep 2ms-ish */
+ }
+
+ log_state_transition(ppd, state);
+ return 0;
}
#define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
@@ -12809,30 +12924,24 @@ static void clean_up_interrupts(struct hfi1_devdata *dd)
for (i = 0; i < dd->num_msix_entries; i++, me++) {
if (!me->arg) /* => no irq, no affinity */
continue;
- hfi1_put_irq_affinity(dd, &dd->msix_entries[i]);
- free_irq(me->msix.vector, me->arg);
+ hfi1_put_irq_affinity(dd, me);
+ free_irq(me->irq, me->arg);
}
+
+ /* clean structures */
+ kfree(dd->msix_entries);
+ dd->msix_entries = NULL;
+ dd->num_msix_entries = 0;
} else {
/* INTx */
if (dd->requested_intx_irq) {
free_irq(dd->pcidev->irq, dd);
dd->requested_intx_irq = 0;
}
- }
-
- /* turn off interrupts */
- if (dd->num_msix_entries) {
- /* MSI-X */
- pci_disable_msix(dd->pcidev);
- } else {
- /* INTx */
disable_intx(dd->pcidev);
}
- /* clean structures */
- kfree(dd->msix_entries);
- dd->msix_entries = NULL;
- dd->num_msix_entries = 0;
+ pci_free_irq_vectors(dd->pcidev);
}
/*
@@ -12953,7 +13062,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
me->type = IRQ_SDMA;
} else if (first_rx <= i && i < last_rx) {
idx = i - first_rx;
- rcd = dd->rcd[idx];
+ rcd = hfi1_rcd_get_by_index(dd, idx);
if (rcd) {
/*
* Set the interrupt register and mask for this
@@ -12972,6 +13081,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
remap_intr(dd, IS_RCVAVAIL_START + idx, i);
me->type = IRQ_RCVCTXT;
rcd->msix_intr = i;
+ hfi1_rcd_put(rcd);
}
} else {
/* not in our expected range - complain, then
@@ -12986,13 +13096,21 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
continue;
/* make sure the name is terminated */
me->name[sizeof(me->name) - 1] = 0;
+ me->irq = pci_irq_vector(dd->pcidev, i);
+ /*
+ * On err return me->irq. Don't need to clear this
+ * because 'arg' has not been set, and cleanup will
+ * do the right thing.
+ */
+ if (me->irq < 0)
+ return me->irq;
- ret = request_threaded_irq(me->msix.vector, handler, thread, 0,
+ ret = request_threaded_irq(me->irq, handler, thread, 0,
me->name, arg);
if (ret) {
dd_dev_err(dd,
- "unable to allocate %s interrupt, vector %d, index %d, err %d\n",
- err_info, me->msix.vector, idx, ret);
+ "unable to allocate %s interrupt, irq %d, index %d, err %d\n",
+ err_info, me->irq, idx, ret);
return ret;
}
/*
@@ -13003,8 +13121,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
ret = hfi1_get_irq_affinity(dd, me);
if (ret)
- dd_dev_err(dd,
- "unable to pin IRQ %d\n", ret);
+ dd_dev_err(dd, "unable to pin IRQ %d\n", ret);
}
return ret;
@@ -13023,7 +13140,7 @@ void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd)
struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
- synchronize_irq(me->msix.vector);
+ synchronize_irq(me->irq);
}
}
@@ -13036,7 +13153,7 @@ void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd)
return;
hfi1_put_irq_affinity(dd, me);
- free_irq(me->msix.vector, me->arg);
+ free_irq(me->irq, me->arg);
me->arg = NULL;
}
@@ -13064,14 +13181,19 @@ void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd)
DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
me->name[sizeof(me->name) - 1] = 0;
me->type = IRQ_RCVCTXT;
-
+ me->irq = pci_irq_vector(dd->pcidev, rcd->msix_intr);
+ if (me->irq < 0) {
+ dd_dev_err(dd, "vnic irq vector request (idx %d) fail %d\n",
+ idx, me->irq);
+ return;
+ }
remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr);
- ret = request_threaded_irq(me->msix.vector, receive_context_interrupt,
+ ret = request_threaded_irq(me->irq, receive_context_interrupt,
receive_context_thread, 0, me->name, arg);
if (ret) {
- dd_dev_err(dd, "vnic irq request (vector %d, idx %d) fail %d\n",
- me->msix.vector, idx, ret);
+ dd_dev_err(dd, "vnic irq request (irq %d, idx %d) fail %d\n",
+ me->irq, idx, ret);
return;
}
/*
@@ -13084,7 +13206,7 @@ void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd)
if (ret) {
dd_dev_err(dd,
"unable to pin IRQ %d\n", ret);
- free_irq(me->msix.vector, me->arg);
+ free_irq(me->irq, me->arg);
}
}
@@ -13107,9 +13229,8 @@ static void reset_interrupts(struct hfi1_devdata *dd)
static int set_up_interrupts(struct hfi1_devdata *dd)
{
- struct hfi1_msix_entry *entries;
- u32 total, request;
- int i, ret;
+ u32 total;
+ int ret, request;
int single_interrupt = 0; /* we expect to have all the interrupts */
/*
@@ -13121,39 +13242,31 @@ static int set_up_interrupts(struct hfi1_devdata *dd)
*/
total = 1 + dd->num_sdma + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT;
- entries = kcalloc(total, sizeof(*entries), GFP_KERNEL);
- if (!entries) {
- ret = -ENOMEM;
- goto fail;
- }
- /* 1-1 MSI-X entry assignment */
- for (i = 0; i < total; i++)
- entries[i].msix.entry = i;
-
/* ask for MSI-X interrupts */
- request = total;
- request_msix(dd, &request, entries);
-
- if (request == 0) {
+ request = request_msix(dd, total);
+ if (request < 0) {
+ ret = request;
+ goto fail;
+ } else if (request == 0) {
/* using INTx */
/* dd->num_msix_entries already zero */
- kfree(entries);
single_interrupt = 1;
dd_dev_err(dd, "MSI-X failed, using INTx interrupts\n");
+ } else if (request < total) {
+ /* using MSI-X, with reduced interrupts */
+ dd_dev_err(dd, "reduced interrupt found, wanted %u, got %u\n",
+ total, request);
+ ret = -EINVAL;
+ goto fail;
} else {
- /* using MSI-X */
- dd->num_msix_entries = request;
- dd->msix_entries = entries;
-
- if (request != total) {
- /* using MSI-X, with reduced interrupts */
- dd_dev_err(
- dd,
- "cannot handle reduced interrupt case, want %u, got %u\n",
- total, request);
- ret = -EINVAL;
+ dd->msix_entries = kcalloc(total, sizeof(*dd->msix_entries),
+ GFP_KERNEL);
+ if (!dd->msix_entries) {
+ ret = -ENOMEM;
goto fail;
}
+ /* using MSI-X */
+ dd->num_msix_entries = total;
dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
}
@@ -13396,8 +13509,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
/* RcvArray */
for (i = 0; i < dd->chip_rcv_array_count; i++)
- write_csr(dd, RCV_ARRAY + (8 * i),
- RCV_ARRAY_RT_WRITE_ENABLE_SMASK);
+ hfi1_put_tid(dd, i, PT_INVALID_FLUSH, 0, 0);
/* RcvQPMapTable */
for (i = 0; i < 32; i++)
@@ -13831,9 +13943,10 @@ static void init_sc2vl_tables(struct hfi1_devdata *dd)
* a reset following the (possible) FLR in this routine.
*
*/
-static void init_chip(struct hfi1_devdata *dd)
+static int init_chip(struct hfi1_devdata *dd)
{
int i;
+ int ret = 0;
/*
* Put the HFI CSRs in a known state.
@@ -13881,12 +13994,22 @@ static void init_chip(struct hfi1_devdata *dd)
pcie_flr(dd->pcidev);
/* restore command and BARs */
- restore_pci_variables(dd);
+ ret = restore_pci_variables(dd);
+ if (ret) {
+ dd_dev_err(dd, "%s: Could not restore PCI variables\n",
+ __func__);
+ return ret;
+ }
if (is_ax(dd)) {
dd_dev_info(dd, "Resetting CSRs with FLR\n");
pcie_flr(dd->pcidev);
- restore_pci_variables(dd);
+ ret = restore_pci_variables(dd);
+ if (ret) {
+ dd_dev_err(dd, "%s: Could not restore PCI variables\n",
+ __func__);
+ return ret;
+ }
}
} else {
dd_dev_info(dd, "Resetting CSRs with writes\n");
@@ -13914,6 +14037,7 @@ static void init_chip(struct hfi1_devdata *dd)
write_csr(dd, ASIC_QSFP1_OUT, 0x1f);
write_csr(dd, ASIC_QSFP2_OUT, 0x1f);
init_chip_resources(dd);
+ return ret;
}
static void init_early_variables(struct hfi1_devdata *dd)
@@ -14365,6 +14489,7 @@ void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd)
static void init_rxe(struct hfi1_devdata *dd)
{
struct rsm_map_table *rmt;
+ u64 val;
/* enable all receive errors */
write_csr(dd, RCV_ERR_MASK, ~0ull);
@@ -14389,6 +14514,11 @@ static void init_rxe(struct hfi1_devdata *dd)
* (64 bytes). Max_Payload_Size is possibly modified upward in
* tune_pcie_caps() which is called after this routine.
*/
+
+ /* Have 16 bytes (4DW) of bypass header available in header queue */
+ val = read_csr(dd, RCV_BYPASS);
+ val |= (4ull << 16);
+ write_csr(dd, RCV_BYPASS, val);
}
static void init_other(struct hfi1_devdata *dd)
@@ -14470,99 +14600,86 @@ static void init_txe(struct hfi1_devdata *dd)
write_csr(dd, SEND_CM_TIMER_CTRL, HFI1_CREDIT_RETURN_RATE);
}
-int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey)
+int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd,
+ u16 jkey)
{
- struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
- unsigned sctxt;
- int ret = 0;
+ u8 hw_ctxt;
u64 reg;
- if (!rcd || !rcd->sc) {
- ret = -EINVAL;
- goto done;
- }
- sctxt = rcd->sc->hw_context;
+ if (!rcd || !rcd->sc)
+ return -EINVAL;
+
+ hw_ctxt = rcd->sc->hw_context;
reg = SEND_CTXT_CHECK_JOB_KEY_MASK_SMASK | /* mask is always 1's */
((jkey & SEND_CTXT_CHECK_JOB_KEY_VALUE_MASK) <<
SEND_CTXT_CHECK_JOB_KEY_VALUE_SHIFT);
/* JOB_KEY_ALLOW_PERMISSIVE is not allowed by default */
if (HFI1_CAP_KGET_MASK(rcd->flags, ALLOW_PERM_JKEY))
reg |= SEND_CTXT_CHECK_JOB_KEY_ALLOW_PERMISSIVE_SMASK;
- write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, reg);
+ write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_JOB_KEY, reg);
/*
* Enable send-side J_KEY integrity check, unless this is A0 h/w
*/
if (!is_ax(dd)) {
- reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
+ reg = read_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE);
reg |= SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
- write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
+ write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE, reg);
}
/* Enable J_KEY check on receive context. */
reg = RCV_KEY_CTRL_JOB_KEY_ENABLE_SMASK |
((jkey & RCV_KEY_CTRL_JOB_KEY_VALUE_MASK) <<
RCV_KEY_CTRL_JOB_KEY_VALUE_SHIFT);
- write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, reg);
-done:
- return ret;
+ write_kctxt_csr(dd, rcd->ctxt, RCV_KEY_CTRL, reg);
+
+ return 0;
}
-int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt)
+int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
{
- struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
- unsigned sctxt;
- int ret = 0;
+ u8 hw_ctxt;
u64 reg;
- if (!rcd || !rcd->sc) {
- ret = -EINVAL;
- goto done;
- }
- sctxt = rcd->sc->hw_context;
- write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, 0);
+ if (!rcd || !rcd->sc)
+ return -EINVAL;
+
+ hw_ctxt = rcd->sc->hw_context;
+ write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_JOB_KEY, 0);
/*
* Disable send-side J_KEY integrity check, unless this is A0 h/w.
* This check would not have been enabled for A0 h/w, see
* set_ctxt_jkey().
*/
if (!is_ax(dd)) {
- reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
+ reg = read_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE);
reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
- write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
+ write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE, reg);
}
/* Turn off the J_KEY on the receive side */
- write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, 0);
-done:
- return ret;
+ write_kctxt_csr(dd, rcd->ctxt, RCV_KEY_CTRL, 0);
+
+ return 0;
}
-int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey)
+int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd,
+ u16 pkey)
{
- struct hfi1_ctxtdata *rcd;
- unsigned sctxt;
- int ret = 0;
+ u8 hw_ctxt;
u64 reg;
- if (ctxt < dd->num_rcv_contexts) {
- rcd = dd->rcd[ctxt];
- } else {
- ret = -EINVAL;
- goto done;
- }
- if (!rcd || !rcd->sc) {
- ret = -EINVAL;
- goto done;
- }
- sctxt = rcd->sc->hw_context;
+ if (!rcd || !rcd->sc)
+ return -EINVAL;
+
+ hw_ctxt = rcd->sc->hw_context;
reg = ((u64)pkey & SEND_CTXT_CHECK_PARTITION_KEY_VALUE_MASK) <<
SEND_CTXT_CHECK_PARTITION_KEY_VALUE_SHIFT;
- write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, reg);
- reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
+ write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_PARTITION_KEY, reg);
+ reg = read_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE);
reg |= SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK;
reg &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_KDETH_PACKETS_SMASK;
- write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
-done:
- return ret;
+ write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE, reg);
+
+ return 0;
}
int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt)
@@ -14573,9 +14690,6 @@ int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt)
if (!ctxt || !ctxt->sc)
return -EINVAL;
- if (ctxt->ctxt >= dd->num_rcv_contexts)
- return -EINVAL;
-
hw_ctxt = ctxt->sc->hw_context;
reg = read_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE);
reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK;
@@ -14773,7 +14887,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
}
ppd->vls_supported = num_vls;
ppd->vls_operational = ppd->vls_supported;
- ppd->actual_vls_operational = ppd->vls_supported;
/* Set the default MTU. */
for (vl = 0; vl < num_vls; vl++)
dd->vld[vl].mtu = hfi1_max_mtu;
@@ -14782,7 +14895,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
* Set the initial values to reasonable default, will be set
* for real when link is up.
*/
- ppd->lstate = IB_PORT_DOWN;
ppd->overrun_threshold = 0x4;
ppd->phy_error_threshold = 0xf;
ppd->port_crc_mode_enabled = link_crc_mask;
@@ -14793,7 +14905,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
/* start in offline */
ppd->host_link_state = HLS_DN_OFFLINE;
init_vl_arb_caches(ppd);
- ppd->last_pstate = 0xff; /* invalid value */
}
dd->link_default = HLS_DN_POLL;
@@ -14807,6 +14918,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
if (ret < 0)
goto bail_free;
+ /* Save PCI space registers to rewrite after device reset */
+ ret = save_pci_variables(dd);
+ if (ret < 0)
+ goto bail_cleanup;
+
/* verify that reads actually work, save revision for reset check */
dd->revision = read_csr(dd, CCE_REVISION);
if (dd->revision == ~(u64)0) {
@@ -14899,7 +15015,9 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
goto bail_cleanup;
/* obtain chip sizes, reset chip CSRs */
- init_chip(dd);
+ ret = init_chip(dd);
+ if (ret)
+ goto bail_cleanup;
/* read in the PCIe link speed information */
ret = pcie_speeds(dd);
@@ -14974,10 +15092,16 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
if (ret)
goto bail_cleanup;
- ret = hfi1_create_ctxts(dd);
+ ret = hfi1_create_kctxts(dd);
if (ret)
goto bail_cleanup;
+ /*
+ * Initialize aspm, to be done after gen3 transition and setting up
+ * contexts and before enabling interrupts
+ */
+ aspm_init(dd);
+
dd->rcvhdrsize = DEFAULT_RCVHDRSIZE;
/*
* rcd[0] is guaranteed to be valid by this point. Also, all
@@ -14996,7 +15120,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
goto bail_cleanup;
}
- /* use contexts created by hfi1_create_ctxts */
+ /* use contexts created by hfi1_create_kctxts */
ret = set_up_interrupts(dd);
if (ret)
goto bail_cleanup;