diff options
Diffstat (limited to 'drivers/infiniband/hw/hfi1/chip.c')
| -rw-r--r-- | drivers/infiniband/hw/hfi1/chip.c | 157 |
1 files changed, 74 insertions, 83 deletions
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index f1245c94ae26..0781ab756d44 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* * Copyright(c) 2015 - 2020 Intel Corporation. * Copyright(c) 2021 Cornelis Networks. @@ -251,7 +251,7 @@ struct flag_table { /* * CCE Error flags. */ -static struct flag_table cce_err_status_flags[] = { +static const struct flag_table cce_err_status_flags[] = { /* 0*/ FLAG_ENTRY0("CceCsrParityErr", CCE_ERR_STATUS_CCE_CSR_PARITY_ERR_SMASK), /* 1*/ FLAG_ENTRY0("CceCsrReadBadAddrErr", @@ -341,7 +341,7 @@ static struct flag_table cce_err_status_flags[] = { * Misc Error flags */ #define MES(text) MISC_ERR_STATUS_MISC_##text##_ERR_SMASK -static struct flag_table misc_err_status_flags[] = { +static const struct flag_table misc_err_status_flags[] = { /* 0*/ FLAG_ENTRY0("CSR_PARITY", MES(CSR_PARITY)), /* 1*/ FLAG_ENTRY0("CSR_READ_BAD_ADDR", MES(CSR_READ_BAD_ADDR)), /* 2*/ FLAG_ENTRY0("CSR_WRITE_BAD_ADDR", MES(CSR_WRITE_BAD_ADDR)), @@ -360,7 +360,7 @@ static struct flag_table misc_err_status_flags[] = { /* * TXE PIO Error flags and consequences */ -static struct flag_table pio_err_status_flags[] = { +static const struct flag_table pio_err_status_flags[] = { /* 0*/ FLAG_ENTRY("PioWriteBadCtxt", SEC_WRITE_DROPPED, SEND_PIO_ERR_STATUS_PIO_WRITE_BAD_CTXT_ERR_SMASK), @@ -502,7 +502,7 @@ static struct flag_table pio_err_status_flags[] = { /* * TXE SDMA Error flags */ -static struct flag_table sdma_err_status_flags[] = { +static const struct flag_table sdma_err_status_flags[] = { /* 0*/ FLAG_ENTRY0("SDmaRpyTagErr", SEND_DMA_ERR_STATUS_SDMA_RPY_TAG_ERR_SMASK), /* 1*/ FLAG_ENTRY0("SDmaCsrParityErr", @@ -530,7 +530,7 @@ static struct flag_table sdma_err_status_flags[] = { * TXE Egress Error flags */ #define SEES(text) SEND_EGRESS_ERR_STATUS_##text##_ERR_SMASK -static struct flag_table egress_err_status_flags[] = { +static const struct flag_table egress_err_status_flags[] = { /* 0*/ FLAG_ENTRY0("TxPktIntegrityMemCorErr", SEES(TX_PKT_INTEGRITY_MEM_COR)), /* 1*/ FLAG_ENTRY0("TxPktIntegrityMemUncErr", SEES(TX_PKT_INTEGRITY_MEM_UNC)), /* 2 reserved */ @@ -631,7 +631,7 @@ static struct flag_table egress_err_status_flags[] = { * TXE Egress Error Info flags */ #define SEEI(text) SEND_EGRESS_ERR_INFO_##text##_ERR_SMASK -static struct flag_table egress_err_info_flags[] = { +static const struct flag_table egress_err_info_flags[] = { /* 0*/ FLAG_ENTRY0("Reserved", 0ull), /* 1*/ FLAG_ENTRY0("VLErr", SEEI(VL)), /* 2*/ FLAG_ENTRY0("JobKeyErr", SEEI(JOB_KEY)), @@ -680,7 +680,7 @@ static struct flag_table egress_err_info_flags[] = { * TXE Send error flags */ #define SES(name) SEND_ERR_STATUS_SEND_##name##_ERR_SMASK -static struct flag_table send_err_status_flags[] = { +static const struct flag_table send_err_status_flags[] = { /* 0*/ FLAG_ENTRY0("SendCsrParityErr", SES(CSR_PARITY)), /* 1*/ FLAG_ENTRY0("SendCsrReadBadAddrErr", SES(CSR_READ_BAD_ADDR)), /* 2*/ FLAG_ENTRY0("SendCsrWriteBadAddrErr", SES(CSR_WRITE_BAD_ADDR)) @@ -689,7 +689,7 @@ static struct flag_table send_err_status_flags[] = { /* * TXE Send Context Error flags and consequences */ -static struct flag_table sc_err_status_flags[] = { +static const struct flag_table sc_err_status_flags[] = { /* 0*/ FLAG_ENTRY("InconsistentSop", SEC_PACKET_DROPPED | SEC_SC_HALTED, SEND_CTXT_ERR_STATUS_PIO_INCONSISTENT_SOP_ERR_SMASK), @@ -712,7 +712,7 @@ static struct flag_table sc_err_status_flags[] = { * RXE Receive Error flags */ #define RXES(name) RCV_ERR_STATUS_RX_##name##_ERR_SMASK -static struct flag_table rxe_err_status_flags[] = { +static const struct flag_table rxe_err_status_flags[] = { /* 0*/ FLAG_ENTRY0("RxDmaCsrCorErr", RXES(DMA_CSR_COR)), /* 1*/ FLAG_ENTRY0("RxDcIntfParityErr", RXES(DC_INTF_PARITY)), /* 2*/ FLAG_ENTRY0("RxRcvHdrUncErr", RXES(RCV_HDR_UNC)), @@ -847,7 +847,7 @@ static struct flag_table rxe_err_status_flags[] = { * DCC Error Flags */ #define DCCE(name) DCC_ERR_FLG_##name##_SMASK -static struct flag_table dcc_err_flags[] = { +static const struct flag_table dcc_err_flags[] = { FLAG_ENTRY0("bad_l2_err", DCCE(BAD_L2_ERR)), FLAG_ENTRY0("bad_sc_err", DCCE(BAD_SC_ERR)), FLAG_ENTRY0("bad_mid_tail_err", DCCE(BAD_MID_TAIL_ERR)), @@ -900,7 +900,7 @@ static struct flag_table dcc_err_flags[] = { * LCB error flags */ #define LCBE(name) DC_LCB_ERR_FLG_##name##_SMASK -static struct flag_table lcb_err_flags[] = { +static const struct flag_table lcb_err_flags[] = { /* 0*/ FLAG_ENTRY0("CSR_PARITY_ERR", LCBE(CSR_PARITY_ERR)), /* 1*/ FLAG_ENTRY0("INVALID_CSR_ADDR", LCBE(INVALID_CSR_ADDR)), /* 2*/ FLAG_ENTRY0("RST_FOR_FAILED_DESKEW", LCBE(RST_FOR_FAILED_DESKEW)), @@ -943,7 +943,7 @@ static struct flag_table lcb_err_flags[] = { * DC8051 Error Flags */ #define D8E(name) DC_DC8051_ERR_FLG_##name##_SMASK -static struct flag_table dc8051_err_flags[] = { +static const struct flag_table dc8051_err_flags[] = { FLAG_ENTRY0("SET_BY_8051", D8E(SET_BY_8051)), FLAG_ENTRY0("LOST_8051_HEART_BEAT", D8E(LOST_8051_HEART_BEAT)), FLAG_ENTRY0("CRAM_MBE", D8E(CRAM_MBE)), @@ -962,7 +962,7 @@ static struct flag_table dc8051_err_flags[] = { * * Flags in DC8051_DBG_ERR_INFO_SET_BY_8051.ERROR field. */ -static struct flag_table dc8051_info_err_flags[] = { +static const struct flag_table dc8051_info_err_flags[] = { FLAG_ENTRY0("Spico ROM check failed", SPICO_ROM_FAILED), FLAG_ENTRY0("Unknown frame received", UNKNOWN_FRAME), FLAG_ENTRY0("Target BER not met", TARGET_BER_NOT_MET), @@ -986,7 +986,7 @@ static struct flag_table dc8051_info_err_flags[] = { * * Flags in DC8051_DBG_ERR_INFO_SET_BY_8051.HOST_MSG field. */ -static struct flag_table dc8051_info_host_msg_flags[] = { +static const struct flag_table dc8051_info_host_msg_flags[] = { FLAG_ENTRY0("Host request done", 0x0001), FLAG_ENTRY0("BC PWR_MGM message", 0x0002), FLAG_ENTRY0("BC SMA message", 0x0004), @@ -1056,7 +1056,7 @@ static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr); static void handle_temp_err(struct hfi1_devdata *dd); static void dc_shutdown(struct hfi1_devdata *dd); static void dc_start(struct hfi1_devdata *dd); -static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp, +static int qos_rmt_entries(unsigned int n_krcv_queues, unsigned int *mp, unsigned int *np); static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd); static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms); @@ -1461,7 +1461,8 @@ static u64 dc_access_lcb_cntr(const struct cntr_entry *entry, void *context, ret = write_lcb_csr(dd, csr, data); if (ret) { - dd_dev_err(dd, "Could not acquire LCB for counter 0x%x", csr); + if (!(dd->flags & HFI1_SHUTDOWN)) + dd_dev_err(dd, "Could not acquire LCB for counter 0x%x", csr); return 0; } @@ -5274,7 +5275,7 @@ done: * the buffer. End in '*' if the buffer is too short. */ static char *flag_string(char *buf, int buf_len, u64 flags, - struct flag_table *table, int table_size) + const struct flag_table *table, int table_size) { char extra[32]; char *p = buf; @@ -5333,7 +5334,7 @@ static const char * const cce_misc_names[] = { static char *is_misc_err_name(char *buf, size_t bsize, unsigned int source) { if (source < ARRAY_SIZE(cce_misc_names)) - strncpy(buf, cce_misc_names[source], bsize); + strscpy_pad(buf, cce_misc_names[source], bsize); else snprintf(buf, bsize, "Reserved%u", source + IS_GENERAL_ERR_START); @@ -5373,7 +5374,7 @@ static const char * const various_names[] = { static char *is_various_name(char *buf, size_t bsize, unsigned int source) { if (source < ARRAY_SIZE(various_names)) - strncpy(buf, various_names[source], bsize); + strscpy_pad(buf, various_names[source], bsize); else snprintf(buf, bsize, "Reserved%u", source + IS_VARIOUS_START); return buf; @@ -5547,7 +5548,7 @@ static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg) #define RCVERR_CHECK_TIME 10 static void update_rcverr_timer(struct timer_list *t) { - struct hfi1_devdata *dd = from_timer(dd, t, rcverr_timer); + struct hfi1_devdata *dd = timer_container_of(dd, t, rcverr_timer); struct hfi1_pportdata *ppd = dd->pport; u32 cur_ovfl_cnt = read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL); @@ -5575,7 +5576,7 @@ static int init_rcverr(struct hfi1_devdata *dd) static void free_rcverr(struct hfi1_devdata *dd) { if (dd->rcverr_timer.function) - del_timer_sync(&dd->rcverr_timer); + timer_delete_sync(&dd->rcverr_timer); } static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg) @@ -6160,7 +6161,7 @@ static int request_host_lcb_access(struct hfi1_devdata *dd) ret = do_8051_command(dd, HCMD_MISC, (u64)HCMD_MISC_REQUEST_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT, NULL); - if (ret != HCMD_SUCCESS) { + if (ret != HCMD_SUCCESS && !(dd->flags & HFI1_SHUTDOWN)) { dd_dev_err(dd, "%s: command failed with error %d\n", __func__, ret); } @@ -6241,7 +6242,8 @@ int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok) if (dd->lcb_access_count == 0) { ret = request_host_lcb_access(dd); if (ret) { - dd_dev_err(dd, + if (!(dd->flags & HFI1_SHUTDOWN)) + dd_dev_err(dd, "%s: unable to acquire LCB access, err %d\n", __func__, ret); goto done; @@ -8753,7 +8755,7 @@ static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data, /* * When writing a LCB CSR, out_data contains the full value to - * to be written, while in_data contains the relative LCB + * be written, while in_data contains the relative LCB * address in 7:0. Do the work here, rather than the caller, * of distrubting the write data to where it needs to go: * @@ -12135,7 +12137,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, set_intr_bits(dd, IS_RCVURGENT_START + rcd->ctxt, IS_RCVURGENT_START + rcd->ctxt, false); - hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl); + hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx", ctxt, rcvctrl); write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcvctrl); /* work around sticky RcvCtxtStatus.BlockedRHQFull */ @@ -12205,10 +12207,10 @@ u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp) hfi1_cdbg(CNTR, "reading %s", entry->name); if (entry->flags & CNTR_DISABLED) { /* Nothing */ - hfi1_cdbg(CNTR, "\tDisabled\n"); + hfi1_cdbg(CNTR, "\tDisabled"); } else { if (entry->flags & CNTR_VL) { - hfi1_cdbg(CNTR, "\tPer VL\n"); + hfi1_cdbg(CNTR, "\tPer VL"); for (j = 0; j < C_VL_COUNT; j++) { val = entry->rw_cntr(entry, dd, j, @@ -12216,21 +12218,21 @@ u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp) 0); hfi1_cdbg( CNTR, - "\t\tRead 0x%llx for %d\n", + "\t\tRead 0x%llx for %d", val, j); dd->cntrs[entry->offset + j] = val; } } else if (entry->flags & CNTR_SDMA) { hfi1_cdbg(CNTR, - "\t Per SDMA Engine\n"); + "\t Per SDMA Engine"); for (j = 0; j < chip_sdma_engines(dd); j++) { val = entry->rw_cntr(entry, dd, j, CNTR_MODE_R, 0); hfi1_cdbg(CNTR, - "\t\tRead 0x%llx for %d\n", + "\t\tRead 0x%llx for %d", val, j); dd->cntrs[entry->offset + j] = val; @@ -12271,7 +12273,7 @@ u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp) hfi1_cdbg(CNTR, "reading %s", entry->name); if (entry->flags & CNTR_DISABLED) { /* Nothing */ - hfi1_cdbg(CNTR, "\tDisabled\n"); + hfi1_cdbg(CNTR, "\tDisabled"); continue; } @@ -12306,7 +12308,8 @@ static void free_cntrs(struct hfi1_devdata *dd) int i; if (dd->synth_stats_timer.function) - del_timer_sync(&dd->synth_stats_timer); + timer_delete_sync(&dd->synth_stats_timer); + cancel_work_sync(&dd->update_cntr_work); ppd = (struct hfi1_pportdata *)(dd + 1); for (i = 0; i < dd->num_pports; i++, ppd++) { kfree(ppd->cntrs); @@ -12513,7 +12516,7 @@ static void do_update_synth_timer(struct work_struct *work) hfi1_cdbg( CNTR, - "[%d] curr tx=0x%llx rx=0x%llx :: last tx=0x%llx rx=0x%llx\n", + "[%d] curr tx=0x%llx rx=0x%llx :: last tx=0x%llx rx=0x%llx", dd->unit, cur_tx, cur_rx, dd->last_tx, dd->last_rx); if ((cur_tx < dd->last_tx) || (cur_rx < dd->last_rx)) { @@ -12527,7 +12530,7 @@ static void do_update_synth_timer(struct work_struct *work) } else { total_flits = (cur_tx - dd->last_tx) + (cur_rx - dd->last_rx); hfi1_cdbg(CNTR, - "[%d] total flits 0x%llx limit 0x%llx\n", dd->unit, + "[%d] total flits 0x%llx limit 0x%llx", dd->unit, total_flits, (u64)CNTR_32BIT_MAX); if (total_flits >= CNTR_32BIT_MAX) { hfi1_cdbg(CNTR, "[%d] 32bit limit hit, updating", @@ -12584,7 +12587,7 @@ static void do_update_synth_timer(struct work_struct *work) static void update_synth_timer(struct timer_list *t) { - struct hfi1_devdata *dd = from_timer(dd, t, synth_stats_timer); + struct hfi1_devdata *dd = timer_container_of(dd, t, synth_stats_timer); queue_work(dd->update_cntr_wq, &dd->update_cntr_work); mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME); @@ -12879,22 +12882,6 @@ u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate) } } -/* return the OPA port logical state name */ -const char *opa_lstate_name(u32 lstate) -{ - static const char * const port_logical_names[] = { - "PORT_NOP", - "PORT_DOWN", - "PORT_INIT", - "PORT_ARMED", - "PORT_ACTIVE", - "PORT_ACTIVE_DEFER", - }; - if (lstate < ARRAY_SIZE(port_logical_names)) - return port_logical_names[lstate]; - return "unknown"; -} - /* return the OPA port physical state name */ const char *opa_pstate_name(u32 pstate) { @@ -12953,8 +12940,6 @@ static void update_statusp(struct hfi1_pportdata *ppd, u32 state) break; } } - dd_dev_info(ppd->dd, "logical state changed to %s (0x%x)\n", - opa_lstate_name(state), state); } /** @@ -13182,15 +13167,16 @@ static void read_mod_write(struct hfi1_devdata *dd, u16 src, u64 bits, { u64 reg; u16 idx = src / BITS_PER_REGISTER; + unsigned long flags; - spin_lock(&dd->irq_src_lock); + spin_lock_irqsave(&dd->irq_src_lock, flags); reg = read_csr(dd, CCE_INT_MASK + (8 * idx)); if (set) reg |= bits; else reg &= ~bits; write_csr(dd, CCE_INT_MASK + (8 * idx), reg); - spin_unlock(&dd->irq_src_lock); + spin_unlock_irqrestore(&dd->irq_src_lock, flags); } /** @@ -13231,7 +13217,7 @@ int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set) /* * Clear all interrupt sources on the chip. */ -void clear_all_interrupts(struct hfi1_devdata *dd) +static void clear_all_interrupts(struct hfi1_devdata *dd) { int i; @@ -13362,7 +13348,6 @@ static int set_up_context_variables(struct hfi1_devdata *dd) int ret; unsigned ngroups; int rmt_count; - int user_rmt_reduced; u32 n_usr_ctxts; u32 send_contexts = chip_send_contexts(dd); u32 rcv_contexts = chip_rcv_contexts(dd); @@ -13421,28 +13406,34 @@ static int set_up_context_variables(struct hfi1_devdata *dd) (num_kernel_contexts + n_usr_ctxts), &node_affinity.real_cpu_mask); /* - * The RMT entries are currently allocated as shown below: - * 1. QOS (0 to 128 entries); - * 2. FECN (num_kernel_context - 1 + num_user_contexts + - * num_netdev_contexts); - * 3. netdev (num_netdev_contexts). - * It should be noted that FECN oversubscribe num_netdev_contexts - * entries of RMT because both netdev and PSM could allocate any receive - * context between dd->first_dyn_alloc_text and dd->num_rcv_contexts, - * and PSM FECN must reserve an RMT entry for each possible PSM receive - * context. + * RMT entries are allocated as follows: + * 1. QOS (0 to 128 entries) + * 2. FECN (num_kernel_context - 1 [a] + num_user_contexts + + * num_netdev_contexts [b]) + * 3. netdev (NUM_NETDEV_MAP_ENTRIES) + * + * Notes: + * [a] Kernel contexts (except control) are included in FECN if kernel + * TID_RDMA is active. + * [b] Netdev and user contexts are randomly allocated from the same + * context pool, so FECN must cover all contexts in the pool. */ - rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_netdev_contexts * 2); - if (HFI1_CAP_IS_KSET(TID_RDMA)) - rmt_count += num_kernel_contexts - 1; - if (rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) { - user_rmt_reduced = NUM_MAP_ENTRIES - rmt_count; - dd_dev_err(dd, - "RMT size is reducing the number of user receive contexts from %u to %d\n", - n_usr_ctxts, - user_rmt_reduced); - /* recalculate */ - n_usr_ctxts = user_rmt_reduced; + rmt_count = qos_rmt_entries(num_kernel_contexts - 1, NULL, NULL) + + (HFI1_CAP_IS_KSET(TID_RDMA) ? num_kernel_contexts - 1 + : 0) + + n_usr_ctxts + + num_netdev_contexts + + NUM_NETDEV_MAP_ENTRIES; + if (rmt_count > NUM_MAP_ENTRIES) { + int over = rmt_count - NUM_MAP_ENTRIES; + /* try to squish user contexts, minimum of 1 */ + if (over >= n_usr_ctxts) { + dd_dev_err(dd, "RMT overflow: reduce the requested number of contexts\n"); + return -EINVAL; + } + dd_dev_err(dd, "RMT overflow: reducing # user contexts from %u to %u\n", + n_usr_ctxts, n_usr_ctxts - over); + n_usr_ctxts -= over; } /* the first N are kernel contexts, the rest are user/netdev contexts */ @@ -14299,15 +14290,15 @@ static void clear_rsm_rule(struct hfi1_devdata *dd, u8 rule_index) } /* return the number of RSM map table entries that will be used for QOS */ -static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp, +static int qos_rmt_entries(unsigned int n_krcv_queues, unsigned int *mp, unsigned int *np) { int i; unsigned int m, n; - u8 max_by_vl = 0; + uint max_by_vl = 0; /* is QOS active at all? */ - if (dd->n_krcv_queues <= MIN_KERNEL_KCTXTS || + if (n_krcv_queues < MIN_KERNEL_KCTXTS || num_vls == 1 || krcvqsset <= 1) goto no_qos; @@ -14365,7 +14356,7 @@ static void init_qos(struct hfi1_devdata *dd, struct rsm_map_table *rmt) if (!rmt) goto bail; - rmt_entries = qos_rmt_entries(dd, &m, &n); + rmt_entries = qos_rmt_entries(dd->n_krcv_queues - 1, &m, &n); if (rmt_entries == 0) goto bail; qpns_per_vl = 1 << m; |
