Diffstat (limited to 'drivers/net/ethernet/chelsio/cxgb4/sge.c')
-rw-r--r--	drivers/net/ethernet/chelsio/cxgb4/sge.c	438
1 file changed, 335 insertions, 103 deletions
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 1359158652b7..9fccb8ea9bcd 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -163,7 +163,7 @@ static inline unsigned int fl_mtu_bufsize(struct adapter *adapter,
  * for DMA, but this is of course never sent to the hardware and is only used
  * to prevent double unmappings. All of the above requires that the Free List
  * Buffers which we allocate have the bottom 5 bits free (0) -- i.e. are
- * 32-byte or or a power of 2 greater in alignment. Since the SGE's minimal
+ * 32-byte or a power of 2 greater in alignment. Since the SGE's minimal
  * Free List Buffer alignment is 32 bytes, this works out for us ...
  */
 enum {
@@ -302,7 +302,7 @@ static void deferred_unmap_destructor(struct sk_buff *skb)
 
 /**
  * free_tx_desc - reclaims Tx descriptors and their buffers
- * @adapter: the adapter
+ * @adap: the adapter
  * @q: the Tx queue to reclaim descriptors from
  * @n: the number of descriptors to reclaim
  * @unmap: whether the buffers should be unmapped for DMA
@@ -443,7 +443,7 @@ static void free_rx_bufs(struct adapter *adap, struct sge_fl *q, int n)
 		if (is_buf_mapped(d))
 			dma_unmap_page(adap->pdev_dev, get_buf_addr(d),
 				       get_buf_size(adap, d),
-				       PCI_DMA_FROMDEVICE);
+				       DMA_FROM_DEVICE);
 		put_page(d->page);
 		d->page = NULL;
 		if (++q->cidx == q->size)
@@ -469,7 +469,7 @@ static void unmap_rx_buf(struct adapter *adap, struct sge_fl *q)
 
 	if (is_buf_mapped(d))
 		dma_unmap_page(adap->pdev_dev, get_buf_addr(d),
-			       get_buf_size(adap, d), PCI_DMA_FROMDEVICE);
+			       get_buf_size(adap, d), DMA_FROM_DEVICE);
 	d->page = NULL;
 	if (++q->cidx == q->size)
 		q->cidx = 0;
@@ -566,7 +566,7 @@ static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n,
 
 		mapping = dma_map_page(adap->pdev_dev, pg, 0,
 				       PAGE_SIZE << s->fl_pg_order,
-				       PCI_DMA_FROMDEVICE);
+				       DMA_FROM_DEVICE);
 		if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) {
 			__free_pages(pg, s->fl_pg_order);
 			q->mapping_err++;
@@ -596,7 +596,7 @@ alloc_small_pages:
 		}
 
 		mapping = dma_map_page(adap->pdev_dev, pg, 0, PAGE_SIZE,
-				       PCI_DMA_FROMDEVICE);
+				       DMA_FROM_DEVICE);
 		if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) {
 			put_page(pg);
 			q->mapping_err++;
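A side note on the hunks above: the 32-byte alignment that the first comment insists on is what lets this driver fold per-buffer state into the DMA address itself — the get_buf_addr()/is_buf_mapped() helpers seen in the unmap paths decode exactly that. A minimal sketch of the idea (macro names and masks illustrative, not the driver's exact enum):

#include <linux/dma-mapping.h>
#include <linux/bug.h>

#define FL_BUF_SIZE_MASK	0x0fUL	/* low bits: buffer-size index */
#define FL_UNMAPPED_BUF		0x10UL	/* bit 4: page is not DMA-mapped */
#define FL_STATE_MASK		0x1fUL	/* all five software state bits */

/* Pack state into a 32-byte-aligned DMA address (bottom 5 bits are zero). */
static inline dma_addr_t fl_pack_state(dma_addr_t addr, unsigned int size_idx,
				       bool unmapped)
{
	WARN_ON(addr & FL_STATE_MASK);	/* alignment contract from the comment */
	return addr | size_idx | (unmapped ? FL_UNMAPPED_BUF : 0);
}

/* What the hardware is actually given: the address with state bits cleared. */
static inline dma_addr_t fl_buf_addr(dma_addr_t state)
{
	return state & ~FL_STATE_MASK;
}

static inline bool fl_is_mapped(dma_addr_t state)
{
	return !(state & FL_UNMAPPED_BUF);
}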
@@ -722,6 +722,7 @@ static inline unsigned int flits_to_desc(unsigned int n)
 /**
  * is_eth_imm - can an Ethernet packet be sent as immediate data?
  * @skb: the packet
+ * @chip_ver: chip version
  *
  * Returns whether an Ethernet packet is small enough to fit as
  * immediate data. Return value corresponds to headroom required.
@@ -749,6 +750,7 @@ static inline int is_eth_imm(const struct sk_buff *skb, unsigned int chip_ver)
 /**
  * calc_tx_flits - calculate the number of flits for a packet Tx WR
  * @skb: the packet
+ * @chip_ver: chip version
  *
  * Returns the number of flits needed for a Tx WR for the given Ethernet
  * packet, including the needed WR and CPL headers.
@@ -802,19 +804,6 @@ static inline unsigned int calc_tx_flits(const struct sk_buff *skb,
 }
 
 /**
- * calc_tx_descs - calculate the number of Tx descriptors for a packet
- * @skb: the packet
- *
- * Returns the number of Tx descriptors needed for the given Ethernet
- * packet, including the needed WR and CPL headers.
- */
-static inline unsigned int calc_tx_descs(const struct sk_buff *skb,
-					 unsigned int chip_ver)
-{
-	return flits_to_desc(calc_tx_flits(skb, chip_ver));
-}
-
-/**
  * cxgb4_write_sgl - populate a scatter/gather list for a packet
  * @skb: the packet
  * @q: the Tx queue we are writing into
@@ -887,6 +876,114 @@ void cxgb4_write_sgl(const struct sk_buff *skb, struct sge_txq *q,
 }
 EXPORT_SYMBOL(cxgb4_write_sgl);
 
+/* cxgb4_write_partial_sgl - populate SGL for partial packet
+ * @skb: the packet
+ * @q: the Tx queue we are writing into
+ * @sgl: starting location for writing the SGL
+ * @end: points right after the end of the SGL
+ * @addr: the list of bus addresses for the SGL elements
+ * @start: start offset in the SKB where partial data starts
+ * @len: length of data from @start to send out
+ *
+ * This API will handle sending out partial data of a skb if required.
+ * Unlike cxgb4_write_sgl, @start can be any offset into the skb data,
+ * and @len will decide how much data after @start offset to send out.
+ */
+void cxgb4_write_partial_sgl(const struct sk_buff *skb, struct sge_txq *q,
+			     struct ulptx_sgl *sgl, u64 *end,
+			     const dma_addr_t *addr, u32 start, u32 len)
+{
+	struct ulptx_sge_pair buf[MAX_SKB_FRAGS / 2 + 1] = {0}, *to;
+	u32 frag_size, skb_linear_data_len = skb_headlen(skb);
+	struct skb_shared_info *si = skb_shinfo(skb);
+	u8 i = 0, frag_idx = 0, nfrags = 0;
+	skb_frag_t *frag;
+
+	/* Fill the first SGL either from linear data or from partial
+	 * frag based on @start.
+	 */
+	if (unlikely(start < skb_linear_data_len)) {
+		frag_size = min(len, skb_linear_data_len - start);
+		sgl->len0 = htonl(frag_size);
+		sgl->addr0 = cpu_to_be64(addr[0] + start);
+		len -= frag_size;
+		nfrags++;
+	} else {
+		start -= skb_linear_data_len;
+		frag = &si->frags[frag_idx];
+		frag_size = skb_frag_size(frag);
+		/* find the first frag */
+		while (start >= frag_size) {
+			start -= frag_size;
+			frag_idx++;
+			frag = &si->frags[frag_idx];
+			frag_size = skb_frag_size(frag);
+		}
+
+		frag_size = min(len, skb_frag_size(frag) - start);
+		sgl->len0 = cpu_to_be32(frag_size);
+		sgl->addr0 = cpu_to_be64(addr[frag_idx + 1] + start);
+		len -= frag_size;
+		nfrags++;
+		frag_idx++;
+	}
+
+	/* If the entire partial data fit in one SGL, then send it out
+	 * now.
+	 */
+	if (!len)
+		goto done;
+
+	/* Most of the complexity below deals with the possibility we hit the
+	 * end of the queue in the middle of writing the SGL. For this case
+	 * only we create the SGL in a temporary buffer and then copy it.
+	 */
+	to = (u8 *)end > (u8 *)q->stat ? buf : sgl->sge;
+
+	/* If the skb couldn't fit in first SGL completely, fill the
+	 * rest of the frags in subsequent SGLs. Note that each SGL
+	 * pair can store 2 frags.
+	 */
+	while (len) {
+		frag_size = min(len, skb_frag_size(&si->frags[frag_idx]));
+		to->len[i & 1] = cpu_to_be32(frag_size);
+		to->addr[i & 1] = cpu_to_be64(addr[frag_idx + 1]);
+		if (i && (i & 1))
+			to++;
+		nfrags++;
+		frag_idx++;
+		i++;
+		len -= frag_size;
+	}
+
+	/* If we ended in an odd boundary, then set the second SGL's
+	 * length in the pair to 0.
+	 */
+	if (i & 1)
+		to->len[1] = cpu_to_be32(0);
+
+	/* Copy from temporary buffer to Tx ring, in case we hit the
+	 * end of the queue in the middle of writing the SGL.
+	 */
+	if (unlikely((u8 *)end > (u8 *)q->stat)) {
+		u32 part0 = (u8 *)q->stat - (u8 *)sgl->sge, part1;
+
+		if (likely(part0))
+			memcpy(sgl->sge, buf, part0);
+		part1 = (u8 *)end - (u8 *)q->stat;
+		memcpy(q->desc, (u8 *)buf + part0, part1);
+		end = (void *)q->desc + part1;
+	}
+
+	/* 0-pad to multiple of 16 */
+	if ((uintptr_t)end & 8)
+		*end = 0;
+done:
+	sgl->cmd_nsge = htonl(ULPTX_CMD_V(ULP_TX_SC_DSGL) |
+			      ULPTX_NSGE_V(nfrags));
+}
+EXPORT_SYMBOL(cxgb4_write_partial_sgl);
+
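The partial-SGL helper just added is easiest to see with a concrete call. A hypothetical usage sketch (wr, flits and q are assumed to come from the caller's normal Tx-descriptor bookkeeping; addr[] holds the DMA addresses from cxgb4_map_skb(), with addr[0] for the linear area and addr[i + 1] for frag i, exactly as the function indexes them):

/* Send bytes [512, 512 + 2048) of @skb as a DSGL -- e.g. to transmit only
 * part of a record while the headers are rebuilt separately.
 */
dma_addr_t addr[MAX_SKB_FRAGS + 1];
struct ulptx_sgl *sgl;
u64 *end;

if (cxgb4_map_skb(adap->pdev_dev, skb, addr) < 0)
	return -EIO;

sgl = (struct ulptx_sgl *)(wr + 1);	/* SGL follows the work request */
end = (u64 *)wr + flits;		/* one past the WR's last flit */
cxgb4_write_partial_sgl(skb, &q->q, sgl, end, addr,
			512 /* start */, 2048 /* len */);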
 /* This function copies 64 byte coalesced work request to
  * memory mapped BAR2 space. For coalesced WR SGE fetches
  * data from the FIFO instead of from Host.
  */
@@ -1413,42 +1510,37 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 	pi = netdev_priv(dev);
 	adap = pi->adapter;
 	ssi = skb_shinfo(skb);
-#ifdef CONFIG_CHELSIO_IPSEC_INLINE
+#if IS_ENABLED(CONFIG_CHELSIO_IPSEC_INLINE)
 	if (xfrm_offload(skb) && !ssi->gso_size)
-		return adap->uld[CXGB4_ULD_CRYPTO].tx_handler(skb, dev);
+		return adap->uld[CXGB4_ULD_IPSEC].tx_handler(skb, dev);
 #endif /* CHELSIO_IPSEC_INLINE */
 
-#ifdef CONFIG_CHELSIO_TLS_DEVICE
-	if (skb->decrypted)
-		return adap->uld[CXGB4_ULD_CRYPTO].tx_handler(skb, dev);
+#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE)
+	if (tls_is_skb_tx_device_offloaded(skb) &&
+	    (skb->len - skb_tcp_all_headers(skb)))
+		return adap->uld[CXGB4_ULD_KTLS].tx_handler(skb, dev);
 #endif /* CHELSIO_TLS_DEVICE */
 
 	qidx = skb_get_queue_mapping(skb);
 	if (ptp_enabled) {
-		spin_lock(&adap->ptp_lock);
 		if (!(adap->ptp_tx_skb)) {
 			skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
 			adap->ptp_tx_skb = skb_get(skb);
 		} else {
-			spin_unlock(&adap->ptp_lock);
 			goto out_free;
 		}
 		q = &adap->sge.ptptxq;
 	} else {
 		q = &adap->sge.ethtxq[qidx + pi->first_qset];
 	}
-	skb_tx_timestamp(skb);
 
 	reclaim_completed_tx(adap, &q->q, -1, true);
 	cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F;
 
 #ifdef CONFIG_CHELSIO_T4_FCOE
 	ret = cxgb_fcoe_offload(skb, adap, pi, &cntrl);
-	if (unlikely(ret == -ENOTSUPP)) {
-		if (ptp_enabled)
-			spin_unlock(&adap->ptp_lock);
+	if (unlikely(ret == -EOPNOTSUPP))
 		goto out_free;
-	}
 #endif /* CONFIG_CHELSIO_T4_FCOE */
 
 	chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip);
@@ -1461,8 +1553,6 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 		dev_err(adap->pdev_dev,
 			"%s: Tx ring %u full while queue awake!\n",
 			dev->name, qidx);
-		if (ptp_enabled)
-			spin_unlock(&adap->ptp_lock);
 		return NETDEV_TX_BUSY;
 	}
 
@@ -1481,8 +1571,6 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 	    unlikely(cxgb4_map_skb(adap->pdev_dev, skb, sgl_sdesc->addr) < 0)) {
 		memset(sgl_sdesc->addr, 0, sizeof(sgl_sdesc->addr));
 		q->mapping_err++;
-		if (ptp_enabled)
-			spin_unlock(&adap->ptp_lock);
 		goto out_free;
 	}
 
@@ -1497,7 +1585,8 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 		 * has opened up.
 		 */
 		eth_txq_stop(q);
-		wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
+		if (chip_ver > CHELSIO_T5)
+			wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
 	}
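Worth spelling out on the hunk above: on T6 and later the driver asks the firmware explicitly for an egress-queue update when it stops the queue, while on T5 and earlier this diff instead arranges automatic CIDX updates at queue-allocation time (the AUTOEQUIQE/HOSTFCMODE change near the end of this diff), so the per-WR flags are skipped. A compact restatement of the stop-side logic, with the reasoning as comments (names as in this diff; 'credits' is assumed computed by the caller):

static void sketch_stop_txq(struct sge_eth_txq *q, unsigned int chip_ver,
			    int credits, u32 *wr_mid)
{
	if (likely(credits >= ETHTXQ_STOP_THRES))
		return;

	/* Stop the queue *before* writing the WR so the restart path
	 * cannot race with another xmit on this queue.
	 */
	eth_txq_stop(q);

	/* T6+: request an Egress Update for this WR so the completion
	 * path knows when to reclaim and re-wake the queue.
	 * <= T5: CIDX updates arrive automatically (AUTOEQUIQE), so no
	 * per-WR request is needed.
	 */
	if (chip_ver > CHELSIO_T5)
		*wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
}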
 
 	wr = (void *)&q->q.desc[q->q.pidx];
@@ -1533,8 +1622,7 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 			if (iph->version == 4) {
 				iph->check = 0;
 				iph->tot_len = 0;
-				iph->check = (u16)(~ip_fast_csum((u8 *)iph,
-								 iph->ihl));
+				iph->check = ~ip_fast_csum((u8 *)iph, iph->ihl);
 			}
 			if (skb->ip_summed == CHECKSUM_PARTIAL)
 				cntrl = hwcsum(adap->params.chip, skb);
@@ -1617,6 +1705,8 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 	cpl->len = htons(skb->len);
 	cpl->ctrl1 = cpu_to_be64(cntrl);
 
+	skb_tx_timestamp(skb);
+
 	if (immediate) {
 		cxgb4_inline_tx_skb(skb, &q->q, sgl);
 		dev_consume_skb_any(skb);
@@ -1630,8 +1720,6 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 	txq_advance(&q->q, ndesc);
 
 	cxgb4_ring_tx_db(adap, &q->q, ndesc);
-	if (ptp_enabled)
-		spin_unlock(&adap->ptp_lock);
 	return NETDEV_TX_OK;
 
 out_free:
@@ -1732,6 +1820,7 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
 	struct adapter *adapter;
 	int qidx, credits, ret;
 	size_t fw_hdr_copy_len;
+	unsigned int chip_ver;
 	u64 cntrl, *end;
 	u32 wr_mid;
 
@@ -1740,8 +1829,10 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
 	 * (including the VLAN tag) into the header so we reject anything
 	 * smaller than that ...
 	 */
-	fw_hdr_copy_len = sizeof(wr->ethmacdst) + sizeof(wr->ethmacsrc) +
-			  sizeof(wr->ethtype) + sizeof(wr->vlantci);
+	BUILD_BUG_ON(sizeof(wr->firmware) !=
+		     (sizeof(wr->ethmacdst) + sizeof(wr->ethmacsrc) +
+		      sizeof(wr->ethtype) + sizeof(wr->vlantci)));
+	fw_hdr_copy_len = sizeof(wr->firmware);
 	ret = cxgb4_validate_skb(skb, dev, fw_hdr_copy_len);
 	if (ret)
 		goto out_free;
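The BUILD_BUG_ON above is the usual way to pin a struct-layout invariant at compile time: the new wr->firmware member must cover exactly the four header fields it replaces, or the build fails instead of the driver silently copying the wrong byte count. A generic sketch of the pattern (struct and sizes hypothetical, not the real fw_eth_tx_pkt_vm_wr layout):

#include <linux/bug.h>

/* Hypothetical WR header: 'firmware' is meant to overlay the four fields. */
struct vm_wr_hdr_sketch {
	union {
		struct {
			u8     ethmacdst[6];
			u8     ethmacsrc[6];
			__be16 ethtype;
			__be16 vlantci;
		};
		u8 firmware[16];
	};
};

static void check_layout(void)
{
	/* Fails to compile if anyone grows or reorders the fields. */
	BUILD_BUG_ON(sizeof(((struct vm_wr_hdr_sketch *)0)->firmware) !=
		     6 + 6 + sizeof(__be16) + sizeof(__be16));
}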
@@ -1796,6 +1887,7 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
 		goto out_free;
 	}
 
+	chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip);
 	wr_mid = FW_WR_LEN16_V(DIV_ROUND_UP(flits, 2));
 	if (unlikely(credits < ETHTXQ_STOP_THRES)) {
 		/* After we're done injecting the Work Request for this
@@ -1807,7 +1899,8 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
 		 * has opened up.
 		 */
 		eth_txq_stop(txq);
-		wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
+		if (chip_ver > CHELSIO_T5)
+			wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
 	}
 
 	/* Start filling in our Work Request.  Note that we do _not_ handle
@@ -1820,7 +1913,7 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
 	wr->equiq_to_len16 = cpu_to_be32(wr_mid);
 	wr->r3[0] = cpu_to_be32(0);
 	wr->r3[1] = cpu_to_be32(0);
-	skb_copy_from_linear_data(skb, (void *)wr->ethmacdst, fw_hdr_copy_len);
+	skb_copy_from_linear_data(skb, &wr->firmware, fw_hdr_copy_len);
 	end = (u64 *)wr + flits;
 
 	/* If this is a Large Send Offload packet we'll put in an LSO CPL
@@ -1860,7 +1953,7 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
 		 */
 		cpl = (void *)(lso + 1);
 
-		if (CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5)
+		if (chip_ver <= CHELSIO_T5)
 			cntrl = TXPKT_ETHHDR_LEN_V(eth_xtra_len);
 		else
 			cntrl = T6_TXPKT_ETHHDR_LEN_V(eth_xtra_len);
@@ -2176,7 +2269,6 @@ static int ethofld_hard_xmit(struct net_device *dev,
 
 	d = &eosw_txq->desc[eosw_txq->last_pidx];
 	skb = d->skb;
-	skb_tx_timestamp(skb);
 
 	wr = (struct fw_eth_tx_eo_wr *)&eohw_txq->q.desc[eohw_txq->q.pidx];
 	if (unlikely(eosw_txq->state != CXGB4_EO_STATE_ACTIVE &&
@@ -2281,6 +2373,7 @@ write_wr_headers:
 		eohw_txq->vlan_ins++;
 
 	txq_advance(&eohw_txq->q, ndesc);
+	skb_tx_timestamp(skb);
 	cxgb4_ring_tx_db(adap, &eohw_txq->q, ndesc);
 	eosw_txq_advance_index(&eosw_txq->last_pidx, 1, eosw_txq->ndesc);
 
@@ -2377,6 +2470,16 @@ netdev_tx_t t4_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (unlikely(qid >= pi->nqsets))
 		return cxgb4_ethofld_xmit(skb, dev);
 
+	if (is_ptp_enabled(skb, dev)) {
+		struct adapter *adap = netdev2adap(dev);
+		netdev_tx_t ret;
+
+		spin_lock(&adap->ptp_lock);
+		ret = cxgb4_eth_xmit(skb, dev);
+		spin_unlock(&adap->ptp_lock);
+		return ret;
+	}
+
 	return cxgb4_eth_xmit(skb, dev);
 }
 
@@ -2410,9 +2513,9 @@ static void eosw_txq_flush_pending_skbs(struct sge_eosw_txq *eosw_txq)
 
 /**
  * cxgb4_ethofld_send_flowc - Send ETHOFLD flowc request to bind eotid to tc.
- * @dev - netdevice
- * @eotid - ETHOFLD tid to bind/unbind
- * @tc - traffic class. If set to FW_SCHED_CLS_NONE, then unbinds the @eotid
+ * @dev: netdevice
+ * @eotid: ETHOFLD tid to bind/unbind
+ * @tc: traffic class. If set to FW_SCHED_CLS_NONE, then unbinds the @eotid
  *
  * Send a FLOWC work request to bind an ETHOFLD TID to a traffic class.
  * If @tc is set to FW_SCHED_CLS_NONE, then the @eotid is unbound from
@@ -2431,7 +2534,7 @@ int cxgb4_ethofld_send_flowc(struct net_device *dev, u32 eotid, u32 tc)
 	struct sk_buff *skb;
 	int ret = 0;
 
-	len = sizeof(*flowc) + sizeof(struct fw_flowc_mnemval) * nparams;
+	len = struct_size(flowc, mnemval, nparams);
 	len16 = DIV_ROUND_UP(len, 16);
 
 	entry = cxgb4_lookup_eotid(&adap->tids, eotid);
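The struct_size() conversion just above is the kernel's overflow-safe idiom for sizing a header plus a flexible-array allocation. A sketch under an assumed, trimmed-down struct definition (the real struct fw_flowc_wr has more fields):

#include <linux/overflow.h>

struct flowc_sketch {
	__be32 op_to_nparams;
	__be32 flowid_len16;
	struct {
		u8 mnemonic;
		u8 r4[3];
		__be32 val;
	} mnemval[];		/* flexible array member */
};

static size_t flowc_wr_len(u32 nparams)
{
	struct flowc_sketch *flowc = NULL;	/* only used for type info */

	/* Equivalent to sizeof(*flowc) + nparams * sizeof(flowc->mnemval[0]),
	 * but saturates instead of wrapping if nparams is huge, so a later
	 * alloc_skb(len, ...) fails cleanly rather than under-allocating.
	 */
	return struct_size(flowc, mnemval, nparams);
}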
@@ -2442,6 +2545,12 @@ int cxgb4_ethofld_send_flowc(struct net_device *dev, u32 eotid, u32 tc)
 	if (!eosw_txq)
 		return -ENOMEM;
 
+	if (!(adap->flags & CXGB4_FW_OK)) {
+		/* Don't stall caller when access to FW is lost */
+		complete(&eosw_txq->completion);
+		return -EIO;
+	}
+
 	skb = alloc_skb(len, GFP_KERNEL);
 	if (!skb)
 		return -ENOMEM;
@@ -2449,12 +2558,12 @@ int cxgb4_ethofld_send_flowc(struct net_device *dev, u32 eotid, u32 tc)
 	spin_lock_bh(&eosw_txq->lock);
 	if (tc != FW_SCHED_CLS_NONE) {
 		if (eosw_txq->state != CXGB4_EO_STATE_CLOSED)
-			goto out_unlock;
+			goto out_free_skb;
 
 		next_state = CXGB4_EO_STATE_FLOWC_OPEN_SEND;
 	} else {
 		if (eosw_txq->state != CXGB4_EO_STATE_ACTIVE)
-			goto out_unlock;
+			goto out_free_skb;
 
 		next_state = CXGB4_EO_STATE_FLOWC_CLOSE_SEND;
 	}
@@ -2490,17 +2599,19 @@ int cxgb4_ethofld_send_flowc(struct net_device *dev, u32 eotid, u32 tc)
 		eosw_txq_flush_pending_skbs(eosw_txq);
 
 	ret = eosw_txq_enqueue(eosw_txq, skb);
-	if (ret) {
-		dev_consume_skb_any(skb);
-		goto out_unlock;
-	}
+	if (ret)
+		goto out_free_skb;
 
 	eosw_txq->state = next_state;
 	eosw_txq->flowc_idx = eosw_txq->pidx;
 	eosw_txq_advance(eosw_txq, 1);
 	ethofld_xmit(dev, eosw_txq);
 
-out_unlock:
+	spin_unlock_bh(&eosw_txq->lock);
+	return 0;
+
+out_free_skb:
+	dev_consume_skb_any(skb);
 	spin_unlock_bh(&eosw_txq->lock);
 	return ret;
 }
@@ -2536,6 +2647,84 @@ static void ctrlq_check_stop(struct sge_ctrl_txq *q, struct fw_wr_hdr *wr)
 	}
 }
 
+#define CXGB4_SELFTEST_LB_STR "CHELSIO_SELFTEST"
+
+int cxgb4_selftest_lb_pkt(struct net_device *netdev)
+{
+	struct port_info *pi = netdev_priv(netdev);
+	struct adapter *adap = pi->adapter;
+	struct cxgb4_ethtool_lb_test *lb;
+	int ret, i = 0, pkt_len, credits;
+	struct fw_eth_tx_pkt_wr *wr;
+	struct cpl_tx_pkt_core *cpl;
+	u32 ctrl0, ndesc, flits;
+	struct sge_eth_txq *q;
+	u8 *sgl;
+
+	pkt_len = ETH_HLEN + sizeof(CXGB4_SELFTEST_LB_STR);
+
+	flits = DIV_ROUND_UP(pkt_len + sizeof(*cpl) + sizeof(*wr),
+			     sizeof(__be64));
+	ndesc = flits_to_desc(flits);
+
+	lb = &pi->ethtool_lb;
+	lb->loopback = 1;
+
+	q = &adap->sge.ethtxq[pi->first_qset];
+	__netif_tx_lock_bh(q->txq);
+
+	reclaim_completed_tx(adap, &q->q, -1, true);
+	credits = txq_avail(&q->q) - ndesc;
+	if (unlikely(credits < 0)) {
+		__netif_tx_unlock_bh(q->txq);
+		return -ENOMEM;
+	}
+
+	wr = (void *)&q->q.desc[q->q.pidx];
+	memset(wr, 0, sizeof(struct tx_desc));
+
+	wr->op_immdlen = htonl(FW_WR_OP_V(FW_ETH_TX_PKT_WR) |
+			       FW_WR_IMMDLEN_V(pkt_len +
					       sizeof(*cpl)));
+	wr->equiq_to_len16 = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(flits, 2)));
+	wr->r3 = cpu_to_be64(0);
+
+	cpl = (void *)(wr + 1);
+	sgl = (u8 *)(cpl + 1);
+
+	ctrl0 = TXPKT_OPCODE_V(CPL_TX_PKT_XT) | TXPKT_PF_V(adap->pf) |
+		TXPKT_INTF_V(pi->tx_chan + 4);
+
+	cpl->ctrl0 = htonl(ctrl0);
+	cpl->pack = htons(0);
+	cpl->len = htons(pkt_len);
+	cpl->ctrl1 = cpu_to_be64(TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F);
+
+	eth_broadcast_addr(sgl);
+	i += ETH_ALEN;
+	ether_addr_copy(&sgl[i], netdev->dev_addr);
+	i += ETH_ALEN;
+
+	snprintf(&sgl[i], sizeof(CXGB4_SELFTEST_LB_STR), "%s",
+		 CXGB4_SELFTEST_LB_STR);
+
+	init_completion(&lb->completion);
+	txq_advance(&q->q, ndesc);
+	cxgb4_ring_tx_db(adap, &q->q, ndesc);
+	__netif_tx_unlock_bh(q->txq);
+
+	/* wait for the pkt to return */
+	ret = wait_for_completion_timeout(&lb->completion, 10 * HZ);
+	if (!ret)
+		ret = -ETIMEDOUT;
+	else
+		ret = lb->result;
+
+	lb->loopback = 0;
+
+	return ret;
+}
+
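Two details of the self-test above are easy to miss: the magic string lands at offset 12, right after the two MAC addresses (where an EtherType would normally sit), and wait_for_completion_timeout() returns 0 on timeout and the remaining jiffies otherwise, which is why 0 maps to -ETIMEDOUT. A sketch of the frame as the Rx-side validator (cxgb4_validate_lb_pkt, further down in this diff) sees it:

/* Loopback self-test frame layout (offsets into the received payload):
 *
 *   0..5    ff:ff:ff:ff:ff:ff       broadcast destination MAC
 *   6..11   netdev->dev_addr        our own source MAC (matched on Rx)
 *   12..    "CHELSIO_SELFTEST\0"    magic string (strcmp'd on Rx)
 */
static bool lb_frame_matches(const u8 *data, const struct net_device *netdev)
{
	return ether_addr_equal(data + ETH_ALEN, netdev->dev_addr) &&
	       !strcmp((const char *)data + 2 * ETH_ALEN,
		       CXGB4_SELFTEST_LB_STR);
}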
 /**
  * ctrl_xmit - send a packet through an SGE control Tx queue
  * @q: the control queue
@@ -2581,15 +2770,15 @@ static int ctrl_xmit(struct sge_ctrl_txq *q, struct sk_buff *skb)
 
 /**
  * restart_ctrlq - restart a suspended control queue
- * @data: the control queue to restart
+ * @t: pointer to the tasklet associated with this handler
  *
  * Resumes transmission on a suspended Tx control queue.
  */
-static void restart_ctrlq(unsigned long data)
+static void restart_ctrlq(struct tasklet_struct *t)
 {
 	struct sk_buff *skb;
 	unsigned int written = 0;
-	struct sge_ctrl_txq *q = (struct sge_ctrl_txq *)data;
+	struct sge_ctrl_txq *q = from_tasklet(q, t, qresume_tsk);
 
 	spin_lock(&q->sendq.lock);
 	reclaim_completed_tx_imm(&q->q);
@@ -2654,17 +2843,22 @@ int t4_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
  * @skb: the packet
  *
  * Returns true if a packet can be sent as an offload WR with immediate
- * data.  We currently use the same limit as for Ethernet packets.
+ * data.
+ * FW_OFLD_TX_DATA_WR limits the payload to 255 bytes due to 8-bit field.
+ * However, FW_ULPTX_WR commands have a 256 byte immediate only
+ * payload limit.
  */
 static inline int is_ofld_imm(const struct sk_buff *skb)
 {
 	struct work_request_hdr *req = (struct work_request_hdr *)skb->data;
 	unsigned long opcode = FW_WR_OP_G(ntohl(req->wr_hi));
 
-	if (opcode == FW_CRYPTO_LOOKASIDE_WR)
+	if (unlikely(opcode == FW_ULPTX_WR))
+		return skb->len <= MAX_IMM_ULPTX_WR_LEN;
+	else if (opcode == FW_CRYPTO_LOOKASIDE_WR)
 		return skb->len <= SGE_MAX_WR_LEN;
 	else
-		return skb->len <= MAX_IMM_TX_PKT_LEN;
+		return skb->len <= MAX_IMM_OFLD_TX_DATA_WR_LEN;
 }
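The immediate-data split above matters because the two WR types have different ceilings: FW_OFLD_TX_DATA_WR carries its immediate length in an 8-bit field (2^8 - 1 = 255 payload bytes at most), while a FW_ULPTX_WR may carry up to 256 bytes inline, so a full-sized ULPTX payload must not be misclassified under the smaller limit. A standalone restatement with the reasoning spelled out (the named constants live in the driver's headers):

/* Can this offload WR carry its payload inline in the descriptor ring,
 * or does it need a scatter/gather list?
 */
static inline bool ofld_fits_inline(const struct sk_buff *skb)
{
	const struct work_request_hdr *req = (const void *)skb->data;
	unsigned long opcode = FW_WR_OP_G(ntohl(req->wr_hi));

	if (opcode == FW_ULPTX_WR)
		/* ULPTX WRs: up to 256 bytes of immediate payload */
		return skb->len <= MAX_IMM_ULPTX_WR_LEN;
	if (opcode == FW_CRYPTO_LOOKASIDE_WR)
		return skb->len <= SGE_MAX_WR_LEN;
	/* FW_OFLD_TX_DATA_WR: 8-bit immediate-length field, i.e. <= 255 */
	return skb->len <= MAX_IMM_OFLD_TX_DATA_WR_LEN;
}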
@@ -2691,7 +2885,6 @@ static inline unsigned int calc_tx_flits_ofld(const struct sk_buff *skb)
 
 /**
  * txq_stop_maperr - stop a Tx queue due to I/O MMU exhaustion
- * @adap: the adapter
  * @q: the queue to stop
  *
  * Mark a Tx queue stopped due to I/O MMU exhaustion and resulting
@@ -2883,13 +3076,13 @@ static int ofld_xmit(struct sge_uld_txq *q, struct sk_buff *skb)
 
 /**
  * restart_ofldq - restart a suspended offload queue
- * @data: the offload queue to restart
+ * @t: pointer to the tasklet associated with this handler
  *
  * Resumes transmission on a suspended Tx offload queue.
  */
-static void restart_ofldq(unsigned long data)
+static void restart_ofldq(struct tasklet_struct *t)
 {
-	struct sge_uld_txq *q = (struct sge_uld_txq *)data;
+	struct sge_uld_txq *q = from_tasklet(q, t, qresume_tsk);
 
 	spin_lock(&q->sendq.lock);
 	q->full = 0;            /* the queue actually is completely empty now */
@@ -2938,6 +3131,7 @@ static inline int uld_send(struct adapter *adap, struct sk_buff *skb,
 	txq_info = adap->sge.uld_txq_info[tx_uld_type];
 	if (unlikely(!txq_info)) {
 		WARN_ON(true);
+		kfree_skb(skb);
 		return NET_XMIT_DROP;
 	}
 
@@ -3286,7 +3480,7 @@ enum {
 
 /**
  * t4_systim_to_hwstamp - read hardware time stamp
- * @adap: the adapter
+ * @adapter: the adapter
  * @skb: the packet
  *
  * Read Time Stamp from MPS packet and insert in skb which
@@ -3313,15 +3507,16 @@ static noinline int t4_systim_to_hwstamp(struct adapter *adapter,
 
 	hwtstamps = skb_hwtstamps(skb);
 	memset(hwtstamps, 0, sizeof(*hwtstamps));
-	hwtstamps->hwtstamp = ns_to_ktime(be64_to_cpu(*((u64 *)data)));
+	hwtstamps->hwtstamp = ns_to_ktime(get_unaligned_be64(data));
 
 	return RX_PTP_PKT_SUC;
 }
 
 /**
  * t4_rx_hststamp - Recv PTP Event Message
- * @adap: the adapter
+ * @adapter: the adapter
  * @rsp: the response queue descriptor holding the RX_PKT message
+ * @rxq: the response queue holding the RX_PKT message
 * @skb: the packet
 *
 * PTP enabled and MPS packet, read HW timestamp
@@ -3345,7 +3540,7 @@ static int t4_rx_hststamp(struct adapter *adapter, const __be64 *rsp,
 
 /**
  * t4_tx_hststamp - Loopback PTP Transmit Event Message
- * @adap: the adapter
+ * @adapter: the adapter
  * @skb: the packet
 * @dev: the ingress net device
 *
@@ -3409,9 +3604,53 @@ static void t4_tx_completion_handler(struct sge_rspq *rspq,
 	}
 
 	txq = &s->ethtxq[pi->first_qset + rspq->idx];
+
+	/* We've got the Hardware Consumer Index Update in the Egress Update
+	 * message. These Egress Update messages will be our sole CIDX Updates
+	 * we get since we don't want to chew up PCIe bandwidth for both Ingress
+	 * Messages and Status Page writes. However, the code which manages
+	 * reclaiming successfully DMA'ed TX Work Requests uses the CIDX value
+	 * stored in the Status Page at the end of the TX Queue. It's easiest
+	 * to simply copy the CIDX Update value from the Egress Update message
+	 * to the Status Page. Also note that no Endian issues need to be
+	 * considered here since both are Big Endian and we're just copying
+	 * bytes consistently ...
+	 */
+	if (CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5) {
+		struct cpl_sge_egr_update *egr;
+
+		egr = (struct cpl_sge_egr_update *)rsp;
+		WRITE_ONCE(txq->q.stat->cidx, egr->cidx);
+	}
+
 	t4_sge_eth_txq_egress_update(adapter, txq, -1);
 }
 
+static int cxgb4_validate_lb_pkt(struct port_info *pi, const struct pkt_gl *si)
+{
+	struct adapter *adap = pi->adapter;
+	struct cxgb4_ethtool_lb_test *lb;
+	struct sge *s = &adap->sge;
+	struct net_device *netdev;
+	u8 *data;
+	int i;
+
+	netdev = adap->port[pi->port_id];
+	lb = &pi->ethtool_lb;
+	data = si->va + s->pktshift;
+
+	i = ETH_ALEN;
+	if (!ether_addr_equal(data + i, netdev->dev_addr))
+		return -1;
+
+	i += ETH_ALEN;
+	if (strcmp(&data[i], CXGB4_SELFTEST_LB_STR))
+		lb->result = -EIO;
+
+	complete(&lb->completion);
+	return 0;
+}
+
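The Egress Update comment in the hunk above is the heart of the <= T5 strategy: these messages are the only CIDX updates such queues receive, so the handler patches the hardware CIDX into the Status Page that the reclaim logic already reads. Roughly what the consumer side does with that value (a sketch modeled on this driver's reclaim path, not verbatim):

/* Sketch: how Tx reclaim consumes the Status Page CIDX patched in above.
 * Both sides are big-endian, hence the straight byte copy there and the
 * ntohs() here.
 */
static int sketch_reclaimable(const struct sge_txq *q)
{
	int hw_cidx = ntohs(READ_ONCE(q->stat->cidx));
	int reclaimable = hw_cidx - q->cidx;

	/* account for ring wrap-around */
	return reclaimable < 0 ? reclaimable + q->size : reclaimable;
}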
 /**
  * t4_ethrx_handler - process an ingress ethernet packet
  * @q: the response queue that received the packet
@@ -3435,6 +3674,7 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
 	struct port_info *pi;
 	int ret = 0;
 
+	pi = netdev_priv(q->netdev);
 	/* If we're looking at TX Queue CIDX Update, handle that separately
 	 * and return.
 	 */
@@ -3462,6 +3702,12 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
 	if (err_vec)
 		rxq->stats.bad_rx_pkts++;
 
+	if (unlikely(pi->ethtool_lb.loopback && pkt->iff >= NCHAN)) {
+		ret = cxgb4_validate_lb_pkt(pi, si);
+		if (!ret)
+			return 0;
+	}
+
 	if (((pkt->l2info & htonl(RXF_TCP_F)) ||
 	     tnl_hdr_len) &&
 	    (q->netdev->features & NETIF_F_GRO) && csum_ok && !pkt->ip_frag) {
@@ -3475,7 +3721,6 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
 		rxq->stats.rx_drops++;
 		return 0;
 	}
-	pi = netdev_priv(q->netdev);
 
 	/* Handle PTP Event Rx packet */
 	if (unlikely(pi->ptp_enable)) {
@@ -3776,9 +4021,10 @@ static int napi_rx_handler(struct napi_struct *napi, int budget)
 	return work_done;
 }
 
-void cxgb4_ethofld_restart(unsigned long data)
+void cxgb4_ethofld_restart(struct tasklet_struct *t)
 {
-	struct sge_eosw_txq *eosw_txq = (struct sge_eosw_txq *)data;
+	struct sge_eosw_txq *eosw_txq = from_tasklet(eosw_txq, t,
+						     qresume_tsk);
 	int pktcount;
 
 	spin_lock(&eosw_txq->lock);
@@ -3988,7 +4234,7 @@ static void sge_rx_timer_cb(struct timer_list *t)
 {
 	unsigned long m;
 	unsigned int i;
-	struct adapter *adap = from_timer(adap, t, sge.rx_timer);
+	struct adapter *adap = timer_container_of(adap, t, sge.rx_timer);
 	struct sge *s = &adap->sge;
 
 	for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++)
@@ -4002,7 +4248,7 @@ static void sge_rx_timer_cb(struct timer_list *t)
 
 			if (fl_starving(adap, fl)) {
 				rxq = container_of(fl, struct sge_eth_rxq, fl);
-				if (napi_reschedule(&rxq->rspq.napi))
+				if (napi_schedule(&rxq->rspq.napi))
 					fl->starving++;
 				else
 					set_bit(id, s->starving_fl);
@@ -4023,7 +4269,7 @@ done:
 
 static void sge_tx_timer_cb(struct timer_list *t)
 {
-	struct adapter *adap = from_timer(adap, t, sge.tx_timer);
+	struct adapter *adap = timer_container_of(adap, t, sge.tx_timer);
 	struct sge *s = &adap->sge;
 	unsigned long m, period;
 	unsigned int i, budget;
@@ -4208,7 +4454,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
 	if (ret)
 		goto err;
 
-	netif_napi_add(dev, &iq->napi, napi_rx_handler, 64);
+	netif_napi_add(dev, &iq->napi, napi_rx_handler);
 	iq->cur_desc = iq->desc;
 	iq->cidx = 0;
 	iq->gen = 1;
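Several hunks above are mechanical API migrations rather than driver logic: tasklet handlers now receive the tasklet pointer and recover their container via from_tasklet() (a container_of() wrapper), and timer callbacks do the same via timer_container_of(), replacing unsigned-long casts and from_timer(). The generic shape, with hypothetical names:

struct my_txq {
	struct tasklet_struct qresume_tsk;
	struct timer_list timer;
	/* ... */
};

static void my_resume(struct tasklet_struct *t)
{
	/* from_tasklet(var, callback_arg, member) == container_of() */
	struct my_txq *q = from_tasklet(q, t, qresume_tsk);
	/* ... resume transmission on q ... */
}

static void my_timer_cb(struct timer_list *t)
{
	struct my_txq *q = timer_container_of(q, t, timer);
	/* ... */
}

static void my_init(struct my_txq *q)
{
	/* registration: no more (unsigned long) casts */
	tasklet_setup(&q->qresume_tsk, my_resume);
	timer_setup(&q->timer, my_timer_cb, 0);
}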
@@ -4362,11 +4608,15 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
 	 * write the CIDX Updates into the Status Page at the end of the
 	 * TX Queue.
 	 */
-	c.autoequiqe_to_viid = htonl(FW_EQ_ETH_CMD_AUTOEQUEQE_F |
+	c.autoequiqe_to_viid = htonl(((chip_ver <= CHELSIO_T5) ?
+				      FW_EQ_ETH_CMD_AUTOEQUIQE_F :
+				      FW_EQ_ETH_CMD_AUTOEQUEQE_F) |
 				     FW_EQ_ETH_CMD_VIID_V(pi->viid));
 
 	c.fetchszm_to_iqid =
-		htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(HOSTFCMODE_STATUS_PAGE_X) |
+		htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V((chip_ver <= CHELSIO_T5) ?
+						 HOSTFCMODE_INGRESS_QUEUE_X :
+						 HOSTFCMODE_STATUS_PAGE_X) |
 		      FW_EQ_ETH_CMD_PCIECHN_V(pi->tx_chan) |
 		      FW_EQ_ETH_CMD_FETCHRO_F | FW_EQ_ETH_CMD_IQID_V(iqid));
@@ -4377,6 +4627,7 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
 			    : FETCHBURSTMIN_64B_T6_X) |
 		      FW_EQ_ETH_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) |
 		      FW_EQ_ETH_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) |
+		      FW_EQ_ETH_CMD_CIDXFTHRESHO_V(chip_ver == CHELSIO_T5) |
 		      FW_EQ_ETH_CMD_EQSIZE_V(nentries));
 
 	c.eqaddr = cpu_to_be64(txq->q.phys_addr);
@@ -4469,7 +4720,7 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
 	init_txq(adap, &txq->q, FW_EQ_CTRL_CMD_EQID_G(ntohl(c.cmpliqid_eqid)));
 	txq->adap = adap;
 	skb_queue_head_init(&txq->sendq);
-	tasklet_init(&txq->qresume_tsk, restart_ctrlq, (unsigned long)txq);
+	tasklet_setup(&txq->qresume_tsk, restart_ctrlq);
 	txq->full = 0;
 	return 0;
 }
@@ -4559,7 +4810,7 @@ int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq,
 	txq->q.q_type = CXGB4_TXQ_ULD;
 	txq->adap = adap;
 	skb_queue_head_init(&txq->sendq);
-	tasklet_init(&txq->qresume_tsk, restart_ofldq, (unsigned long)txq);
+	tasklet_setup(&txq->qresume_tsk, restart_ofldq);
 	txq->full = 0;
 	txq->mapping_err = 0;
 	return 0;
@@ -4624,22 +4875,6 @@ void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq,
 	}
 }
 
-/**
- * t4_free_ofld_rxqs - free a block of consecutive Rx queues
- * @adap: the adapter
- * @n: number of queues
- * @q: pointer to first queue
- *
- * Release the resources of a consecutive block of offload Rx queues.
- */
-void t4_free_ofld_rxqs(struct adapter *adap, int n, struct sge_ofld_rxq *q)
-{
-	for ( ; n; n--, q++)
-		if (q->rspq.desc)
-			free_rspq_fl(adap, &q->rspq,
-				     q->fl.size ? &q->fl : NULL);
-}
-
 void t4_sge_free_ethofld_txq(struct adapter *adap, struct sge_eohw_txq *txq)
 {
 	if (txq->q.desc) {
@@ -4761,13 +4996,10 @@ void t4_sge_stop(struct adapter *adap)
 	int i;
 	struct sge *s = &adap->sge;
 
-	if (in_interrupt())  /* actions below require waiting */
-		return;
-
 	if (s->rx_timer.function)
-		del_timer_sync(&s->rx_timer);
+		timer_delete_sync(&s->rx_timer);
 	if (s->tx_timer.function)
-		del_timer_sync(&s->tx_timer);
+		timer_delete_sync(&s->tx_timer);
 
 	if (is_offload(adap)) {
 		struct sge_uld_txq_info *txq_info;
