summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/chelsio/cxgb4/sge.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/chelsio/cxgb4/sge.c')
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/sge.c438
1 files changed, 335 insertions, 103 deletions
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 1359158652b7..9fccb8ea9bcd 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -163,7 +163,7 @@ static inline unsigned int fl_mtu_bufsize(struct adapter *adapter,
* for DMA, but this is of course never sent to the hardware and is only used
* to prevent double unmappings. All of the above requires that the Free List
* Buffers which we allocate have the bottom 5 bits free (0) -- i.e. are
- * 32-byte or or a power of 2 greater in alignment. Since the SGE's minimal
+ * 32-byte or a power of 2 greater in alignment. Since the SGE's minimal
* Free List Buffer alignment is 32 bytes, this works out for us ...
*/
enum {
@@ -302,7 +302,7 @@ static void deferred_unmap_destructor(struct sk_buff *skb)
/**
* free_tx_desc - reclaims Tx descriptors and their buffers
- * @adapter: the adapter
+ * @adap: the adapter
* @q: the Tx queue to reclaim descriptors from
* @n: the number of descriptors to reclaim
* @unmap: whether the buffers should be unmapped for DMA
@@ -443,7 +443,7 @@ static void free_rx_bufs(struct adapter *adap, struct sge_fl *q, int n)
if (is_buf_mapped(d))
dma_unmap_page(adap->pdev_dev, get_buf_addr(d),
get_buf_size(adap, d),
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
put_page(d->page);
d->page = NULL;
if (++q->cidx == q->size)
@@ -469,7 +469,7 @@ static void unmap_rx_buf(struct adapter *adap, struct sge_fl *q)
if (is_buf_mapped(d))
dma_unmap_page(adap->pdev_dev, get_buf_addr(d),
- get_buf_size(adap, d), PCI_DMA_FROMDEVICE);
+ get_buf_size(adap, d), DMA_FROM_DEVICE);
d->page = NULL;
if (++q->cidx == q->size)
q->cidx = 0;
@@ -566,7 +566,7 @@ static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n,
mapping = dma_map_page(adap->pdev_dev, pg, 0,
PAGE_SIZE << s->fl_pg_order,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) {
__free_pages(pg, s->fl_pg_order);
q->mapping_err++;
@@ -596,7 +596,7 @@ alloc_small_pages:
}
mapping = dma_map_page(adap->pdev_dev, pg, 0, PAGE_SIZE,
- PCI_DMA_FROMDEVICE);
+ DMA_FROM_DEVICE);
if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) {
put_page(pg);
q->mapping_err++;
@@ -722,6 +722,7 @@ static inline unsigned int flits_to_desc(unsigned int n)
/**
* is_eth_imm - can an Ethernet packet be sent as immediate data?
* @skb: the packet
+ * @chip_ver: chip version
*
* Returns whether an Ethernet packet is small enough to fit as
* immediate data. Return value corresponds to headroom required.
@@ -749,6 +750,7 @@ static inline int is_eth_imm(const struct sk_buff *skb, unsigned int chip_ver)
/**
* calc_tx_flits - calculate the number of flits for a packet Tx WR
* @skb: the packet
+ * @chip_ver: chip version
*
* Returns the number of flits needed for a Tx WR for the given Ethernet
* packet, including the needed WR and CPL headers.
@@ -802,19 +804,6 @@ static inline unsigned int calc_tx_flits(const struct sk_buff *skb,
}
/**
- * calc_tx_descs - calculate the number of Tx descriptors for a packet
- * @skb: the packet
- *
- * Returns the number of Tx descriptors needed for the given Ethernet
- * packet, including the needed WR and CPL headers.
- */
-static inline unsigned int calc_tx_descs(const struct sk_buff *skb,
- unsigned int chip_ver)
-{
- return flits_to_desc(calc_tx_flits(skb, chip_ver));
-}
-
-/**
* cxgb4_write_sgl - populate a scatter/gather list for a packet
* @skb: the packet
* @q: the Tx queue we are writing into
@@ -887,6 +876,114 @@ void cxgb4_write_sgl(const struct sk_buff *skb, struct sge_txq *q,
}
EXPORT_SYMBOL(cxgb4_write_sgl);
+/* cxgb4_write_partial_sgl - populate SGL for partial packet
+ * @skb: the packet
+ * @q: the Tx queue we are writing into
+ * @sgl: starting location for writing the SGL
+ * @end: points right after the end of the SGL
+ * @addr: the list of bus addresses for the SGL elements
+ * @start: start offset in the SKB where partial data starts
+ * @len: length of data from @start to send out
+ *
+ * This API will handle sending out partial data of a skb if required.
+ * Unlike cxgb4_write_sgl, @start can be any offset into the skb data,
+ * and @len will decide how much data after @start offset to send out.
+ */
+void cxgb4_write_partial_sgl(const struct sk_buff *skb, struct sge_txq *q,
+ struct ulptx_sgl *sgl, u64 *end,
+ const dma_addr_t *addr, u32 start, u32 len)
+{
+ struct ulptx_sge_pair buf[MAX_SKB_FRAGS / 2 + 1] = {0}, *to;
+ u32 frag_size, skb_linear_data_len = skb_headlen(skb);
+ struct skb_shared_info *si = skb_shinfo(skb);
+ u8 i = 0, frag_idx = 0, nfrags = 0;
+ skb_frag_t *frag;
+
+ /* Fill the first SGL either from linear data or from partial
+ * frag based on @start.
+ */
+ if (unlikely(start < skb_linear_data_len)) {
+ frag_size = min(len, skb_linear_data_len - start);
+ sgl->len0 = htonl(frag_size);
+ sgl->addr0 = cpu_to_be64(addr[0] + start);
+ len -= frag_size;
+ nfrags++;
+ } else {
+ start -= skb_linear_data_len;
+ frag = &si->frags[frag_idx];
+ frag_size = skb_frag_size(frag);
+ /* find the first frag */
+ while (start >= frag_size) {
+ start -= frag_size;
+ frag_idx++;
+ frag = &si->frags[frag_idx];
+ frag_size = skb_frag_size(frag);
+ }
+
+ frag_size = min(len, skb_frag_size(frag) - start);
+ sgl->len0 = cpu_to_be32(frag_size);
+ sgl->addr0 = cpu_to_be64(addr[frag_idx + 1] + start);
+ len -= frag_size;
+ nfrags++;
+ frag_idx++;
+ }
+
+ /* If the entire partial data fit in one SGL, then send it out
+ * now.
+ */
+ if (!len)
+ goto done;
+
+ /* Most of the complexity below deals with the possibility we hit the
+ * end of the queue in the middle of writing the SGL. For this case
+ * only we create the SGL in a temporary buffer and then copy it.
+ */
+ to = (u8 *)end > (u8 *)q->stat ? buf : sgl->sge;
+
+ /* If the skb couldn't fit in first SGL completely, fill the
+ * rest of the frags in subsequent SGLs. Note that each SGL
+ * pair can store 2 frags.
+ */
+ while (len) {
+ frag_size = min(len, skb_frag_size(&si->frags[frag_idx]));
+ to->len[i & 1] = cpu_to_be32(frag_size);
+ to->addr[i & 1] = cpu_to_be64(addr[frag_idx + 1]);
+ if (i && (i & 1))
+ to++;
+ nfrags++;
+ frag_idx++;
+ i++;
+ len -= frag_size;
+ }
+
+ /* If we ended in an odd boundary, then set the second SGL's
+ * length in the pair to 0.
+ */
+ if (i & 1)
+ to->len[1] = cpu_to_be32(0);
+
+ /* Copy from temporary buffer to Tx ring, in case we hit the
+ * end of the queue in the middle of writing the SGL.
+ */
+ if (unlikely((u8 *)end > (u8 *)q->stat)) {
+ u32 part0 = (u8 *)q->stat - (u8 *)sgl->sge, part1;
+
+ if (likely(part0))
+ memcpy(sgl->sge, buf, part0);
+ part1 = (u8 *)end - (u8 *)q->stat;
+ memcpy(q->desc, (u8 *)buf + part0, part1);
+ end = (void *)q->desc + part1;
+ }
+
+ /* 0-pad to multiple of 16 */
+ if ((uintptr_t)end & 8)
+ *end = 0;
+done:
+ sgl->cmd_nsge = htonl(ULPTX_CMD_V(ULP_TX_SC_DSGL) |
+ ULPTX_NSGE_V(nfrags));
+}
+EXPORT_SYMBOL(cxgb4_write_partial_sgl);
+
/* This function copies 64 byte coalesced work request to
* memory mapped BAR2 space. For coalesced WR SGE fetches
* data from the FIFO instead of from Host.
@@ -1413,42 +1510,37 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
pi = netdev_priv(dev);
adap = pi->adapter;
ssi = skb_shinfo(skb);
-#ifdef CONFIG_CHELSIO_IPSEC_INLINE
+#if IS_ENABLED(CONFIG_CHELSIO_IPSEC_INLINE)
if (xfrm_offload(skb) && !ssi->gso_size)
- return adap->uld[CXGB4_ULD_CRYPTO].tx_handler(skb, dev);
+ return adap->uld[CXGB4_ULD_IPSEC].tx_handler(skb, dev);
#endif /* CHELSIO_IPSEC_INLINE */
-#ifdef CONFIG_CHELSIO_TLS_DEVICE
- if (skb->decrypted)
- return adap->uld[CXGB4_ULD_CRYPTO].tx_handler(skb, dev);
+#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE)
+ if (tls_is_skb_tx_device_offloaded(skb) &&
+ (skb->len - skb_tcp_all_headers(skb)))
+ return adap->uld[CXGB4_ULD_KTLS].tx_handler(skb, dev);
#endif /* CHELSIO_TLS_DEVICE */
qidx = skb_get_queue_mapping(skb);
if (ptp_enabled) {
- spin_lock(&adap->ptp_lock);
if (!(adap->ptp_tx_skb)) {
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
adap->ptp_tx_skb = skb_get(skb);
} else {
- spin_unlock(&adap->ptp_lock);
goto out_free;
}
q = &adap->sge.ptptxq;
} else {
q = &adap->sge.ethtxq[qidx + pi->first_qset];
}
- skb_tx_timestamp(skb);
reclaim_completed_tx(adap, &q->q, -1, true);
cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F;
#ifdef CONFIG_CHELSIO_T4_FCOE
ret = cxgb_fcoe_offload(skb, adap, pi, &cntrl);
- if (unlikely(ret == -ENOTSUPP)) {
- if (ptp_enabled)
- spin_unlock(&adap->ptp_lock);
+ if (unlikely(ret == -EOPNOTSUPP))
goto out_free;
- }
#endif /* CONFIG_CHELSIO_T4_FCOE */
chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip);
@@ -1461,8 +1553,6 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
dev_err(adap->pdev_dev,
"%s: Tx ring %u full while queue awake!\n",
dev->name, qidx);
- if (ptp_enabled)
- spin_unlock(&adap->ptp_lock);
return NETDEV_TX_BUSY;
}
@@ -1481,8 +1571,6 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
unlikely(cxgb4_map_skb(adap->pdev_dev, skb, sgl_sdesc->addr) < 0)) {
memset(sgl_sdesc->addr, 0, sizeof(sgl_sdesc->addr));
q->mapping_err++;
- if (ptp_enabled)
- spin_unlock(&adap->ptp_lock);
goto out_free;
}
@@ -1497,7 +1585,8 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
* has opened up.
*/
eth_txq_stop(q);
- wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
+ if (chip_ver > CHELSIO_T5)
+ wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
}
wr = (void *)&q->q.desc[q->q.pidx];
@@ -1533,8 +1622,7 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
if (iph->version == 4) {
iph->check = 0;
iph->tot_len = 0;
- iph->check = (u16)(~ip_fast_csum((u8 *)iph,
- iph->ihl));
+ iph->check = ~ip_fast_csum((u8 *)iph, iph->ihl);
}
if (skb->ip_summed == CHECKSUM_PARTIAL)
cntrl = hwcsum(adap->params.chip, skb);
@@ -1617,6 +1705,8 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
cpl->len = htons(skb->len);
cpl->ctrl1 = cpu_to_be64(cntrl);
+ skb_tx_timestamp(skb);
+
if (immediate) {
cxgb4_inline_tx_skb(skb, &q->q, sgl);
dev_consume_skb_any(skb);
@@ -1630,8 +1720,6 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
txq_advance(&q->q, ndesc);
cxgb4_ring_tx_db(adap, &q->q, ndesc);
- if (ptp_enabled)
- spin_unlock(&adap->ptp_lock);
return NETDEV_TX_OK;
out_free:
@@ -1732,6 +1820,7 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
struct adapter *adapter;
int qidx, credits, ret;
size_t fw_hdr_copy_len;
+ unsigned int chip_ver;
u64 cntrl, *end;
u32 wr_mid;
@@ -1740,8 +1829,10 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
* (including the VLAN tag) into the header so we reject anything
* smaller than that ...
*/
- fw_hdr_copy_len = sizeof(wr->ethmacdst) + sizeof(wr->ethmacsrc) +
- sizeof(wr->ethtype) + sizeof(wr->vlantci);
+ BUILD_BUG_ON(sizeof(wr->firmware) !=
+ (sizeof(wr->ethmacdst) + sizeof(wr->ethmacsrc) +
+ sizeof(wr->ethtype) + sizeof(wr->vlantci)));
+ fw_hdr_copy_len = sizeof(wr->firmware);
ret = cxgb4_validate_skb(skb, dev, fw_hdr_copy_len);
if (ret)
goto out_free;
@@ -1796,6 +1887,7 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
goto out_free;
}
+ chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip);
wr_mid = FW_WR_LEN16_V(DIV_ROUND_UP(flits, 2));
if (unlikely(credits < ETHTXQ_STOP_THRES)) {
/* After we're done injecting the Work Request for this
@@ -1807,7 +1899,8 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
* has opened up.
*/
eth_txq_stop(txq);
- wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
+ if (chip_ver > CHELSIO_T5)
+ wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
}
/* Start filling in our Work Request. Note that we do _not_ handle
@@ -1820,7 +1913,7 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
wr->equiq_to_len16 = cpu_to_be32(wr_mid);
wr->r3[0] = cpu_to_be32(0);
wr->r3[1] = cpu_to_be32(0);
- skb_copy_from_linear_data(skb, (void *)wr->ethmacdst, fw_hdr_copy_len);
+ skb_copy_from_linear_data(skb, &wr->firmware, fw_hdr_copy_len);
end = (u64 *)wr + flits;
/* If this is a Large Send Offload packet we'll put in an LSO CPL
@@ -1860,7 +1953,7 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
*/
cpl = (void *)(lso + 1);
- if (CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5)
+ if (chip_ver <= CHELSIO_T5)
cntrl = TXPKT_ETHHDR_LEN_V(eth_xtra_len);
else
cntrl = T6_TXPKT_ETHHDR_LEN_V(eth_xtra_len);
@@ -2176,7 +2269,6 @@ static int ethofld_hard_xmit(struct net_device *dev,
d = &eosw_txq->desc[eosw_txq->last_pidx];
skb = d->skb;
- skb_tx_timestamp(skb);
wr = (struct fw_eth_tx_eo_wr *)&eohw_txq->q.desc[eohw_txq->q.pidx];
if (unlikely(eosw_txq->state != CXGB4_EO_STATE_ACTIVE &&
@@ -2281,6 +2373,7 @@ write_wr_headers:
eohw_txq->vlan_ins++;
txq_advance(&eohw_txq->q, ndesc);
+ skb_tx_timestamp(skb);
cxgb4_ring_tx_db(adap, &eohw_txq->q, ndesc);
eosw_txq_advance_index(&eosw_txq->last_pidx, 1, eosw_txq->ndesc);
@@ -2377,6 +2470,16 @@ netdev_tx_t t4_start_xmit(struct sk_buff *skb, struct net_device *dev)
if (unlikely(qid >= pi->nqsets))
return cxgb4_ethofld_xmit(skb, dev);
+ if (is_ptp_enabled(skb, dev)) {
+ struct adapter *adap = netdev2adap(dev);
+ netdev_tx_t ret;
+
+ spin_lock(&adap->ptp_lock);
+ ret = cxgb4_eth_xmit(skb, dev);
+ spin_unlock(&adap->ptp_lock);
+ return ret;
+ }
+
return cxgb4_eth_xmit(skb, dev);
}
@@ -2410,9 +2513,9 @@ static void eosw_txq_flush_pending_skbs(struct sge_eosw_txq *eosw_txq)
/**
* cxgb4_ethofld_send_flowc - Send ETHOFLD flowc request to bind eotid to tc.
- * @dev - netdevice
- * @eotid - ETHOFLD tid to bind/unbind
- * @tc - traffic class. If set to FW_SCHED_CLS_NONE, then unbinds the @eotid
+ * @dev: netdevice
+ * @eotid: ETHOFLD tid to bind/unbind
+ * @tc: traffic class. If set to FW_SCHED_CLS_NONE, then unbinds the @eotid
*
* Send a FLOWC work request to bind an ETHOFLD TID to a traffic class.
* If @tc is set to FW_SCHED_CLS_NONE, then the @eotid is unbound from
@@ -2431,7 +2534,7 @@ int cxgb4_ethofld_send_flowc(struct net_device *dev, u32 eotid, u32 tc)
struct sk_buff *skb;
int ret = 0;
- len = sizeof(*flowc) + sizeof(struct fw_flowc_mnemval) * nparams;
+ len = struct_size(flowc, mnemval, nparams);
len16 = DIV_ROUND_UP(len, 16);
entry = cxgb4_lookup_eotid(&adap->tids, eotid);
@@ -2442,6 +2545,12 @@ int cxgb4_ethofld_send_flowc(struct net_device *dev, u32 eotid, u32 tc)
if (!eosw_txq)
return -ENOMEM;
+ if (!(adap->flags & CXGB4_FW_OK)) {
+ /* Don't stall caller when access to FW is lost */
+ complete(&eosw_txq->completion);
+ return -EIO;
+ }
+
skb = alloc_skb(len, GFP_KERNEL);
if (!skb)
return -ENOMEM;
@@ -2449,12 +2558,12 @@ int cxgb4_ethofld_send_flowc(struct net_device *dev, u32 eotid, u32 tc)
spin_lock_bh(&eosw_txq->lock);
if (tc != FW_SCHED_CLS_NONE) {
if (eosw_txq->state != CXGB4_EO_STATE_CLOSED)
- goto out_unlock;
+ goto out_free_skb;
next_state = CXGB4_EO_STATE_FLOWC_OPEN_SEND;
} else {
if (eosw_txq->state != CXGB4_EO_STATE_ACTIVE)
- goto out_unlock;
+ goto out_free_skb;
next_state = CXGB4_EO_STATE_FLOWC_CLOSE_SEND;
}
@@ -2490,17 +2599,19 @@ int cxgb4_ethofld_send_flowc(struct net_device *dev, u32 eotid, u32 tc)
eosw_txq_flush_pending_skbs(eosw_txq);
ret = eosw_txq_enqueue(eosw_txq, skb);
- if (ret) {
- dev_consume_skb_any(skb);
- goto out_unlock;
- }
+ if (ret)
+ goto out_free_skb;
eosw_txq->state = next_state;
eosw_txq->flowc_idx = eosw_txq->pidx;
eosw_txq_advance(eosw_txq, 1);
ethofld_xmit(dev, eosw_txq);
-out_unlock:
+ spin_unlock_bh(&eosw_txq->lock);
+ return 0;
+
+out_free_skb:
+ dev_consume_skb_any(skb);
spin_unlock_bh(&eosw_txq->lock);
return ret;
}
@@ -2536,6 +2647,84 @@ static void ctrlq_check_stop(struct sge_ctrl_txq *q, struct fw_wr_hdr *wr)
}
}
+#define CXGB4_SELFTEST_LB_STR "CHELSIO_SELFTEST"
+
+int cxgb4_selftest_lb_pkt(struct net_device *netdev)
+{
+ struct port_info *pi = netdev_priv(netdev);
+ struct adapter *adap = pi->adapter;
+ struct cxgb4_ethtool_lb_test *lb;
+ int ret, i = 0, pkt_len, credits;
+ struct fw_eth_tx_pkt_wr *wr;
+ struct cpl_tx_pkt_core *cpl;
+ u32 ctrl0, ndesc, flits;
+ struct sge_eth_txq *q;
+ u8 *sgl;
+
+ pkt_len = ETH_HLEN + sizeof(CXGB4_SELFTEST_LB_STR);
+
+ flits = DIV_ROUND_UP(pkt_len + sizeof(*cpl) + sizeof(*wr),
+ sizeof(__be64));
+ ndesc = flits_to_desc(flits);
+
+ lb = &pi->ethtool_lb;
+ lb->loopback = 1;
+
+ q = &adap->sge.ethtxq[pi->first_qset];
+ __netif_tx_lock_bh(q->txq);
+
+ reclaim_completed_tx(adap, &q->q, -1, true);
+ credits = txq_avail(&q->q) - ndesc;
+ if (unlikely(credits < 0)) {
+ __netif_tx_unlock_bh(q->txq);
+ return -ENOMEM;
+ }
+
+ wr = (void *)&q->q.desc[q->q.pidx];
+ memset(wr, 0, sizeof(struct tx_desc));
+
+ wr->op_immdlen = htonl(FW_WR_OP_V(FW_ETH_TX_PKT_WR) |
+ FW_WR_IMMDLEN_V(pkt_len +
+ sizeof(*cpl)));
+ wr->equiq_to_len16 = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(flits, 2)));
+ wr->r3 = cpu_to_be64(0);
+
+ cpl = (void *)(wr + 1);
+ sgl = (u8 *)(cpl + 1);
+
+ ctrl0 = TXPKT_OPCODE_V(CPL_TX_PKT_XT) | TXPKT_PF_V(adap->pf) |
+ TXPKT_INTF_V(pi->tx_chan + 4);
+
+ cpl->ctrl0 = htonl(ctrl0);
+ cpl->pack = htons(0);
+ cpl->len = htons(pkt_len);
+ cpl->ctrl1 = cpu_to_be64(TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F);
+
+ eth_broadcast_addr(sgl);
+ i += ETH_ALEN;
+ ether_addr_copy(&sgl[i], netdev->dev_addr);
+ i += ETH_ALEN;
+
+ snprintf(&sgl[i], sizeof(CXGB4_SELFTEST_LB_STR), "%s",
+ CXGB4_SELFTEST_LB_STR);
+
+ init_completion(&lb->completion);
+ txq_advance(&q->q, ndesc);
+ cxgb4_ring_tx_db(adap, &q->q, ndesc);
+ __netif_tx_unlock_bh(q->txq);
+
+ /* wait for the pkt to return */
+ ret = wait_for_completion_timeout(&lb->completion, 10 * HZ);
+ if (!ret)
+ ret = -ETIMEDOUT;
+ else
+ ret = lb->result;
+
+ lb->loopback = 0;
+
+ return ret;
+}
+
/**
* ctrl_xmit - send a packet through an SGE control Tx queue
* @q: the control queue
@@ -2581,15 +2770,15 @@ static int ctrl_xmit(struct sge_ctrl_txq *q, struct sk_buff *skb)
/**
* restart_ctrlq - restart a suspended control queue
- * @data: the control queue to restart
+ * @t: pointer to the tasklet associated with this handler
*
* Resumes transmission on a suspended Tx control queue.
*/
-static void restart_ctrlq(unsigned long data)
+static void restart_ctrlq(struct tasklet_struct *t)
{
struct sk_buff *skb;
unsigned int written = 0;
- struct sge_ctrl_txq *q = (struct sge_ctrl_txq *)data;
+ struct sge_ctrl_txq *q = from_tasklet(q, t, qresume_tsk);
spin_lock(&q->sendq.lock);
reclaim_completed_tx_imm(&q->q);
@@ -2654,17 +2843,22 @@ int t4_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
* @skb: the packet
*
* Returns true if a packet can be sent as an offload WR with immediate
- * data. We currently use the same limit as for Ethernet packets.
+ * data.
+ * FW_OFLD_TX_DATA_WR limits the payload to 255 bytes due to 8-bit field.
+ * However, FW_ULPTX_WR commands have a 256 byte immediate only
+ * payload limit.
*/
static inline int is_ofld_imm(const struct sk_buff *skb)
{
struct work_request_hdr *req = (struct work_request_hdr *)skb->data;
unsigned long opcode = FW_WR_OP_G(ntohl(req->wr_hi));
- if (opcode == FW_CRYPTO_LOOKASIDE_WR)
+ if (unlikely(opcode == FW_ULPTX_WR))
+ return skb->len <= MAX_IMM_ULPTX_WR_LEN;
+ else if (opcode == FW_CRYPTO_LOOKASIDE_WR)
return skb->len <= SGE_MAX_WR_LEN;
else
- return skb->len <= MAX_IMM_TX_PKT_LEN;
+ return skb->len <= MAX_IMM_OFLD_TX_DATA_WR_LEN;
}
/**
@@ -2691,7 +2885,6 @@ static inline unsigned int calc_tx_flits_ofld(const struct sk_buff *skb)
/**
* txq_stop_maperr - stop a Tx queue due to I/O MMU exhaustion
- * @adap: the adapter
* @q: the queue to stop
*
* Mark a Tx queue stopped due to I/O MMU exhaustion and resulting
@@ -2883,13 +3076,13 @@ static int ofld_xmit(struct sge_uld_txq *q, struct sk_buff *skb)
/**
* restart_ofldq - restart a suspended offload queue
- * @data: the offload queue to restart
+ * @t: pointer to the tasklet associated with this handler
*
* Resumes transmission on a suspended Tx offload queue.
*/
-static void restart_ofldq(unsigned long data)
+static void restart_ofldq(struct tasklet_struct *t)
{
- struct sge_uld_txq *q = (struct sge_uld_txq *)data;
+ struct sge_uld_txq *q = from_tasklet(q, t, qresume_tsk);
spin_lock(&q->sendq.lock);
q->full = 0; /* the queue actually is completely empty now */
@@ -2938,6 +3131,7 @@ static inline int uld_send(struct adapter *adap, struct sk_buff *skb,
txq_info = adap->sge.uld_txq_info[tx_uld_type];
if (unlikely(!txq_info)) {
WARN_ON(true);
+ kfree_skb(skb);
return NET_XMIT_DROP;
}
@@ -3286,7 +3480,7 @@ enum {
/**
* t4_systim_to_hwstamp - read hardware time stamp
- * @adap: the adapter
+ * @adapter: the adapter
* @skb: the packet
*
* Read Time Stamp from MPS packet and insert in skb which
@@ -3313,15 +3507,16 @@ static noinline int t4_systim_to_hwstamp(struct adapter *adapter,
hwtstamps = skb_hwtstamps(skb);
memset(hwtstamps, 0, sizeof(*hwtstamps));
- hwtstamps->hwtstamp = ns_to_ktime(be64_to_cpu(*((u64 *)data)));
+ hwtstamps->hwtstamp = ns_to_ktime(get_unaligned_be64(data));
return RX_PTP_PKT_SUC;
}
/**
* t4_rx_hststamp - Recv PTP Event Message
- * @adap: the adapter
+ * @adapter: the adapter
* @rsp: the response queue descriptor holding the RX_PKT message
+ * @rxq: the response queue holding the RX_PKT message
* @skb: the packet
*
* PTP enabled and MPS packet, read HW timestamp
@@ -3345,7 +3540,7 @@ static int t4_rx_hststamp(struct adapter *adapter, const __be64 *rsp,
/**
* t4_tx_hststamp - Loopback PTP Transmit Event Message
- * @adap: the adapter
+ * @adapter: the adapter
* @skb: the packet
* @dev: the ingress net device
*
@@ -3409,9 +3604,53 @@ static void t4_tx_completion_handler(struct sge_rspq *rspq,
}
txq = &s->ethtxq[pi->first_qset + rspq->idx];
+
+ /* We've got the Hardware Consumer Index Update in the Egress Update
+ * message. These Egress Update messages will be our sole CIDX Updates
+ * we get since we don't want to chew up PCIe bandwidth for both Ingress
+ * Messages and Status Page writes. However, The code which manages
+ * reclaiming successfully DMA'ed TX Work Requests uses the CIDX value
+ * stored in the Status Page at the end of the TX Queue. It's easiest
+ * to simply copy the CIDX Update value from the Egress Update message
+ * to the Status Page. Also note that no Endian issues need to be
+ * considered here since both are Big Endian and we're just copying
+ * bytes consistently ...
+ */
+ if (CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5) {
+ struct cpl_sge_egr_update *egr;
+
+ egr = (struct cpl_sge_egr_update *)rsp;
+ WRITE_ONCE(txq->q.stat->cidx, egr->cidx);
+ }
+
t4_sge_eth_txq_egress_update(adapter, txq, -1);
}
+static int cxgb4_validate_lb_pkt(struct port_info *pi, const struct pkt_gl *si)
+{
+ struct adapter *adap = pi->adapter;
+ struct cxgb4_ethtool_lb_test *lb;
+ struct sge *s = &adap->sge;
+ struct net_device *netdev;
+ u8 *data;
+ int i;
+
+ netdev = adap->port[pi->port_id];
+ lb = &pi->ethtool_lb;
+ data = si->va + s->pktshift;
+
+ i = ETH_ALEN;
+ if (!ether_addr_equal(data + i, netdev->dev_addr))
+ return -1;
+
+ i += ETH_ALEN;
+ if (strcmp(&data[i], CXGB4_SELFTEST_LB_STR))
+ lb->result = -EIO;
+
+ complete(&lb->completion);
+ return 0;
+}
+
/**
* t4_ethrx_handler - process an ingress ethernet packet
* @q: the response queue that received the packet
@@ -3435,6 +3674,7 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
struct port_info *pi;
int ret = 0;
+ pi = netdev_priv(q->netdev);
/* If we're looking at TX Queue CIDX Update, handle that separately
* and return.
*/
@@ -3462,6 +3702,12 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
if (err_vec)
rxq->stats.bad_rx_pkts++;
+ if (unlikely(pi->ethtool_lb.loopback && pkt->iff >= NCHAN)) {
+ ret = cxgb4_validate_lb_pkt(pi, si);
+ if (!ret)
+ return 0;
+ }
+
if (((pkt->l2info & htonl(RXF_TCP_F)) ||
tnl_hdr_len) &&
(q->netdev->features & NETIF_F_GRO) && csum_ok && !pkt->ip_frag) {
@@ -3475,7 +3721,6 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
rxq->stats.rx_drops++;
return 0;
}
- pi = netdev_priv(q->netdev);
/* Handle PTP Event Rx packet */
if (unlikely(pi->ptp_enable)) {
@@ -3776,9 +4021,10 @@ static int napi_rx_handler(struct napi_struct *napi, int budget)
return work_done;
}
-void cxgb4_ethofld_restart(unsigned long data)
+void cxgb4_ethofld_restart(struct tasklet_struct *t)
{
- struct sge_eosw_txq *eosw_txq = (struct sge_eosw_txq *)data;
+ struct sge_eosw_txq *eosw_txq = from_tasklet(eosw_txq, t,
+ qresume_tsk);
int pktcount;
spin_lock(&eosw_txq->lock);
@@ -3988,7 +4234,7 @@ static void sge_rx_timer_cb(struct timer_list *t)
{
unsigned long m;
unsigned int i;
- struct adapter *adap = from_timer(adap, t, sge.rx_timer);
+ struct adapter *adap = timer_container_of(adap, t, sge.rx_timer);
struct sge *s = &adap->sge;
for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++)
@@ -4002,7 +4248,7 @@ static void sge_rx_timer_cb(struct timer_list *t)
if (fl_starving(adap, fl)) {
rxq = container_of(fl, struct sge_eth_rxq, fl);
- if (napi_reschedule(&rxq->rspq.napi))
+ if (napi_schedule(&rxq->rspq.napi))
fl->starving++;
else
set_bit(id, s->starving_fl);
@@ -4023,7 +4269,7 @@ done:
static void sge_tx_timer_cb(struct timer_list *t)
{
- struct adapter *adap = from_timer(adap, t, sge.tx_timer);
+ struct adapter *adap = timer_container_of(adap, t, sge.tx_timer);
struct sge *s = &adap->sge;
unsigned long m, period;
unsigned int i, budget;
@@ -4208,7 +4454,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
if (ret)
goto err;
- netif_napi_add(dev, &iq->napi, napi_rx_handler, 64);
+ netif_napi_add(dev, &iq->napi, napi_rx_handler);
iq->cur_desc = iq->desc;
iq->cidx = 0;
iq->gen = 1;
@@ -4362,11 +4608,15 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
* write the CIDX Updates into the Status Page at the end of the
* TX Queue.
*/
- c.autoequiqe_to_viid = htonl(FW_EQ_ETH_CMD_AUTOEQUEQE_F |
+ c.autoequiqe_to_viid = htonl(((chip_ver <= CHELSIO_T5) ?
+ FW_EQ_ETH_CMD_AUTOEQUIQE_F :
+ FW_EQ_ETH_CMD_AUTOEQUEQE_F) |
FW_EQ_ETH_CMD_VIID_V(pi->viid));
c.fetchszm_to_iqid =
- htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(HOSTFCMODE_STATUS_PAGE_X) |
+ htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V((chip_ver <= CHELSIO_T5) ?
+ HOSTFCMODE_INGRESS_QUEUE_X :
+ HOSTFCMODE_STATUS_PAGE_X) |
FW_EQ_ETH_CMD_PCIECHN_V(pi->tx_chan) |
FW_EQ_ETH_CMD_FETCHRO_F | FW_EQ_ETH_CMD_IQID_V(iqid));
@@ -4377,6 +4627,7 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
: FETCHBURSTMIN_64B_T6_X) |
FW_EQ_ETH_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) |
FW_EQ_ETH_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) |
+ FW_EQ_ETH_CMD_CIDXFTHRESHO_V(chip_ver == CHELSIO_T5) |
FW_EQ_ETH_CMD_EQSIZE_V(nentries));
c.eqaddr = cpu_to_be64(txq->q.phys_addr);
@@ -4469,7 +4720,7 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
init_txq(adap, &txq->q, FW_EQ_CTRL_CMD_EQID_G(ntohl(c.cmpliqid_eqid)));
txq->adap = adap;
skb_queue_head_init(&txq->sendq);
- tasklet_init(&txq->qresume_tsk, restart_ctrlq, (unsigned long)txq);
+ tasklet_setup(&txq->qresume_tsk, restart_ctrlq);
txq->full = 0;
return 0;
}
@@ -4559,7 +4810,7 @@ int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq,
txq->q.q_type = CXGB4_TXQ_ULD;
txq->adap = adap;
skb_queue_head_init(&txq->sendq);
- tasklet_init(&txq->qresume_tsk, restart_ofldq, (unsigned long)txq);
+ tasklet_setup(&txq->qresume_tsk, restart_ofldq);
txq->full = 0;
txq->mapping_err = 0;
return 0;
@@ -4624,22 +4875,6 @@ void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq,
}
}
-/**
- * t4_free_ofld_rxqs - free a block of consecutive Rx queues
- * @adap: the adapter
- * @n: number of queues
- * @q: pointer to first queue
- *
- * Release the resources of a consecutive block of offload Rx queues.
- */
-void t4_free_ofld_rxqs(struct adapter *adap, int n, struct sge_ofld_rxq *q)
-{
- for ( ; n; n--, q++)
- if (q->rspq.desc)
- free_rspq_fl(adap, &q->rspq,
- q->fl.size ? &q->fl : NULL);
-}
-
void t4_sge_free_ethofld_txq(struct adapter *adap, struct sge_eohw_txq *txq)
{
if (txq->q.desc) {
@@ -4761,13 +4996,10 @@ void t4_sge_stop(struct adapter *adap)
int i;
struct sge *s = &adap->sge;
- if (in_interrupt()) /* actions below require waiting */
- return;
-
if (s->rx_timer.function)
- del_timer_sync(&s->rx_timer);
+ timer_delete_sync(&s->rx_timer);
if (s->tx_timer.function)
- del_timer_sync(&s->tx_timer);
+ timer_delete_sync(&s->tx_timer);
if (is_offload(adap)) {
struct sge_uld_txq_info *txq_info;