diff options
author | Yunsheng Lin <linyunsheng@huawei.com> | 2021-06-16 14:36:13 +0800 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2021-06-16 00:36:06 -0700 |
commit | 907676b130711fd1f627824559e92259db2061d1 (patch) | |
tree | b5a7a41d1f7c3194ea21d216aa2b716d53078ef5 /drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | |
parent | 8677d78c3d860c156ccd335e2b97728298c2cbb1 (diff) |
net: hns3: use tx bounce buffer for small packets
When the packet or frag size is small, it causes both security and
performance issues. As DMA can't map a sub-page, this means some extra
kernel data is visible to devices. On the other hand, the overhead
of DMA map and unmap is huge when the IOMMU is on.
So add a queue-based tx shared bounce buffer to memcpy the small
packet when the length of the transmitted skb does not exceed tx_copybreak.
Add tx_spare_buf_size module param to set the size of tx spare
buffer, and add set/get_tunable to set or query the tx_copybreak.
The throughput improves from 30 Gbps to 90+ Gbps when running 16
netperf threads with a 32KB UDP message size and the IOMMU in
strict mode (tx_copybreak = 2000 and mtu = 1500).
Suggested-by: Barry Song <song.bao.hua@hisilicon.com>
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet/hisilicon/hns3/hns3_enet.c')
-rw-r--r-- | drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 289 |
1 files changed, 283 insertions, 6 deletions
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 6fa1ed5c4098..e5466daac1c4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -53,6 +53,10 @@ static int debug = -1; module_param(debug, int, 0); MODULE_PARM_DESC(debug, " Network interface message level setting"); +static unsigned int tx_spare_buf_size; +module_param(tx_spare_buf_size, uint, 0400); +MODULE_PARM_DESC(tx_spare_buf_size, "Size used to allocate tx spare buffer"); + #define DEFAULT_MSG_LEVEL (NETIF_MSG_PROBE | NETIF_MSG_LINK | \ NETIF_MSG_IFDOWN | NETIF_MSG_IFUP) @@ -941,6 +945,177 @@ void hns3_request_update_promisc_mode(struct hnae3_handle *handle) ops->request_update_promisc_mode(handle); } +static u32 hns3_tx_spare_space(struct hns3_enet_ring *ring) +{ + struct hns3_tx_spare *tx_spare = ring->tx_spare; + u32 ntc, ntu; + + /* This smp_load_acquire() pairs with smp_store_release() in + * hns3_tx_spare_update() called in tx desc cleaning process. + */ + ntc = smp_load_acquire(&tx_spare->last_to_clean); + ntu = tx_spare->next_to_use; + + if (ntc > ntu) + return ntc - ntu - 1; + + /* The free tx buffer is divided into two part, so pick the + * larger one. + */ + return (ntc > (tx_spare->len - ntu) ? ntc : + (tx_spare->len - ntu)) - 1; +} + +static void hns3_tx_spare_update(struct hns3_enet_ring *ring) +{ + struct hns3_tx_spare *tx_spare = ring->tx_spare; + + if (!tx_spare || + tx_spare->last_to_clean == tx_spare->next_to_clean) + return; + + /* This smp_store_release() pairs with smp_load_acquire() in + * hns3_tx_spare_space() called in xmit process. + */ + smp_store_release(&tx_spare->last_to_clean, + tx_spare->next_to_clean); +} + +static bool hns3_can_use_tx_bounce(struct hns3_enet_ring *ring, + struct sk_buff *skb, + u32 space) +{ + u32 len = skb->len <= ring->tx_copybreak ? 
skb->len : + skb_headlen(skb); + + if (len > ring->tx_copybreak) + return false; + + if (ALIGN(len, dma_get_cache_alignment()) > space) { + u64_stats_update_begin(&ring->syncp); + ring->stats.tx_spare_full++; + u64_stats_update_end(&ring->syncp); + return false; + } + + return true; +} + +static void hns3_init_tx_spare_buffer(struct hns3_enet_ring *ring) +{ + struct hns3_tx_spare *tx_spare; + struct page *page; + dma_addr_t dma; + int order; + + if (!tx_spare_buf_size) + return; + + order = get_order(tx_spare_buf_size); + tx_spare = devm_kzalloc(ring_to_dev(ring), sizeof(*tx_spare), + GFP_KERNEL); + if (!tx_spare) { + /* The driver still work without the tx spare buffer */ + dev_warn(ring_to_dev(ring), "failed to allocate hns3_tx_spare\n"); + return; + } + + page = alloc_pages_node(dev_to_node(ring_to_dev(ring)), + GFP_KERNEL, order); + if (!page) { + dev_warn(ring_to_dev(ring), "failed to allocate tx spare pages\n"); + devm_kfree(ring_to_dev(ring), tx_spare); + return; + } + + dma = dma_map_page(ring_to_dev(ring), page, 0, + PAGE_SIZE << order, DMA_TO_DEVICE); + if (dma_mapping_error(ring_to_dev(ring), dma)) { + dev_warn(ring_to_dev(ring), "failed to map pages for tx spare\n"); + put_page(page); + devm_kfree(ring_to_dev(ring), tx_spare); + return; + } + + tx_spare->dma = dma; + tx_spare->buf = page_address(page); + tx_spare->len = PAGE_SIZE << order; + ring->tx_spare = tx_spare; +} + +/* Use hns3_tx_spare_space() to make sure there is enough buffer + * before calling below function to allocate tx buffer. + */ +static void *hns3_tx_spare_alloc(struct hns3_enet_ring *ring, + unsigned int size, dma_addr_t *dma, + u32 *cb_len) +{ + struct hns3_tx_spare *tx_spare = ring->tx_spare; + u32 ntu = tx_spare->next_to_use; + + size = ALIGN(size, dma_get_cache_alignment()); + *cb_len = size; + + /* Tx spare buffer wraps back here because the end of + * freed tx buffer is not enough. 
+ */ + if (ntu + size > tx_spare->len) { + *cb_len += (tx_spare->len - ntu); + ntu = 0; + } + + tx_spare->next_to_use = ntu + size; + if (tx_spare->next_to_use == tx_spare->len) + tx_spare->next_to_use = 0; + + *dma = tx_spare->dma + ntu; + + return tx_spare->buf + ntu; +} + +static void hns3_tx_spare_rollback(struct hns3_enet_ring *ring, u32 len) +{ + struct hns3_tx_spare *tx_spare = ring->tx_spare; + + if (len > tx_spare->next_to_use) { + len -= tx_spare->next_to_use; + tx_spare->next_to_use = tx_spare->len - len; + } else { + tx_spare->next_to_use -= len; + } +} + +static void hns3_tx_spare_reclaim_cb(struct hns3_enet_ring *ring, + struct hns3_desc_cb *cb) +{ + struct hns3_tx_spare *tx_spare = ring->tx_spare; + u32 ntc = tx_spare->next_to_clean; + u32 len = cb->length; + + tx_spare->next_to_clean += len; + + if (tx_spare->next_to_clean >= tx_spare->len) { + tx_spare->next_to_clean -= tx_spare->len; + + if (tx_spare->next_to_clean) { + ntc = 0; + len = tx_spare->next_to_clean; + } + } + + /* This tx spare buffer is only really reclaimed after calling + * hns3_tx_spare_update(), so it is still safe to use the info in + * the tx buffer to do the dma sync after tx_spare->next_to_clean + * is moved forword. + */ + if (cb->type & (DESC_TYPE_BOUNCE_HEAD | DESC_TYPE_BOUNCE_ALL)) { + dma_addr_t dma = tx_spare->dma + ntc; + + dma_sync_single_for_cpu(ring_to_dev(ring), dma, len, + DMA_TO_DEVICE); + } +} + static int hns3_set_tso(struct sk_buff *skb, u32 *paylen_fdop_ol4cs, u16 *mss, u32 *type_cs_vlan_tso, u32 *send_bytes) { @@ -1471,6 +1646,11 @@ static int hns3_map_and_fill_desc(struct hns3_enet_ring *ring, void *priv, return 0; dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE); + } else if (type & DESC_TYPE_BOUNCE_HEAD) { + /* Head data has been filled in hns3_handle_tx_bounce(), + * just return 0 here. 
+ */ + return 0; } else { skb_frag_t *frag = (skb_frag_t *)priv; @@ -1739,6 +1919,9 @@ static void hns3_clear_desc(struct hns3_enet_ring *ring, int next_to_use_orig) if (desc_cb->type & (DESC_TYPE_SKB | DESC_TYPE_FRAGLIST_SKB)) dma_unmap_single(dev, desc_cb->dma, desc_cb->length, DMA_TO_DEVICE); + else if (desc_cb->type & + (DESC_TYPE_BOUNCE_HEAD | DESC_TYPE_BOUNCE_ALL)) + hns3_tx_spare_rollback(ring, desc_cb->length); else if (desc_cb->length) dma_unmap_page(dev, desc_cb->dma, desc_cb->length, DMA_TO_DEVICE); @@ -1816,6 +1999,79 @@ static void hns3_tsyn(struct net_device *netdev, struct sk_buff *skb, desc->tx.bdtp_fe_sc_vld_ra_ri |= cpu_to_le16(BIT(HNS3_TXD_TSYN_B)); } +static int hns3_handle_tx_bounce(struct hns3_enet_ring *ring, + struct sk_buff *skb) +{ + struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use]; + unsigned int type = DESC_TYPE_BOUNCE_HEAD; + unsigned int size = skb_headlen(skb); + dma_addr_t dma; + int bd_num = 0; + u32 cb_len; + void *buf; + int ret; + + if (skb->len <= ring->tx_copybreak) { + size = skb->len; + type = DESC_TYPE_BOUNCE_ALL; + } + + /* hns3_can_use_tx_bounce() is called to ensure the below + * function can always return the tx buffer. 
+ */ + buf = hns3_tx_spare_alloc(ring, size, &dma, &cb_len); + + ret = skb_copy_bits(skb, 0, buf, size); + if (unlikely(ret < 0)) { + hns3_tx_spare_rollback(ring, cb_len); + u64_stats_update_begin(&ring->syncp); + ring->stats.copy_bits_err++; + u64_stats_update_end(&ring->syncp); + return ret; + } + + desc_cb->priv = skb; + desc_cb->length = cb_len; + desc_cb->dma = dma; + desc_cb->type = type; + + bd_num += hns3_fill_desc(ring, dma, size); + + if (type == DESC_TYPE_BOUNCE_HEAD) { + ret = hns3_fill_skb_to_desc(ring, skb, + DESC_TYPE_BOUNCE_HEAD); + if (unlikely(ret < 0)) + return ret; + + bd_num += ret; + } + + dma_sync_single_for_device(ring_to_dev(ring), dma, size, + DMA_TO_DEVICE); + + u64_stats_update_begin(&ring->syncp); + ring->stats.tx_bounce++; + u64_stats_update_end(&ring->syncp); + return bd_num; +} + +static int hns3_handle_desc_filling(struct hns3_enet_ring *ring, + struct sk_buff *skb) +{ + u32 space; + + if (!ring->tx_spare) + goto out; + + space = hns3_tx_spare_space(ring); + + if (hns3_can_use_tx_bounce(ring, skb, space)) + return hns3_handle_tx_bounce(ring, skb); + +out: + return hns3_fill_skb_to_desc(ring, skb, DESC_TYPE_SKB); +} + netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); @@ -1862,7 +2118,7 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) * zero, which is unlikely, and 'ret > 0' means how many tx desc * need to be notified to the hw. 
*/ - ret = hns3_fill_skb_to_desc(ring, skb, DESC_TYPE_SKB); + ret = hns3_handle_desc_filling(ring, skb); if (unlikely(ret <= 0)) goto fill_err; @@ -2064,6 +2320,7 @@ static void hns3_nic_get_stats64(struct net_device *netdev, tx_drop += ring->stats.tx_tso_err; tx_drop += ring->stats.over_max_recursion; tx_drop += ring->stats.hw_limitation; + tx_drop += ring->stats.copy_bits_err; tx_errors += ring->stats.sw_err_cnt; tx_errors += ring->stats.tx_vlan_err; tx_errors += ring->stats.tx_l4_proto_err; @@ -2071,6 +2328,7 @@ static void hns3_nic_get_stats64(struct net_device *netdev, tx_errors += ring->stats.tx_tso_err; tx_errors += ring->stats.over_max_recursion; tx_errors += ring->stats.hw_limitation; + tx_errors += ring->stats.copy_bits_err; } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); /* fetch the rx stats */ @@ -2864,7 +3122,8 @@ static int hns3_alloc_buffer(struct hns3_enet_ring *ring, static void hns3_free_buffer(struct hns3_enet_ring *ring, struct hns3_desc_cb *cb, int budget) { - if (cb->type & DESC_TYPE_SKB) + if (cb->type & (DESC_TYPE_SKB | DESC_TYPE_BOUNCE_HEAD | + DESC_TYPE_BOUNCE_ALL)) napi_consume_skb(cb->priv, budget); else if (!HNAE3_IS_TX_RING(ring) && cb->pagecnt_bias) __page_frag_cache_drain(cb->priv, cb->pagecnt_bias); @@ -2888,9 +3147,11 @@ static void hns3_unmap_buffer(struct hns3_enet_ring *ring, if (cb->type & (DESC_TYPE_SKB | DESC_TYPE_FRAGLIST_SKB)) dma_unmap_single(ring_to_dev(ring), cb->dma, cb->length, ring_to_dma_dir(ring)); - else if (cb->length) + else if ((cb->type & DESC_TYPE_PAGE) && cb->length) dma_unmap_page(ring_to_dev(ring), cb->dma, cb->length, ring_to_dma_dir(ring)); + else if (cb->type & (DESC_TYPE_BOUNCE_ALL | DESC_TYPE_BOUNCE_HEAD)) + hns3_tx_spare_reclaim_cb(ring, cb); } static void hns3_buffer_detach(struct hns3_enet_ring *ring, int i) @@ -3042,7 +3303,8 @@ static bool hns3_nic_reclaim_desc(struct hns3_enet_ring *ring, desc_cb = &ring->desc_cb[ntc]; - if (desc_cb->type & DESC_TYPE_SKB) { + if (desc_cb->type & 
(DESC_TYPE_SKB | DESC_TYPE_BOUNCE_ALL | + DESC_TYPE_BOUNCE_HEAD)) { (*pkts)++; (*bytes) += desc_cb->send_bytes; } @@ -3065,6 +3327,9 @@ static bool hns3_nic_reclaim_desc(struct hns3_enet_ring *ring, * ring_space called by hns3_nic_net_xmit. */ smp_store_release(&ring->next_to_clean, ntc); + + hns3_tx_spare_update(ring); + return true; } @@ -4245,6 +4510,8 @@ static void hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv, ring = &priv->ring[q->tqp_index]; desc_num = priv->ae_handle->kinfo.num_tx_desc; ring->queue_index = q->tqp_index; + ring->tx_copybreak = priv->tx_copybreak; + ring->last_to_use = 0; } else { ring = &priv->ring[q->tqp_index + queue_num]; desc_num = priv->ae_handle->kinfo.num_rx_desc; @@ -4262,7 +4529,6 @@ static void hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv, ring->desc_num = desc_num; ring->next_to_use = 0; ring->next_to_clean = 0; - ring->last_to_use = 0; } static void hns3_queue_to_ring(struct hnae3_queue *tqp, @@ -4322,6 +4588,8 @@ static int hns3_alloc_ring_memory(struct hns3_enet_ring *ring) ret = hns3_alloc_ring_buffers(ring); if (ret) goto out_with_desc; + } else { + hns3_init_tx_spare_buffer(ring); } return 0; @@ -4344,9 +4612,18 @@ void hns3_fini_ring(struct hns3_enet_ring *ring) ring->next_to_use = 0; ring->last_to_use = 0; ring->pending_buf = 0; - if (ring->skb) { + if (!HNAE3_IS_TX_RING(ring) && ring->skb) { dev_kfree_skb_any(ring->skb); ring->skb = NULL; + } else if (HNAE3_IS_TX_RING(ring) && ring->tx_spare) { + struct hns3_tx_spare *tx_spare = ring->tx_spare; + + dma_unmap_page(ring_to_dev(ring), tx_spare->dma, tx_spare->len, + DMA_TO_DEVICE); + free_pages((unsigned long)tx_spare->buf, + get_order(tx_spare->len)); + devm_kfree(ring_to_dev(ring), tx_spare); + ring->tx_spare = NULL; } } |