summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
diff options
context:
space:
mode:
authorTariq Toukan <tariqt@mellanox.com>2018-11-21 14:08:06 +0200
committerSaeed Mahameed <saeedm@mellanox.com>2018-12-20 22:54:19 -0800
commit5e0d2eef771ee78b092bf93d040eac02a0965fea (patch)
treee6f25b51951334631083ab74756394fb3e808ec2 /drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
parent1feeab80078078186b5080fe15c51a5cce724a0e (diff)
net/mlx5e: XDP, Support Enhanced Multi-Packet TX WQE
Add support for the HW feature of multi-packet WQE in XDP xmit flow. The conventional TX descriptor (WQE, Work Queue Element) serves a single packet. Our HW has support for multi-packet WQE (MPWQE) in which a single descriptor serves multiple TX packets. This reduces both the PCI overhead and the CPU cycles wasted on writing them. In this patch we add support for the HW feature, which is supported starting from ConnectX-5. Performance: Tested packet rate for UDP 64Byte multi-stream over ConnectX-5 NICs. CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz XDP_TX: We see a huge gain on single port ConnectX-5, and reach the 100 Mpps milestone. * Single-port HCA: Before: 70 Mpps After: 100 Mpps (+42.8%) * Dual-port HCA: Before: 51.7 Mpps After: 57.3 Mpps (+10.8%) * In both cases we tested traffic on one port and for now On Dual-port HCAs we see only small gain, we are working to overcome this bottleneck, but for the moment only with experimental firmware on dual port HCAs we can reach the wanted numbers as seen on Single-port HCAs. XDP_REDIRECT: Redirect from (A) ConnectX-5 to (B) ConnectX-5. Due to a setup limitation, (A) and (B) are on different NUMA nodes, so absolute performance numbers are not optimal. Note: Below is the transmit rate of (B), not the redirect rate of (A) which is in some cases higher. * (B) is single-port: Before: 77 Mpps After: 90 Mpps (+16.8%) * (B) is dual-port: Before: 61 Mpps After: 72 Mpps (+18%) Signed-off-by: Tariq Toukan <tariqt@mellanox.com> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h29
1 files changed, 26 insertions, 3 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index fd689ed506af..3a67cb3cd179 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -37,15 +37,16 @@
#define MLX5E_XDP_MAX_MTU ((int)(PAGE_SIZE - \
MLX5_SKB_FRAG_SZ(XDP_PACKET_HEADROOM)))
#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
-#define MLX5E_XDP_TX_DS_COUNT \
- ((sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */)
+#define MLX5E_XDP_TX_EMPTY_DS_COUNT \
+ (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
+#define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */)
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
void *va, u16 *rx_headroom, u32 *len);
bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq);
void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq);
+void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw);
void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq);
-bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi);
int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
u32 flags);
@@ -58,6 +59,28 @@ static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq)
}
static inline void
+mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, dma_addr_t dma_addr, u16 dma_len)
+{
+ struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+ struct mlx5_wqe_data_seg *dseg =
+ (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count++;
+
+ dseg->addr = cpu_to_be64(dma_addr);
+ dseg->byte_count = cpu_to_be32(dma_len);
+ dseg->lkey = sq->mkey_be;
+}
+
+static inline void mlx5e_xdpsq_fetch_wqe(struct mlx5e_xdpsq *sq,
+ struct mlx5e_tx_wqe **wqe)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+
+ *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ memset(*wqe, 0, sizeof(**wqe));
+}
+
+static inline void
mlx5e_xdpi_fifo_push(struct mlx5e_xdp_info_fifo *fifo,
struct mlx5e_xdp_info *xi)
{