summary | refs | log | tree | commit | diff
path: root/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
diff options
context:
space:
mode:
author: Maxim Mikityanskiy <maximmi@mellanox.com> 2020-07-02 12:37:29 +0300
committer: Saeed Mahameed <saeedm@nvidia.com> 2020-09-21 19:41:16 -0700
commit: 5af75c747e2a868abbf8611494b50ed5e076fca7 (patch)
tree: fbca47bb5a219577b56cbaf179e94af5b3c55edb /drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
parent: 67044a88aa0556b929cd07ba0656b101f3a6a67c (diff)
net/mlx5e: Enhanced TX MPWQE for SKBs
This commit adds support for Enhanced TX MPWQE feature in the regular
(SKB) data path. A MPWQE (multi-packet work queue element) can serve
multiple packets, reducing the PCI bandwidth on control traffic.

Two new stats (tx*_mpwqe_blks and tx*_mpwqe_pkts) are added. The feature
is on by default and controlled by the skb_tx_mpwqe private flag.

In a MPWQE, eseg is shared among all packets, so eseg-based offloads
(IPSEC, GENEVE, checksum) run on a separate eseg that is compared to the
eseg of the current MPWQE session to decide if the new packet can be
added to the same session.

MPWQE is not compatible with certain offloads and features, such as TLS
offload, TSO, nonlinear SKBs. If such incompatible features are in use,
the driver gracefully falls back to non-MPWQE.

This change has no performance impact in TCP single stream test and
XDP_TX single stream test.

UDP pktgen, 64-byte packets, single stream, MPWQE off:
  Packet rate: 16.96 Mpps (±0.12 Mpps) -> 17.01 Mpps (±0.20 Mpps)
  Instructions per packet: 421 -> 429
  Cycles per packet: 156 -> 161
  Instructions per cycle: 2.70 -> 2.67

UDP pktgen, 64-byte packets, single stream, MPWQE on:
  Packet rate: 16.96 Mpps (±0.12 Mpps) -> 20.94 Mpps (±0.33 Mpps)
  Instructions per packet: 421 -> 329
  Cycles per packet: 156 -> 123
  Instructions per cycle: 2.70 -> 2.67

Enabling MPWQE can reduce PCI bandwidth:
  PCI Gen2, pktgen at fixed rate of 36864000 pps on 24 CPU cores:
    Inbound PCI utilization with MPWQE off: 80.3%
    Inbound PCI utilization with MPWQE on: 59.0%
  PCI Gen3, pktgen at fixed rate of 56064000 pps on 24 CPU cores:
    Inbound PCI utilization with MPWQE off: 65.4%
    Inbound PCI utilization with MPWQE on: 49.3%

Enabling MPWQE can also reduce CPU load, increasing the packet rate in
case of CPU bottleneck:
  PCI Gen2, pktgen at full rate on 24 CPU cores:
    Packet rate with MPWQE off: 37.5 Mpps
    Packet rate with MPWQE on: 49.0 Mpps
  PCI Gen3, pktgen at full rate on 24 CPU cores:
    Packet rate with MPWQE off: 57.0 Mpps
    Packet rate with MPWQE on: 66.8 Mpps

Burst size in all pktgen tests is 32.

CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz (x86_64)
NIC: Mellanox ConnectX-6 Dx
GCC 10.2.0

Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en_stats.c')
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 6
1 file changed, 6 insertions, 0 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 6d5e54b964c0..c580f8b8c242 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -98,6 +98,8 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_added_vlan_packets) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_nop) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_mpwqe_blks) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_mpwqe_pkts) },
#ifdef CONFIG_MLX5_EN_TLS
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_packets) },
@@ -353,6 +355,8 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw)
s->tx_tso_inner_bytes += sq_stats->tso_inner_bytes;
s->tx_added_vlan_packets += sq_stats->added_vlan_packets;
s->tx_nop += sq_stats->nop;
+ s->tx_mpwqe_blks += sq_stats->mpwqe_blks;
+ s->tx_mpwqe_pkts += sq_stats->mpwqe_pkts;
s->tx_queue_stopped += sq_stats->stopped;
s->tx_queue_wake += sq_stats->wake;
s->tx_queue_dropped += sq_stats->dropped;
@@ -1556,6 +1560,8 @@ static const struct counter_desc sq_stats_desc[] = {
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, mpwqe_blks) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, mpwqe_pkts) },
#ifdef CONFIG_MLX5_EN_TLS
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) },