summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2024-05-30 17:14:47 -0700
committerJakub Kicinski <kuba@kernel.org>2024-05-30 17:14:48 -0700
commit030fae30f0596ae1502cc7fc2557721617adaa7c (patch)
tree3b14a66016e14b491088cfe1bf6dbe457d21c0f7
parent95cd03f32a1680f693b291da920ab5d3f9d8c5c1 (diff)
parenta5602c6edf7cbf5d69a32f089fb3938f42a3ff03 (diff)
Merge branch 'mlx4-add-support-for-netdev-genl-api'
Joe Damato says: ==================== mlx4: Add support for netdev-genl API There are no functional changes from v5, which I mistakenly sent right after net-next was closed (oops). This revision, however, includes Tariq's Reviewed-by tags of the v5 in each commit message. See the changelog below. This series adds support to mlx4 for the netdev-genl API which makes it much easier for users and user programs to map NAPI IDs back to ifindexes, queues, and IRQs. This is extremely useful for a number of use cases, including epoll-based busy poll. In addition, this series includes a patch to generate per-queue statistics using the netlink API, as well. To facilitate the stats, patch 1/3 adds a field "alloc_fail" to the ring structure. This is incremented by the driver in an appropriate place and used in patch 3/3 as alloc_fail. Please note: I do not have access to mlx4 hardware, but I've been working closely with Martin Karsten from University of Waterloo (CC'd) who has very graciously tested my patches on their mlx4 hardware (hence his Tested-by attribution in each commit). His latest research work is particularly interesting [1] and this series helps to support that (and future) work. Martin re-test v4 using Jakub's suggested tool [2] and the stats.pkt_byte_sum and stats.qstat_by_ifindex tests passed. He also adjusted the queue count and re-ran test to confirm it still passed even if the queue count was modified. [1]: https://dl.acm.org/doi/pdf/10.1145/3626780 [2]: https://lore.kernel.org/lkml/20240423175718.4ad4dc5a@kernel.org/ ==================== Link: https://lore.kernel.org/r/20240528181139.515070-1-jdamato@fastly.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_cq.c14
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_netdev.c74
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_rx.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_en.h2
4 files changed, 93 insertions, 1 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
index 1184ac5751e1..461cc2c79c71 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
@@ -126,6 +126,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
cq_idx = cq_idx % priv->rx_ring_num;
rx_cq = priv->rx_cq[cq_idx];
cq->vector = rx_cq->vector;
+ irq = mlx4_eq_get_irq(mdev->dev, cq->vector);
}
if (cq->type == RX)
@@ -142,18 +143,23 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
if (err)
goto free_eq;
+ cq->cq_idx = cq_idx;
cq->mcq.event = mlx4_en_cq_event;
switch (cq->type) {
case TX:
cq->mcq.comp = mlx4_en_tx_irq;
netif_napi_add_tx(cq->dev, &cq->napi, mlx4_en_poll_tx_cq);
+ netif_napi_set_irq(&cq->napi, irq);
napi_enable(&cq->napi);
+ netif_queue_set_napi(cq->dev, cq_idx, NETDEV_QUEUE_TYPE_TX, &cq->napi);
break;
case RX:
cq->mcq.comp = mlx4_en_rx_irq;
netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_rx_cq);
+ netif_napi_set_irq(&cq->napi, irq);
napi_enable(&cq->napi);
+ netif_queue_set_napi(cq->dev, cq_idx, NETDEV_QUEUE_TYPE_RX, &cq->napi);
break;
case TX_XDP:
/* nothing regarding napi, it's shared with rx ring */
@@ -189,6 +195,14 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq)
void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
{
if (cq->type != TX_XDP) {
+ enum netdev_queue_type qtype;
+
+ if (cq->type == RX)
+ qtype = NETDEV_QUEUE_TYPE_RX;
+ else
+ qtype = NETDEV_QUEUE_TYPE_TX;
+
+ netif_queue_set_napi(cq->dev, cq->cq_idx, qtype, NULL);
napi_disable(&cq->napi);
netif_napi_del(&cq->napi);
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 4c089cfa027a..281b34af0bb4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -43,6 +43,7 @@
#include <net/vxlan.h>
#include <net/devlink.h>
#include <net/rps.h>
+#include <net/netdev_queues.h>
#include <linux/mlx4/driver.h>
#include <linux/mlx4/device.h>
@@ -2073,6 +2074,7 @@ static void mlx4_en_clear_stats(struct net_device *dev)
priv->rx_ring[i]->csum_ok = 0;
priv->rx_ring[i]->csum_none = 0;
priv->rx_ring[i]->csum_complete = 0;
+ priv->rx_ring[i]->alloc_fail = 0;
}
}
@@ -3099,6 +3101,77 @@ void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev,
last_i += NUM_PHY_STATS;
}
+static void mlx4_get_queue_stats_rx(struct net_device *dev, int i,
+ struct netdev_queue_stats_rx *stats)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ const struct mlx4_en_rx_ring *ring;
+
+ spin_lock_bh(&priv->stats_lock);
+
+ if (!priv->port_up || mlx4_is_master(priv->mdev->dev))
+ goto out_unlock;
+
+ ring = priv->rx_ring[i];
+ stats->packets = READ_ONCE(ring->packets);
+ stats->bytes = READ_ONCE(ring->bytes);
+ stats->alloc_fail = READ_ONCE(ring->alloc_fail);
+
+out_unlock:
+ spin_unlock_bh(&priv->stats_lock);
+}
+
+static void mlx4_get_queue_stats_tx(struct net_device *dev, int i,
+ struct netdev_queue_stats_tx *stats)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ const struct mlx4_en_tx_ring *ring;
+
+ spin_lock_bh(&priv->stats_lock);
+
+ if (!priv->port_up || mlx4_is_master(priv->mdev->dev))
+ goto out_unlock;
+
+ ring = priv->tx_ring[TX][i];
+ stats->packets = READ_ONCE(ring->packets);
+ stats->bytes = READ_ONCE(ring->bytes);
+
+out_unlock:
+ spin_unlock_bh(&priv->stats_lock);
+}
+
+static void mlx4_get_base_stats(struct net_device *dev,
+ struct netdev_queue_stats_rx *rx,
+ struct netdev_queue_stats_tx *tx)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+
+ spin_lock_bh(&priv->stats_lock);
+
+ if (!priv->port_up || mlx4_is_master(priv->mdev->dev))
+ goto out_unlock;
+
+ if (priv->rx_ring_num) {
+ rx->packets = 0;
+ rx->bytes = 0;
+ rx->alloc_fail = 0;
+ }
+
+ if (priv->tx_ring_num[TX]) {
+ tx->packets = 0;
+ tx->bytes = 0;
+ }
+
+out_unlock:
+ spin_unlock_bh(&priv->stats_lock);
+}
+
+static const struct netdev_stat_ops mlx4_stat_ops = {
+ .get_queue_stats_rx = mlx4_get_queue_stats_rx,
+ .get_queue_stats_tx = mlx4_get_queue_stats_tx,
+ .get_base_stats = mlx4_get_base_stats,
+};
+
int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
struct mlx4_en_port_profile *prof)
{
@@ -3262,6 +3335,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]);
netif_set_real_num_rx_queues(dev, priv->rx_ring_num);
+ dev->stat_ops = &mlx4_stat_ops;
dev->ethtool_ops = &mlx4_en_ethtool_ops;
/*
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 8328df8645d5..15c57e9517e9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -82,8 +82,10 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
for (i = 0; i < priv->num_frags; i++, frags++) {
if (!frags->page) {
- if (mlx4_alloc_page(priv, frags, gfp))
+ if (mlx4_alloc_page(priv, frags, gfp)) {
+ ring->alloc_fail++;
return -ENOMEM;
+ }
ring->rx_alloc_pages++;
}
rx_desc->data[i].addr = cpu_to_be64(frags->dma +
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index efe3f97b874f..28b70dcc652e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -355,6 +355,7 @@ struct mlx4_en_rx_ring {
unsigned long xdp_tx;
unsigned long xdp_tx_full;
unsigned long dropped;
+ unsigned long alloc_fail;
int hwtstamp_rx_filter;
cpumask_var_t affinity_mask;
struct xdp_rxq_info xdp_rxq;
@@ -379,6 +380,7 @@ struct mlx4_en_cq {
#define MLX4_EN_OPCODE_ERROR 0x1e
const struct cpumask *aff_mask;
+ int cq_idx;
};
struct mlx4_en_port_profile {