summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/google
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/google')
-rw-r--r--drivers/net/ethernet/google/gve/gve.h112
-rw-r--r--drivers/net/ethernet/google/gve/gve_adminq.c8
-rw-r--r--drivers/net/ethernet/google/gve/gve_adminq.h4
-rw-r--r--drivers/net/ethernet/google/gve/gve_ethtool.c91
-rw-r--r--drivers/net/ethernet/google/gve/gve_main.c719
-rw-r--r--drivers/net/ethernet/google/gve/gve_rx.c147
-rw-r--r--drivers/net/ethernet/google/gve/gve_rx_dqo.c2
-rw-r--r--drivers/net/ethernet/google/gve/gve_tx.c298
-rw-r--r--drivers/net/ethernet/google/gve/gve_utils.c6
-rw-r--r--drivers/net/ethernet/google/gve/gve_utils.h3
10 files changed, 1252 insertions, 138 deletions
diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index 64eb0442c82f..e214b51d3c8b 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -47,6 +47,10 @@
#define GVE_RX_BUFFER_SIZE_DQO 2048
+#define GVE_XDP_ACTIONS 5
+
+#define GVE_TX_MAX_HEADER_SIZE 182
+
/* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */
struct gve_rx_desc_queue {
struct gve_rx_desc *desc_ring; /* the descriptor ring */
@@ -230,7 +234,10 @@ struct gve_rx_ring {
u64 rx_frag_flip_cnt; /* free-running count of rx segments where page_flip was used */
u64 rx_frag_copy_cnt; /* free-running count of rx segments copied */
u64 rx_frag_alloc_cnt; /* free-running count of rx page allocations */
-
+ u64 xdp_tx_errors;
+ u64 xdp_redirect_errors;
+ u64 xdp_alloc_fails;
+ u64 xdp_actions[GVE_XDP_ACTIONS];
u32 q_num; /* queue index */
u32 ntfy_id; /* notification block index */
struct gve_queue_resources *q_resources; /* head and tail pointer idx */
@@ -238,6 +245,12 @@ struct gve_rx_ring {
struct u64_stats_sync statss; /* sync stats for 32bit archs */
struct gve_rx_ctx ctx; /* Info for packet currently being processed in this ring. */
+
+ /* XDP stuff */
+ struct xdp_rxq_info xdp_rxq;
+ struct xdp_rxq_info xsk_rxq;
+ struct xsk_buff_pool *xsk_pool;
+ struct page_frag_cache page_cache; /* Page cache to allocate XDP frames */
};
/* A TX desc ring entry */
@@ -258,7 +271,14 @@ struct gve_tx_iovec {
* ring entry but only used for a pkt_desc not a seg_desc
*/
struct gve_tx_buffer_state {
- struct sk_buff *skb; /* skb for this pkt */
+ union {
+ struct sk_buff *skb; /* skb for this pkt */
+ struct xdp_frame *xdp_frame; /* xdp_frame */
+ };
+ struct {
+ u16 size; /* size of xmitted xdp pkt */
+ u8 is_xsk; /* xsk buff */
+ } xdp;
union {
struct gve_tx_iovec iov[GVE_TX_MAX_IOVEC]; /* segments of this pkt */
struct {
@@ -373,6 +393,8 @@ struct gve_tx_ring {
struct {
/* Spinlock for when cleanup in progress */
spinlock_t clean_lock;
+ /* Spinlock for XDP tx traffic */
+ spinlock_t xdp_lock;
};
/* DQO fields. */
@@ -450,6 +472,12 @@ struct gve_tx_ring {
dma_addr_t q_resources_bus; /* dma address of the queue resources */
dma_addr_t complq_bus_dqo; /* dma address of the dqo.compl_ring */
struct u64_stats_sync statss; /* sync stats for 32bit archs */
+ struct xsk_buff_pool *xsk_pool;
+ u32 xdp_xsk_wakeup;
+ u32 xdp_xsk_done;
+ u64 xdp_xsk_sent;
+ u64 xdp_xmit;
+ u64 xdp_xmit_errors;
} ____cacheline_aligned;
/* Wraps the info for one irq including the napi struct and the queues
@@ -526,9 +554,11 @@ struct gve_priv {
u16 rx_data_slot_cnt; /* rx buffer length */
u64 max_registered_pages;
u64 num_registered_pages; /* num pages registered with NIC */
+ struct bpf_prog *xdp_prog; /* XDP BPF program */
u32 rx_copybreak; /* copy packets smaller than this */
u16 default_num_queues; /* default num queues to set up */
+ u16 num_xdp_queues;
struct gve_queue_config tx_cfg;
struct gve_queue_config rx_cfg;
struct gve_qpl_config qpl_cfg; /* map used QPL ids */
@@ -785,7 +815,17 @@ static inline u32 gve_num_tx_qpls(struct gve_priv *priv)
if (priv->queue_format != GVE_GQI_QPL_FORMAT)
return 0;
- return priv->tx_cfg.num_queues;
+ return priv->tx_cfg.num_queues + priv->num_xdp_queues;
+}
+
+/* Returns the number of XDP tx queue page lists
+ */
+static inline u32 gve_num_xdp_qpls(struct gve_priv *priv)
+{
+ if (priv->queue_format != GVE_GQI_QPL_FORMAT)
+ return 0;
+
+ return priv->num_xdp_queues;
}
/* Returns the number of rx queue page lists
@@ -798,16 +838,35 @@ static inline u32 gve_num_rx_qpls(struct gve_priv *priv)
return priv->rx_cfg.num_queues;
}
+static inline u32 gve_tx_qpl_id(struct gve_priv *priv, int tx_qid)
+{
+ return tx_qid;
+}
+
+static inline u32 gve_rx_qpl_id(struct gve_priv *priv, int rx_qid)
+{
+ return priv->tx_cfg.max_queues + rx_qid;
+}
+
+static inline u32 gve_tx_start_qpl_id(struct gve_priv *priv)
+{
+ return gve_tx_qpl_id(priv, 0);
+}
+
+static inline u32 gve_rx_start_qpl_id(struct gve_priv *priv)
+{
+ return gve_rx_qpl_id(priv, 0);
+}
+
/* Returns a pointer to the next available tx qpl in the list of qpls
*/
static inline
-struct gve_queue_page_list *gve_assign_tx_qpl(struct gve_priv *priv)
+struct gve_queue_page_list *gve_assign_tx_qpl(struct gve_priv *priv, int tx_qid)
{
- int id = find_first_zero_bit(priv->qpl_cfg.qpl_id_map,
- priv->qpl_cfg.qpl_map_size);
+ int id = gve_tx_qpl_id(priv, tx_qid);
- /* we are out of tx qpls */
- if (id >= gve_num_tx_qpls(priv))
+ /* QPL already in use */
+ if (test_bit(id, priv->qpl_cfg.qpl_id_map))
return NULL;
set_bit(id, priv->qpl_cfg.qpl_id_map);
@@ -817,14 +876,12 @@ struct gve_queue_page_list *gve_assign_tx_qpl(struct gve_priv *priv)
/* Returns a pointer to the next available rx qpl in the list of qpls
*/
static inline
-struct gve_queue_page_list *gve_assign_rx_qpl(struct gve_priv *priv)
+struct gve_queue_page_list *gve_assign_rx_qpl(struct gve_priv *priv, int rx_qid)
{
- int id = find_next_zero_bit(priv->qpl_cfg.qpl_id_map,
- priv->qpl_cfg.qpl_map_size,
- gve_num_tx_qpls(priv));
+ int id = gve_rx_qpl_id(priv, rx_qid);
- /* we are out of rx qpls */
- if (id == gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv))
+ /* QPL already in use */
+ if (test_bit(id, priv->qpl_cfg.qpl_id_map))
return NULL;
set_bit(id, priv->qpl_cfg.qpl_id_map);
@@ -843,7 +900,7 @@ static inline void gve_unassign_qpl(struct gve_priv *priv, int id)
static inline enum dma_data_direction gve_qpl_dma_dir(struct gve_priv *priv,
int id)
{
- if (id < gve_num_tx_qpls(priv))
+ if (id < gve_rx_start_qpl_id(priv))
return DMA_TO_DEVICE;
else
return DMA_FROM_DEVICE;
@@ -855,6 +912,21 @@ static inline bool gve_is_gqi(struct gve_priv *priv)
priv->queue_format == GVE_GQI_QPL_FORMAT;
}
+static inline u32 gve_num_tx_queues(struct gve_priv *priv)
+{
+ return priv->tx_cfg.num_queues + priv->num_xdp_queues;
+}
+
+static inline u32 gve_xdp_tx_queue_id(struct gve_priv *priv, u32 queue_id)
+{
+ return priv->tx_cfg.num_queues + queue_id;
+}
+
+static inline u32 gve_xdp_tx_start_queue_id(struct gve_priv *priv)
+{
+ return gve_xdp_tx_queue_id(priv, 0);
+}
+
/* buffers */
int gve_alloc_page(struct gve_priv *priv, struct device *dev,
struct page **page, dma_addr_t *dma,
@@ -863,9 +935,15 @@ void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
enum dma_data_direction);
/* tx handling */
netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev);
+int gve_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags);
+int gve_xdp_xmit_one(struct gve_priv *priv, struct gve_tx_ring *tx,
+ void *data, int len, void *frame_p);
+void gve_xdp_tx_flush(struct gve_priv *priv, u32 xdp_qid);
bool gve_tx_poll(struct gve_notify_block *block, int budget);
-int gve_tx_alloc_rings(struct gve_priv *priv);
-void gve_tx_free_rings_gqi(struct gve_priv *priv);
+bool gve_xdp_poll(struct gve_notify_block *block, int budget);
+int gve_tx_alloc_rings(struct gve_priv *priv, int start_id, int num_rings);
+void gve_tx_free_rings_gqi(struct gve_priv *priv, int start_id, int num_rings);
u32 gve_tx_load_event_counter(struct gve_priv *priv,
struct gve_tx_ring *tx);
bool gve_tx_clean_pending(struct gve_priv *priv, struct gve_tx_ring *tx);
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
index 60061288ad9d..252974202a3f 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.c
+++ b/drivers/net/ethernet/google/gve/gve_adminq.c
@@ -516,12 +516,12 @@ static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index)
return gve_adminq_issue_cmd(priv, &cmd);
}
-int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 num_queues)
+int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_queues)
{
int err;
int i;
- for (i = 0; i < num_queues; i++) {
+ for (i = start_id; i < start_id + num_queues; i++) {
err = gve_adminq_create_tx_queue(priv, i);
if (err)
return err;
@@ -604,12 +604,12 @@ static int gve_adminq_destroy_tx_queue(struct gve_priv *priv, u32 queue_index)
return 0;
}
-int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 num_queues)
+int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_queues)
{
int err;
int i;
- for (i = 0; i < num_queues; i++) {
+ for (i = start_id; i < start_id + num_queues; i++) {
err = gve_adminq_destroy_tx_queue(priv, i);
if (err)
return err;
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h
index cf29662e6ad1..f894beb3deaf 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.h
+++ b/drivers/net/ethernet/google/gve/gve_adminq.h
@@ -410,8 +410,8 @@ int gve_adminq_configure_device_resources(struct gve_priv *priv,
dma_addr_t db_array_bus_addr,
u32 num_ntfy_blks);
int gve_adminq_deconfigure_device_resources(struct gve_priv *priv);
-int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 num_queues);
-int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 queue_id);
+int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_queues);
+int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_queues);
int gve_adminq_create_rx_queues(struct gve_priv *priv, u32 num_queues);
int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 queue_id);
int gve_adminq_register_page_list(struct gve_priv *priv,
diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c
index 5f81470843b4..cfd4b8d284d1 100644
--- a/drivers/net/ethernet/google/gve/gve_ethtool.c
+++ b/drivers/net/ethernet/google/gve/gve_ethtool.c
@@ -34,6 +34,11 @@ static u32 gve_get_msglevel(struct net_device *netdev)
return priv->msg_enable;
}
+/* For the following stats column string names, make sure the order
+ * matches how it is filled in the code. For xdp_aborted, xdp_drop,
+ * xdp_pass, xdp_tx, xdp_redirect, make sure it also matches the order
+ * as declared in enum xdp_action inside file uapi/linux/bpf.h .
+ */
static const char gve_gstrings_main_stats[][ETH_GSTRING_LEN] = {
"rx_packets", "tx_packets", "rx_bytes", "tx_bytes",
"rx_dropped", "tx_dropped", "tx_timeouts",
@@ -49,12 +54,16 @@ static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = {
"rx_dropped_pkt[%u]", "rx_copybreak_pkt[%u]", "rx_copied_pkt[%u]",
"rx_queue_drop_cnt[%u]", "rx_no_buffers_posted[%u]",
"rx_drops_packet_over_mru[%u]", "rx_drops_invalid_checksum[%u]",
+ "rx_xdp_aborted[%u]", "rx_xdp_drop[%u]", "rx_xdp_pass[%u]",
+ "rx_xdp_tx[%u]", "rx_xdp_redirect[%u]",
+ "rx_xdp_tx_errors[%u]", "rx_xdp_redirect_errors[%u]", "rx_xdp_alloc_fails[%u]",
};
static const char gve_gstrings_tx_stats[][ETH_GSTRING_LEN] = {
"tx_posted_desc[%u]", "tx_completed_desc[%u]", "tx_consumed_desc[%u]", "tx_bytes[%u]",
"tx_wake[%u]", "tx_stop[%u]", "tx_event_counter[%u]",
- "tx_dma_mapping_error[%u]",
+ "tx_dma_mapping_error[%u]", "tx_xsk_wakeup[%u]",
+ "tx_xsk_done[%u]", "tx_xsk_sent[%u]", "tx_xdp_xmit[%u]", "tx_xdp_xmit_errors[%u]"
};
static const char gve_gstrings_adminq_stats[][ETH_GSTRING_LEN] = {
@@ -81,8 +90,10 @@ static void gve_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
{
struct gve_priv *priv = netdev_priv(netdev);
char *s = (char *)data;
+ int num_tx_queues;
int i, j;
+ num_tx_queues = gve_num_tx_queues(priv);
switch (stringset) {
case ETH_SS_STATS:
memcpy(s, *gve_gstrings_main_stats,
@@ -97,7 +108,7 @@ static void gve_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
}
}
- for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+ for (i = 0; i < num_tx_queues; i++) {
for (j = 0; j < NUM_GVE_TX_CNTS; j++) {
snprintf(s, ETH_GSTRING_LEN,
gve_gstrings_tx_stats[j], i);
@@ -124,12 +135,14 @@ static void gve_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
static int gve_get_sset_count(struct net_device *netdev, int sset)
{
struct gve_priv *priv = netdev_priv(netdev);
+ int num_tx_queues;
+ num_tx_queues = gve_num_tx_queues(priv);
switch (sset) {
case ETH_SS_STATS:
return GVE_MAIN_STATS_LEN + GVE_ADMINQ_STATS_LEN +
(priv->rx_cfg.num_queues * NUM_GVE_RX_CNTS) +
- (priv->tx_cfg.num_queues * NUM_GVE_TX_CNTS);
+ (num_tx_queues * NUM_GVE_TX_CNTS);
case ETH_SS_PRIV_FLAGS:
return GVE_PRIV_FLAGS_STR_LEN;
default:
@@ -153,18 +166,20 @@ gve_get_ethtool_stats(struct net_device *netdev,
struct gve_priv *priv;
bool skip_nic_stats;
unsigned int start;
+ int num_tx_queues;
int ring;
int i, j;
ASSERT_RTNL();
priv = netdev_priv(netdev);
+ num_tx_queues = gve_num_tx_queues(priv);
report_stats = priv->stats_report->stats;
rx_qid_to_stats_idx = kmalloc_array(priv->rx_cfg.num_queues,
sizeof(int), GFP_KERNEL);
if (!rx_qid_to_stats_idx)
return;
- tx_qid_to_stats_idx = kmalloc_array(priv->tx_cfg.num_queues,
+ tx_qid_to_stats_idx = kmalloc_array(num_tx_queues,
sizeof(int), GFP_KERNEL);
if (!tx_qid_to_stats_idx) {
kfree(rx_qid_to_stats_idx);
@@ -195,7 +210,7 @@ gve_get_ethtool_stats(struct net_device *netdev,
}
}
for (tx_pkts = 0, tx_bytes = 0, tx_dropped = 0, ring = 0;
- ring < priv->tx_cfg.num_queues; ring++) {
+ ring < num_tx_queues; ring++) {
if (priv->tx) {
do {
start =
@@ -232,7 +247,7 @@ gve_get_ethtool_stats(struct net_device *netdev,
i = GVE_MAIN_STATS_LEN;
/* For rx cross-reporting stats, start from nic rx stats in report */
- base_stats_idx = GVE_TX_STATS_REPORT_NUM * priv->tx_cfg.num_queues +
+ base_stats_idx = GVE_TX_STATS_REPORT_NUM * num_tx_queues +
GVE_RX_STATS_REPORT_NUM * priv->rx_cfg.num_queues;
max_stats_idx = NIC_RX_STATS_REPORT_NUM * priv->rx_cfg.num_queues +
base_stats_idx;
@@ -283,14 +298,26 @@ gve_get_ethtool_stats(struct net_device *netdev,
if (skip_nic_stats) {
/* skip NIC rx stats */
i += NIC_RX_STATS_REPORT_NUM;
- continue;
- }
- for (j = 0; j < NIC_RX_STATS_REPORT_NUM; j++) {
- u64 value =
- be64_to_cpu(report_stats[rx_qid_to_stats_idx[ring] + j].value);
+ } else {
+ stats_idx = rx_qid_to_stats_idx[ring];
+ for (j = 0; j < NIC_RX_STATS_REPORT_NUM; j++) {
+ u64 value =
+ be64_to_cpu(report_stats[stats_idx + j].value);
- data[i++] = value;
+ data[i++] = value;
+ }
}
+ /* XDP rx counters */
+ do {
+ start = u64_stats_fetch_begin(&priv->rx[ring].statss);
+ for (j = 0; j < GVE_XDP_ACTIONS; j++)
+ data[i + j] = rx->xdp_actions[j];
+ data[i + j++] = rx->xdp_tx_errors;
+ data[i + j++] = rx->xdp_redirect_errors;
+ data[i + j++] = rx->xdp_alloc_fails;
+ } while (u64_stats_fetch_retry(&priv->rx[ring].statss,
+ start));
+ i += GVE_XDP_ACTIONS + 3; /* XDP rx counters */
}
} else {
i += priv->rx_cfg.num_queues * NUM_GVE_RX_CNTS;
@@ -298,7 +325,7 @@ gve_get_ethtool_stats(struct net_device *netdev,
/* For tx cross-reporting stats, start from nic tx stats in report */
base_stats_idx = max_stats_idx;
- max_stats_idx = NIC_TX_STATS_REPORT_NUM * priv->tx_cfg.num_queues +
+ max_stats_idx = NIC_TX_STATS_REPORT_NUM * num_tx_queues +
max_stats_idx;
/* Preprocess the stats report for tx, map queue id to start index */
skip_nic_stats = false;
@@ -316,7 +343,7 @@ gve_get_ethtool_stats(struct net_device *netdev,
}
/* walk TX rings */
if (priv->tx) {
- for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
+ for (ring = 0; ring < num_tx_queues; ring++) {
struct gve_tx_ring *tx = &priv->tx[ring];
if (gve_is_gqi(priv)) {
@@ -346,16 +373,28 @@ gve_get_ethtool_stats(struct net_device *netdev,
if (skip_nic_stats) {
/* skip NIC tx stats */
i += NIC_TX_STATS_REPORT_NUM;
- continue;
- }
- for (j = 0; j < NIC_TX_STATS_REPORT_NUM; j++) {
- u64 value =
- be64_to_cpu(report_stats[tx_qid_to_stats_idx[ring] + j].value);
- data[i++] = value;
+ } else {
+ stats_idx = tx_qid_to_stats_idx[ring];
+ for (j = 0; j < NIC_TX_STATS_REPORT_NUM; j++) {
+ u64 value =
+ be64_to_cpu(report_stats[stats_idx + j].value);
+ data[i++] = value;
+ }
}
+ /* XDP xsk counters */
+ data[i++] = tx->xdp_xsk_wakeup;
+ data[i++] = tx->xdp_xsk_done;
+ do {
+ start = u64_stats_fetch_begin(&priv->tx[ring].statss);
+ data[i] = tx->xdp_xsk_sent;
+ data[i + 1] = tx->xdp_xmit;
+ data[i + 2] = tx->xdp_xmit_errors;
+ } while (u64_stats_fetch_retry(&priv->tx[ring].statss,
+ start));
+ i += 3; /* XDP tx counters */
}
} else {
- i += priv->tx_cfg.num_queues * NUM_GVE_TX_CNTS;
+ i += num_tx_queues * NUM_GVE_TX_CNTS;
}
kfree(rx_qid_to_stats_idx);
@@ -412,6 +451,12 @@ static int gve_set_channels(struct net_device *netdev,
if (!new_rx || !new_tx)
return -EINVAL;
+ if (priv->num_xdp_queues &&
+ (new_tx != new_rx || (2 * new_tx > priv->tx_cfg.max_queues))) {
+ dev_err(&priv->pdev->dev, "XDP load failed: The number of configured RX queues should be equal to the number of configured TX queues and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues");
+ return -EINVAL;
+ }
+
if (!netif_carrier_ok(netdev)) {
priv->tx_cfg.num_queues = new_tx;
priv->rx_cfg.num_queues = new_rx;
@@ -502,7 +547,9 @@ static int gve_set_priv_flags(struct net_device *netdev, u32 flags)
{
struct gve_priv *priv = netdev_priv(netdev);
u64 ori_flags, new_flags;
+ int num_tx_queues;
+ num_tx_queues = gve_num_tx_queues(priv);
ori_flags = READ_ONCE(priv->ethtool_flags);
new_flags = ori_flags;
@@ -522,7 +569,7 @@ static int gve_set_priv_flags(struct net_device *netdev, u32 flags)
/* delete report stats timer. */
if (!(flags & BIT(0)) && (ori_flags & BIT(0))) {
int tx_stats_num = GVE_TX_STATS_REPORT_NUM *
- priv->tx_cfg.num_queues;
+ num_tx_queues;
int rx_stats_num = GVE_RX_STATS_REPORT_NUM *
priv->rx_cfg.num_queues;
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 07111c241e0e..57ce74315eba 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -4,8 +4,10 @@
* Copyright (C) 2015-2021 Google, Inc.
*/
+#include <linux/bpf.h>
#include <linux/cpumask.h>
#include <linux/etherdevice.h>
+#include <linux/filter.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/pci.h>
@@ -15,6 +17,7 @@
#include <linux/utsname.h>
#include <linux/version.h>
#include <net/sch_generic.h>
+#include <net/xdp_sock_drv.h>
#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
@@ -90,8 +93,10 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
struct gve_priv *priv = netdev_priv(dev);
unsigned int start;
u64 packets, bytes;
+ int num_tx_queues;
int ring;
+ num_tx_queues = gve_num_tx_queues(priv);
if (priv->rx) {
for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
do {
@@ -106,7 +111,7 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
}
}
if (priv->tx) {
- for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
+ for (ring = 0; ring < num_tx_queues; ring++) {
do {
start =
u64_stats_fetch_begin(&priv->tx[ring].statss);
@@ -180,7 +185,7 @@ static int gve_alloc_stats_report(struct gve_priv *priv)
int tx_stats_num, rx_stats_num;
tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
- priv->tx_cfg.num_queues;
+ gve_num_tx_queues(priv);
rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
priv->rx_cfg.num_queues;
priv->stats_report_len = struct_size(priv->stats_report, stats,
@@ -245,8 +250,13 @@ static int gve_napi_poll(struct napi_struct *napi, int budget)
block = container_of(napi, struct gve_notify_block, napi);
priv = block->priv;
- if (block->tx)
- reschedule |= gve_tx_poll(block, budget);
+ if (block->tx) {
+ if (block->tx->q_num < priv->tx_cfg.num_queues)
+ reschedule |= gve_tx_poll(block, budget);
+ else
+ reschedule |= gve_xdp_poll(block, budget);
+ }
+
if (block->rx) {
work_done = gve_rx_poll(block, budget);
reschedule |= work_done == budget;
@@ -580,13 +590,36 @@ static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
netif_napi_del(&block->napi);
}
+static int gve_register_xdp_qpls(struct gve_priv *priv)
+{
+ int start_id;
+ int err;
+ int i;
+
+ start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
+ for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
+ err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "failed to register queue page list %d\n",
+ priv->qpls[i].id);
+ /* This failure will trigger a reset - no need to clean
+ * up
+ */
+ return err;
+ }
+ }
+ return 0;
+}
+
static int gve_register_qpls(struct gve_priv *priv)
{
- int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
+ int start_id;
int err;
int i;
- for (i = 0; i < num_qpls; i++) {
+ start_id = gve_tx_start_qpl_id(priv);
+ for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
if (err) {
netif_err(priv, drv, priv->dev,
@@ -598,16 +631,63 @@ static int gve_register_qpls(struct gve_priv *priv)
return err;
}
}
+
+ start_id = gve_rx_start_qpl_id(priv);
+ for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
+ err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "failed to register queue page list %d\n",
+ priv->qpls[i].id);
+ /* This failure will trigger a reset - no need to clean
+ * up
+ */
+ return err;
+ }
+ }
+ return 0;
+}
+
+static int gve_unregister_xdp_qpls(struct gve_priv *priv)
+{
+ int start_id;
+ int err;
+ int i;
+
+ start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
+ for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
+ err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
+ /* This failure will trigger a reset - no need to clean up */
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "Failed to unregister queue page list %d\n",
+ priv->qpls[i].id);
+ return err;
+ }
+ }
return 0;
}
static int gve_unregister_qpls(struct gve_priv *priv)
{
- int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
+ int start_id;
int err;
int i;
- for (i = 0; i < num_qpls; i++) {
+ start_id = gve_tx_start_qpl_id(priv);
+ for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
+ err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
+ /* This failure will trigger a reset - no need to clean up */
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "Failed to unregister queue page list %d\n",
+ priv->qpls[i].id);
+ return err;
+ }
+ }
+
+ start_id = gve_rx_start_qpl_id(priv);
+ for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
/* This failure will trigger a reset - no need to clean up */
if (err) {
@@ -620,22 +700,44 @@ static int gve_unregister_qpls(struct gve_priv *priv)
return 0;
}
+static int gve_create_xdp_rings(struct gve_priv *priv)
+{
+ int err;
+
+ err = gve_adminq_create_tx_queues(priv,
+ gve_xdp_tx_start_queue_id(priv),
+ priv->num_xdp_queues);
+ if (err) {
+ netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n",
+ priv->num_xdp_queues);
+ /* This failure will trigger a reset - no need to clean
+ * up
+ */
+ return err;
+ }
+ netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n",
+ priv->num_xdp_queues);
+
+ return 0;
+}
+
static int gve_create_rings(struct gve_priv *priv)
{
+ int num_tx_queues = gve_num_tx_queues(priv);
int err;
int i;
- err = gve_adminq_create_tx_queues(priv, priv->tx_cfg.num_queues);
+ err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues);
if (err) {
netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
- priv->tx_cfg.num_queues);
+ num_tx_queues);
/* This failure will trigger a reset - no need to clean
* up
*/
return err;
}
netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
- priv->tx_cfg.num_queues);
+ num_tx_queues);
err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
if (err) {
@@ -668,6 +770,23 @@ static int gve_create_rings(struct gve_priv *priv)
return 0;
}
+static void add_napi_init_xdp_sync_stats(struct gve_priv *priv,
+ int (*napi_poll)(struct napi_struct *napi,
+ int budget))
+{
+ int start_id = gve_xdp_tx_start_queue_id(priv);
+ int i;
+
+ /* Add xdp tx napi & init sync stats*/
+ for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
+ int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
+
+ u64_stats_init(&priv->tx[i].statss);
+ priv->tx[i].ntfy_id = ntfy_idx;
+ gve_add_napi(priv, ntfy_idx, napi_poll);
+ }
+}
+
static void add_napi_init_sync_stats(struct gve_priv *priv,
int (*napi_poll)(struct napi_struct *napi,
int budget))
@@ -675,7 +794,7 @@ static void add_napi_init_sync_stats(struct gve_priv *priv,
int i;
/* Add tx napi & init sync stats*/
- for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+ for (i = 0; i < gve_num_tx_queues(priv); i++) {
int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
u64_stats_init(&priv->tx[i].statss);
@@ -692,34 +811,51 @@ static void add_napi_init_sync_stats(struct gve_priv *priv,
}
}
-static void gve_tx_free_rings(struct gve_priv *priv)
+static void gve_tx_free_rings(struct gve_priv *priv, int start_id, int num_rings)
{
if (gve_is_gqi(priv)) {
- gve_tx_free_rings_gqi(priv);
+ gve_tx_free_rings_gqi(priv, start_id, num_rings);
} else {
gve_tx_free_rings_dqo(priv);
}
}
+static int gve_alloc_xdp_rings(struct gve_priv *priv)
+{
+ int start_id;
+ int err = 0;
+
+ if (!priv->num_xdp_queues)
+ return 0;
+
+ start_id = gve_xdp_tx_start_queue_id(priv);
+ err = gve_tx_alloc_rings(priv, start_id, priv->num_xdp_queues);
+ if (err)
+ return err;
+ add_napi_init_xdp_sync_stats(priv, gve_napi_poll);
+
+ return 0;
+}
+
static int gve_alloc_rings(struct gve_priv *priv)
{
int err;
/* Setup tx rings */
- priv->tx = kvcalloc(priv->tx_cfg.num_queues, sizeof(*priv->tx),
+ priv->tx = kvcalloc(priv->tx_cfg.max_queues, sizeof(*priv->tx),
GFP_KERNEL);
if (!priv->tx)
return -ENOMEM;
if (gve_is_gqi(priv))
- err = gve_tx_alloc_rings(priv);
+ err = gve_tx_alloc_rings(priv, 0, gve_num_tx_queues(priv));
else
err = gve_tx_alloc_rings_dqo(priv);
if (err)
goto free_tx;
/* Setup rx rings */
- priv->rx = kvcalloc(priv->rx_cfg.num_queues, sizeof(*priv->rx),
+ priv->rx = kvcalloc(priv->rx_cfg.max_queues, sizeof(*priv->rx),
GFP_KERNEL);
if (!priv->rx) {
err = -ENOMEM;
@@ -744,18 +880,39 @@ free_rx:
kvfree(priv->rx);
priv->rx = NULL;
free_tx_queue:
- gve_tx_free_rings(priv);
+ gve_tx_free_rings(priv, 0, gve_num_tx_queues(priv));
free_tx:
kvfree(priv->tx);
priv->tx = NULL;
return err;
}
+static int gve_destroy_xdp_rings(struct gve_priv *priv)
+{
+ int start_id;
+ int err;
+
+ start_id = gve_xdp_tx_start_queue_id(priv);
+ err = gve_adminq_destroy_tx_queues(priv,
+ start_id,
+ priv->num_xdp_queues);
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "failed to destroy XDP queues\n");
+ /* This failure will trigger a reset - no need to clean up */
+ return err;
+ }
+ netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n");
+
+ return 0;
+}
+
static int gve_destroy_rings(struct gve_priv *priv)
{
+ int num_tx_queues = gve_num_tx_queues(priv);
int err;
- err = gve_adminq_destroy_tx_queues(priv, priv->tx_cfg.num_queues);
+ err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues);
if (err) {
netif_err(priv, drv, priv->dev,
"failed to destroy tx queues\n");
@@ -782,17 +939,33 @@ static void gve_rx_free_rings(struct gve_priv *priv)
gve_rx_free_rings_dqo(priv);
}
+static void gve_free_xdp_rings(struct gve_priv *priv)
+{
+ int ntfy_idx, start_id;
+ int i;
+
+ start_id = gve_xdp_tx_start_queue_id(priv);
+ if (priv->tx) {
+ for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
+ ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
+ gve_remove_napi(priv, ntfy_idx);
+ }
+ gve_tx_free_rings(priv, start_id, priv->num_xdp_queues);
+ }
+}
+
static void gve_free_rings(struct gve_priv *priv)
{
+ int num_tx_queues = gve_num_tx_queues(priv);
int ntfy_idx;
int i;
if (priv->tx) {
- for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+ for (i = 0; i < num_tx_queues; i++) {
ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
gve_remove_napi(priv, ntfy_idx);
}
- gve_tx_free_rings(priv);
+ gve_tx_free_rings(priv, 0, num_tx_queues);
kvfree(priv->tx);
priv->tx = NULL;
}
@@ -889,40 +1062,68 @@ static void gve_free_queue_page_list(struct gve_priv *priv, u32 id)
qpl->page_buses[i], gve_qpl_dma_dir(priv, id));
kvfree(qpl->page_buses);
+ qpl->page_buses = NULL;
free_pages:
kvfree(qpl->pages);
+ qpl->pages = NULL;
priv->num_registered_pages -= qpl->num_entries;
}
+static int gve_alloc_xdp_qpls(struct gve_priv *priv)
+{
+ int start_id;
+ int i, j;
+ int err;
+
+ start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
+ for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
+ err = gve_alloc_queue_page_list(priv, i,
+ priv->tx_pages_per_qpl);
+ if (err)
+ goto free_qpls;
+ }
+
+ return 0;
+
+free_qpls:
+ for (j = start_id; j <= i; j++)
+ gve_free_queue_page_list(priv, j);
+ return err;
+}
+
static int gve_alloc_qpls(struct gve_priv *priv)
{
- int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
+ int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
+ int start_id;
int i, j;
int err;
- if (num_qpls == 0)
+ if (priv->queue_format != GVE_GQI_QPL_FORMAT)
return 0;
- priv->qpls = kvcalloc(num_qpls, sizeof(*priv->qpls), GFP_KERNEL);
+ priv->qpls = kvcalloc(max_queues, sizeof(*priv->qpls), GFP_KERNEL);
if (!priv->qpls)
return -ENOMEM;
- for (i = 0; i < gve_num_tx_qpls(priv); i++) {
+ start_id = gve_tx_start_qpl_id(priv);
+ for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
err = gve_alloc_queue_page_list(priv, i,
priv->tx_pages_per_qpl);
if (err)
goto free_qpls;
}
- for (; i < num_qpls; i++) {
+
+ start_id = gve_rx_start_qpl_id(priv);
+ for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
err = gve_alloc_queue_page_list(priv, i,
priv->rx_data_slot_cnt);
if (err)
goto free_qpls;
}
- priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(num_qpls) *
+ priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(max_queues) *
sizeof(unsigned long) * BITS_PER_BYTE;
- priv->qpl_cfg.qpl_id_map = kvcalloc(BITS_TO_LONGS(num_qpls),
+ priv->qpl_cfg.qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues),
sizeof(unsigned long), GFP_KERNEL);
if (!priv->qpl_cfg.qpl_id_map) {
err = -ENOMEM;
@@ -935,23 +1136,36 @@ free_qpls:
for (j = 0; j <= i; j++)
gve_free_queue_page_list(priv, j);
kvfree(priv->qpls);
+ priv->qpls = NULL;
return err;
}
+static void gve_free_xdp_qpls(struct gve_priv *priv)
+{
+ int start_id;
+ int i;
+
+ start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
+ for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++)
+ gve_free_queue_page_list(priv, i);
+}
+
static void gve_free_qpls(struct gve_priv *priv)
{
- int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
+ int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
int i;
- if (num_qpls == 0)
+ if (!priv->qpls)
return;
kvfree(priv->qpl_cfg.qpl_id_map);
+ priv->qpl_cfg.qpl_id_map = NULL;
- for (i = 0; i < num_qpls; i++)
+ for (i = 0; i < max_queues; i++)
gve_free_queue_page_list(priv, i);
kvfree(priv->qpls);
+ priv->qpls = NULL;
}
/* Use this to schedule a reset when the device is capable of continuing
@@ -969,11 +1183,109 @@ static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
static void gve_turndown(struct gve_priv *priv);
static void gve_turnup(struct gve_priv *priv);
+static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
+{
+ struct napi_struct *napi;
+ struct gve_rx_ring *rx;
+ int err = 0;
+ int i, j;
+ u32 tx_qid;
+
+ if (!priv->num_xdp_queues)
+ return 0;
+
+ for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+ rx = &priv->rx[i];
+ napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
+
+ err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i,
+ napi->napi_id);
+ if (err)
+ goto err;
+ err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
+ MEM_TYPE_PAGE_SHARED, NULL);
+ if (err)
+ goto err;
+ rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
+ if (rx->xsk_pool) {
+ err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i,
+ napi->napi_id);
+ if (err)
+ goto err;
+ err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
+ MEM_TYPE_XSK_BUFF_POOL, NULL);
+ if (err)
+ goto err;
+ xsk_pool_set_rxq_info(rx->xsk_pool,
+ &rx->xsk_rxq);
+ }
+ }
+
+ for (i = 0; i < priv->num_xdp_queues; i++) {
+ tx_qid = gve_xdp_tx_queue_id(priv, i);
+ priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i);
+ }
+ return 0;
+
+err:
+ for (j = i; j >= 0; j--) {
+ rx = &priv->rx[j];
+ if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
+ xdp_rxq_info_unreg(&rx->xdp_rxq);
+ if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
+ xdp_rxq_info_unreg(&rx->xsk_rxq);
+ }
+ return err;
+}
+
+static void gve_unreg_xdp_info(struct gve_priv *priv)
+{
+ int i, tx_qid;
+
+ if (!priv->num_xdp_queues)
+ return;
+
+ for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+ struct gve_rx_ring *rx = &priv->rx[i];
+
+ xdp_rxq_info_unreg(&rx->xdp_rxq);
+ if (rx->xsk_pool) {
+ xdp_rxq_info_unreg(&rx->xsk_rxq);
+ rx->xsk_pool = NULL;
+ }
+ }
+
+ for (i = 0; i < priv->num_xdp_queues; i++) {
+ tx_qid = gve_xdp_tx_queue_id(priv, i);
+ priv->tx[tx_qid].xsk_pool = NULL;
+ }
+}
+
+static void gve_drain_page_cache(struct gve_priv *priv)
+{
+ struct page_frag_cache *nc;
+ int i;
+
+ for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+ nc = &priv->rx[i].page_cache;
+ if (nc->va) {
+ __page_frag_cache_drain(virt_to_page(nc->va),
+ nc->pagecnt_bias);
+ nc->va = NULL;
+ }
+ }
+}
+
static int gve_open(struct net_device *dev)
{
struct gve_priv *priv = netdev_priv(dev);
int err;
+ if (priv->xdp_prog)
+ priv->num_xdp_queues = priv->rx_cfg.num_queues;
+ else
+ priv->num_xdp_queues = 0;
+
err = gve_alloc_qpls(priv);
if (err)
return err;
@@ -989,6 +1301,10 @@ static int gve_open(struct net_device *dev)
if (err)
goto free_rings;
+ err = gve_reg_xdp_info(priv, dev);
+ if (err)
+ goto free_rings;
+
err = gve_register_qpls(priv);
if (err)
goto reset;
@@ -1043,6 +1359,7 @@ static int gve_close(struct net_device *dev)
netif_carrier_off(dev);
if (gve_get_device_rings_ok(priv)) {
gve_turndown(priv);
+ gve_drain_page_cache(priv);
err = gve_destroy_rings(priv);
if (err)
goto err;
@@ -1053,6 +1370,7 @@ static int gve_close(struct net_device *dev)
}
del_timer_sync(&priv->stats_report_timer);
+ gve_unreg_xdp_info(priv);
gve_free_rings(priv);
gve_free_qpls(priv);
priv->interface_down_cnt++;
@@ -1069,6 +1387,306 @@ err:
return gve_reset_recovery(priv, false);
}
+static int gve_remove_xdp_queues(struct gve_priv *priv)
+{
+ int err;
+
+ err = gve_destroy_xdp_rings(priv);
+ if (err)
+ return err;
+
+ err = gve_unregister_xdp_qpls(priv);
+ if (err)
+ return err;
+
+ gve_unreg_xdp_info(priv);
+ gve_free_xdp_rings(priv);
+ gve_free_xdp_qpls(priv);
+ priv->num_xdp_queues = 0;
+ return 0;
+}
+
+static int gve_add_xdp_queues(struct gve_priv *priv)
+{
+ int err;
+
+ priv->num_xdp_queues = priv->tx_cfg.num_queues;
+
+ err = gve_alloc_xdp_qpls(priv);
+ if (err)
+ goto err;
+
+ err = gve_alloc_xdp_rings(priv);
+ if (err)
+ goto free_xdp_qpls;
+
+ err = gve_reg_xdp_info(priv, priv->dev);
+ if (err)
+ goto free_xdp_rings;
+
+ err = gve_register_xdp_qpls(priv);
+ if (err)
+ goto free_xdp_rings;
+
+ err = gve_create_xdp_rings(priv);
+ if (err)
+ goto free_xdp_rings;
+
+ return 0;
+
+free_xdp_rings:
+ gve_free_xdp_rings(priv);
+free_xdp_qpls:
+ gve_free_xdp_qpls(priv);
+err:
+ priv->num_xdp_queues = 0;
+ return err;
+}
+
+static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
+{
+ if (!gve_get_napi_enabled(priv))
+ return;
+
+ if (link_status == netif_carrier_ok(priv->dev))
+ return;
+
+ if (link_status) {
+ netdev_info(priv->dev, "Device link is up.\n");
+ netif_carrier_on(priv->dev);
+ } else {
+ netdev_info(priv->dev, "Device link is down.\n");
+ netif_carrier_off(priv->dev);
+ }
+}
+
+static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
+ struct netlink_ext_ack *extack)
+{
+ struct bpf_prog *old_prog;
+ int err = 0;
+ u32 status;
+
+ old_prog = READ_ONCE(priv->xdp_prog);
+ if (!netif_carrier_ok(priv->dev)) {
+ WRITE_ONCE(priv->xdp_prog, prog);
+ if (old_prog)
+ bpf_prog_put(old_prog);
+ return 0;
+ }
+
+ gve_turndown(priv);
+ if (!old_prog && prog) {
+ // Allocate XDP TX queues if an XDP program is
+ // being installed
+ err = gve_add_xdp_queues(priv);
+ if (err)
+ goto out;
+ } else if (old_prog && !prog) {
+ // Remove XDP TX queues if an XDP program is
+ // being uninstalled
+ err = gve_remove_xdp_queues(priv);
+ if (err)
+ goto out;
+ }
+ WRITE_ONCE(priv->xdp_prog, prog);
+ if (old_prog)
+ bpf_prog_put(old_prog);
+
+out:
+ gve_turnup(priv);
+ status = ioread32be(&priv->reg_bar0->device_status);
+ gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
+ return err;
+}
+
+static int gve_xsk_pool_enable(struct net_device *dev,
+ struct xsk_buff_pool *pool,
+ u16 qid)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+ struct napi_struct *napi;
+ struct gve_rx_ring *rx;
+ int tx_qid;
+ int err;
+
+ if (qid >= priv->rx_cfg.num_queues) {
+ dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid);
+ return -EINVAL;
+ }
+ if (xsk_pool_get_rx_frame_size(pool) <
+ priv->dev->max_mtu + sizeof(struct ethhdr)) {
+ dev_err(&priv->pdev->dev, "xsk pool frame_len too small");
+ return -EINVAL;
+ }
+
+ err = xsk_pool_dma_map(pool, &priv->pdev->dev,
+ DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
+ if (err)
+ return err;
+
+ /* If XDP prog is not installed, return */
+ if (!priv->xdp_prog)
+ return 0;
+
+ rx = &priv->rx[qid];
+ napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
+ err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id);
+ if (err)
+ goto err;
+
+ err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
+ MEM_TYPE_XSK_BUFF_POOL, NULL);
+ if (err)
+ goto err;
+
+ xsk_pool_set_rxq_info(pool, &rx->xsk_rxq);
+ rx->xsk_pool = pool;
+
+ tx_qid = gve_xdp_tx_queue_id(priv, qid);
+ priv->tx[tx_qid].xsk_pool = pool;
+
+ return 0;
+err:
+ if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
+ xdp_rxq_info_unreg(&rx->xsk_rxq);
+
+ xsk_pool_dma_unmap(pool,
+ DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
+ return err;
+}
+
+static int gve_xsk_pool_disable(struct net_device *dev,
+ u16 qid)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+ struct napi_struct *napi_rx;
+ struct napi_struct *napi_tx;
+ struct xsk_buff_pool *pool;
+ int tx_qid;
+
+ pool = xsk_get_pool_from_qid(dev, qid);
+ if (!pool)
+ return -EINVAL;
+ if (qid >= priv->rx_cfg.num_queues)
+ return -EINVAL;
+
+ /* If XDP prog is not installed, unmap DMA and return */
+ if (!priv->xdp_prog)
+ goto done;
+
+ tx_qid = gve_xdp_tx_queue_id(priv, qid);
+ if (!netif_running(dev)) {
+ priv->rx[qid].xsk_pool = NULL;
+ xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
+ priv->tx[tx_qid].xsk_pool = NULL;
+ goto done;
+ }
+
+ napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
+ napi_disable(napi_rx); /* make sure current rx poll is done */
+
+ napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi;
+ napi_disable(napi_tx); /* make sure current tx poll is done */
+
+ priv->rx[qid].xsk_pool = NULL;
+ xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
+ priv->tx[tx_qid].xsk_pool = NULL;
+ smp_mb(); /* Make sure it is visible to the workers on datapath */
+
+ napi_enable(napi_rx);
+ if (gve_rx_work_pending(&priv->rx[qid]))
+ napi_schedule(napi_rx);
+
+ napi_enable(napi_tx);
+ if (gve_tx_clean_pending(priv, &priv->tx[tx_qid]))
+ napi_schedule(napi_tx);
+
+done:
+ xsk_pool_dma_unmap(pool,
+ DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
+ return 0;
+}
+
+static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+ int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id);
+
+ if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog)
+ return -EINVAL;
+
+ if (flags & XDP_WAKEUP_TX) {
+ struct gve_tx_ring *tx = &priv->tx[tx_queue_id];
+ struct napi_struct *napi =
+ &priv->ntfy_blocks[tx->ntfy_id].napi;
+
+ if (!napi_if_scheduled_mark_missed(napi)) {
+ /* Call local_bh_enable to trigger SoftIRQ processing */
+ local_bh_disable();
+ napi_schedule(napi);
+ local_bh_enable();
+ }
+
+ tx->xdp_xsk_wakeup++;
+ }
+
+ return 0;
+}
+
+static int verify_xdp_configuration(struct net_device *dev)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+
+ if (dev->features & NETIF_F_LRO) {
+ netdev_warn(dev, "XDP is not supported when LRO is on.\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (priv->queue_format != GVE_GQI_QPL_FORMAT) {
+ netdev_warn(dev, "XDP is not supported in mode %d.\n",
+ priv->queue_format);
+ return -EOPNOTSUPP;
+ }
+
+ if (dev->mtu > (PAGE_SIZE / 2) - sizeof(struct ethhdr) - GVE_RX_PAD) {
+ netdev_warn(dev, "XDP is not supported for mtu %d.\n",
+ dev->mtu);
+ return -EOPNOTSUPP;
+ }
+
+ if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues ||
+ (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) {
+ netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d",
+ priv->rx_cfg.num_queues,
+ priv->tx_cfg.num_queues,
+ priv->tx_cfg.max_queues);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+ int err;
+
+ err = verify_xdp_configuration(dev);
+ if (err)
+ return err;
+ switch (xdp->command) {
+ case XDP_SETUP_PROG:
+ return gve_set_xdp(priv, xdp->prog, xdp->extack);
+ case XDP_SETUP_XSK_POOL:
+ if (xdp->xsk.pool)
+ return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id);
+ else
+ return gve_xsk_pool_disable(dev, xdp->xsk.queue_id);
+ default:
+ return -EINVAL;
+ }
+}
+
int gve_adjust_queues(struct gve_priv *priv,
struct gve_queue_config new_rx_config,
struct gve_queue_config new_tx_config)
@@ -1118,7 +1736,7 @@ static void gve_turndown(struct gve_priv *priv)
return;
/* Disable napi to prevent more work from coming in */
- for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
+ for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
@@ -1146,7 +1764,7 @@ static void gve_turnup(struct gve_priv *priv)
netif_tx_start_all_queues(priv->dev);
/* Enable napi and unmask interrupts for all queues */
- for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
+ for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
@@ -1263,6 +1881,9 @@ static const struct net_device_ops gve_netdev_ops = {
.ndo_get_stats64 = gve_get_stats,
.ndo_tx_timeout = gve_tx_timeout,
.ndo_set_features = gve_set_features,
+ .ndo_bpf = gve_xdp,
+ .ndo_xdp_xmit = gve_xdp_xmit,
+ .ndo_xsk_wakeup = gve_xsk_wakeup,
};
static void gve_handle_status(struct gve_priv *priv, u32 status)
@@ -1306,7 +1927,7 @@ void gve_handle_report_stats(struct gve_priv *priv)
be64_add_cpu(&priv->stats_report->written_count, 1);
/* tx stats */
if (priv->tx) {
- for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
+ for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
u32 last_completion = 0;
u32 tx_frames = 0;
@@ -1369,23 +1990,6 @@ void gve_handle_report_stats(struct gve_priv *priv)
}
}
-static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
-{
- if (!gve_get_napi_enabled(priv))
- return;
-
- if (link_status == netif_carrier_ok(priv->dev))
- return;
-
- if (link_status) {
- netdev_info(priv->dev, "Device link is up.\n");
- netif_carrier_on(priv->dev);
- } else {
- netdev_info(priv->dev, "Device link is down.\n");
- netif_carrier_off(priv->dev);
- }
-}
-
/* Handle NIC status register changes, reset requests and report stats */
static void gve_service_task(struct work_struct *work)
{
@@ -1399,6 +2003,18 @@ static void gve_service_task(struct work_struct *work)
gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
}
+static void gve_set_netdev_xdp_features(struct gve_priv *priv)
+{
+ if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
+ priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC;
+ priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT;
+ priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
+ priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
+ } else {
+ priv->dev->xdp_features = 0;
+ }
+}
+
static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
{
int num_ntfy;
@@ -1477,6 +2093,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
}
setup_device:
+ gve_set_netdev_xdp_features(priv);
err = gve_setup_device_resources(priv);
if (!err)
return 0;
diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
index 1f55137722b0..d1da7413dc4d 100644
--- a/drivers/net/ethernet/google/gve/gve_rx.c
+++ b/drivers/net/ethernet/google/gve/gve_rx.c
@@ -8,6 +8,9 @@
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/etherdevice.h>
+#include <linux/filter.h>
+#include <net/xdp.h>
+#include <net/xdp_sock_drv.h>
static void gve_rx_free_buffer(struct device *dev,
struct gve_rx_slot_page_info *page_info,
@@ -124,7 +127,7 @@ static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
return -ENOMEM;
if (!rx->data.raw_addressing) {
- rx->data.qpl = gve_assign_rx_qpl(priv);
+ rx->data.qpl = gve_assign_rx_qpl(priv, rx->q_num);
if (!rx->data.qpl) {
kvfree(rx->data.page_info);
rx->data.page_info = NULL;
@@ -556,7 +559,7 @@ static struct sk_buff *gve_rx_skb(struct gve_priv *priv, struct gve_rx_ring *rx,
if (len <= priv->rx_copybreak && is_only_frag) {
/* Just copy small packets */
- skb = gve_rx_copy(netdev, napi, page_info, len, GVE_RX_PAD);
+ skb = gve_rx_copy(netdev, napi, page_info, len);
if (skb) {
u64_stats_update_begin(&rx->statss);
rx->rx_copied_pkt++;
@@ -591,6 +594,107 @@ static struct sk_buff *gve_rx_skb(struct gve_priv *priv, struct gve_rx_ring *rx,
return skb;
}
+static int gve_xsk_pool_redirect(struct net_device *dev,
+ struct gve_rx_ring *rx,
+ void *data, int len,
+ struct bpf_prog *xdp_prog)
+{
+ struct xdp_buff *xdp;
+ int err;
+
+ if (rx->xsk_pool->frame_len < len)
+ return -E2BIG;
+ xdp = xsk_buff_alloc(rx->xsk_pool);
+ if (!xdp) {
+ u64_stats_update_begin(&rx->statss);
+ rx->xdp_alloc_fails++;
+ u64_stats_update_end(&rx->statss);
+ return -ENOMEM;
+ }
+ xdp->data_end = xdp->data + len;
+ memcpy(xdp->data, data, len);
+ err = xdp_do_redirect(dev, xdp, xdp_prog);
+ if (err)
+ xsk_buff_free(xdp);
+ return err;
+}
+
+static int gve_xdp_redirect(struct net_device *dev, struct gve_rx_ring *rx,
+ struct xdp_buff *orig, struct bpf_prog *xdp_prog)
+{
+ int total_len, len = orig->data_end - orig->data;
+ int headroom = XDP_PACKET_HEADROOM;
+ struct xdp_buff new;
+ void *frame;
+ int err;
+
+ if (rx->xsk_pool)
+ return gve_xsk_pool_redirect(dev, rx, orig->data,
+ len, xdp_prog);
+
+ total_len = headroom + SKB_DATA_ALIGN(len) +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ frame = page_frag_alloc(&rx->page_cache, total_len, GFP_ATOMIC);
+ if (!frame) {
+ u64_stats_update_begin(&rx->statss);
+ rx->xdp_alloc_fails++;
+ u64_stats_update_end(&rx->statss);
+ return -ENOMEM;
+ }
+ xdp_init_buff(&new, total_len, &rx->xdp_rxq);
+ xdp_prepare_buff(&new, frame, headroom, len, false);
+ memcpy(new.data, orig->data, len);
+
+ err = xdp_do_redirect(dev, &new, xdp_prog);
+ if (err)
+ page_frag_free(frame);
+
+ return err;
+}
+
+static void gve_xdp_done(struct gve_priv *priv, struct gve_rx_ring *rx,
+ struct xdp_buff *xdp, struct bpf_prog *xprog,
+ int xdp_act)
+{
+ struct gve_tx_ring *tx;
+ int tx_qid;
+ int err;
+
+ switch (xdp_act) {
+ case XDP_ABORTED:
+ case XDP_DROP:
+ default:
+ break;
+ case XDP_TX:
+ tx_qid = gve_xdp_tx_queue_id(priv, rx->q_num);
+ tx = &priv->tx[tx_qid];
+ spin_lock(&tx->xdp_lock);
+ err = gve_xdp_xmit_one(priv, tx, xdp->data,
+ xdp->data_end - xdp->data, NULL);
+ spin_unlock(&tx->xdp_lock);
+
+ if (unlikely(err)) {
+ u64_stats_update_begin(&rx->statss);
+ rx->xdp_tx_errors++;
+ u64_stats_update_end(&rx->statss);
+ }
+ break;
+ case XDP_REDIRECT:
+ err = gve_xdp_redirect(priv->dev, rx, xdp, xprog);
+
+ if (unlikely(err)) {
+ u64_stats_update_begin(&rx->statss);
+ rx->xdp_redirect_errors++;
+ u64_stats_update_end(&rx->statss);
+ }
+ break;
+ }
+ u64_stats_update_begin(&rx->statss);
+ if ((u32)xdp_act < GVE_XDP_ACTIONS)
+ rx->xdp_actions[xdp_act]++;
+ u64_stats_update_end(&rx->statss);
+}
+
#define GVE_PKTCONT_BIT_IS_SET(x) (GVE_RXF_PKT_CONT & (x))
static void gve_rx(struct gve_rx_ring *rx, netdev_features_t feat,
struct gve_rx_desc *desc, u32 idx,
@@ -603,9 +707,12 @@ static void gve_rx(struct gve_rx_ring *rx, netdev_features_t feat,
union gve_rx_data_slot *data_slot;
struct gve_priv *priv = rx->gve;
struct sk_buff *skb = NULL;
+ struct bpf_prog *xprog;
+ struct xdp_buff xdp;
dma_addr_t page_bus;
void *va;
+ u16 len = frag_size;
struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
bool is_first_frag = ctx->frag_cnt == 0;
@@ -645,9 +752,35 @@ static void gve_rx(struct gve_rx_ring *rx, netdev_features_t feat,
dma_sync_single_for_cpu(&priv->pdev->dev, page_bus,
PAGE_SIZE, DMA_FROM_DEVICE);
page_info->pad = is_first_frag ? GVE_RX_PAD : 0;
+ len -= page_info->pad;
frag_size -= page_info->pad;
- skb = gve_rx_skb(priv, rx, page_info, napi, frag_size,
+ xprog = READ_ONCE(priv->xdp_prog);
+ if (xprog && is_only_frag) {
+ void *old_data;
+ int xdp_act;
+
+ xdp_init_buff(&xdp, rx->packet_buffer_size, &rx->xdp_rxq);
+ xdp_prepare_buff(&xdp, page_info->page_address +
+ page_info->page_offset, GVE_RX_PAD,
+ len, false);
+ old_data = xdp.data;
+ xdp_act = bpf_prog_run_xdp(xprog, &xdp);
+ if (xdp_act != XDP_PASS) {
+ gve_xdp_done(priv, rx, &xdp, xprog, xdp_act);
+ ctx->total_size += frag_size;
+ goto finish_ok_pkt;
+ }
+
+ page_info->pad += xdp.data - old_data;
+ len = xdp.data_end - xdp.data;
+
+ u64_stats_update_begin(&rx->statss);
+ rx->xdp_actions[XDP_PASS]++;
+ u64_stats_update_end(&rx->statss);
+ }
+
+ skb = gve_rx_skb(priv, rx, page_info, napi, len,
data_slot, is_only_frag);
if (!skb) {
u64_stats_update_begin(&rx->statss);
@@ -773,6 +906,8 @@ static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
static int gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
netdev_features_t feat)
{
+ u64 xdp_redirects = rx->xdp_actions[XDP_REDIRECT];
+ u64 xdp_txs = rx->xdp_actions[XDP_TX];
struct gve_rx_ctx *ctx = &rx->ctx;
struct gve_priv *priv = rx->gve;
struct gve_rx_cnts cnts = {0};
@@ -820,6 +955,12 @@ static int gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
u64_stats_update_end(&rx->statss);
}
+ if (xdp_txs != rx->xdp_actions[XDP_TX])
+ gve_xdp_tx_flush(priv, rx->q_num);
+
+ if (xdp_redirects != rx->xdp_actions[XDP_REDIRECT])
+ xdp_do_flush();
+
/* restock ring slots */
if (!rx->data.raw_addressing) {
/* In QPL mode buffs are refilled as the desc are processed */
diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
index 630f42a3037b..e57b73eb70f6 100644
--- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c
+++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
@@ -568,7 +568,7 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
if (eop && buf_len <= priv->rx_copybreak) {
rx->ctx.skb_head = gve_rx_copy(priv->dev, napi,
- &buf_state->page_info, buf_len, 0);
+ &buf_state->page_info, buf_len);
if (unlikely(!rx->ctx.skb_head))
goto error;
rx->ctx.skb_tail = rx->ctx.skb_head;
diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c
index 4888bf05fbed..e50510b8e784 100644
--- a/drivers/net/ethernet/google/gve/gve_tx.c
+++ b/drivers/net/ethernet/google/gve/gve_tx.c
@@ -11,6 +11,7 @@
#include <linux/tcp.h>
#include <linux/vmalloc.h>
#include <linux/skbuff.h>
+#include <net/xdp_sock_drv.h>
static inline void gve_tx_put_doorbell(struct gve_priv *priv,
struct gve_queue_resources *q_resources,
@@ -19,6 +20,14 @@ static inline void gve_tx_put_doorbell(struct gve_priv *priv,
iowrite32be(val, &priv->db_bar2[be32_to_cpu(q_resources->db_index)]);
}
+void gve_xdp_tx_flush(struct gve_priv *priv, u32 xdp_qid)
+{
+ u32 tx_qid = gve_xdp_tx_queue_id(priv, xdp_qid);
+ struct gve_tx_ring *tx = &priv->tx[tx_qid];
+
+ gve_tx_put_doorbell(priv, tx->q_resources, tx->req);
+}
+
/* gvnic can only transmit from a Registered Segment.
* We copy skb payloads into the registered segment before writing Tx
* descriptors and ringing the Tx doorbell.
@@ -132,6 +141,58 @@ static void gve_tx_free_fifo(struct gve_tx_fifo *fifo, size_t bytes)
atomic_add(bytes, &fifo->available);
}
+static size_t gve_tx_clear_buffer_state(struct gve_tx_buffer_state *info)
+{
+ size_t space_freed = 0;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(info->iov); i++) {
+ space_freed += info->iov[i].iov_len + info->iov[i].iov_padding;
+ info->iov[i].iov_len = 0;
+ info->iov[i].iov_padding = 0;
+ }
+ return space_freed;
+}
+
+static int gve_clean_xdp_done(struct gve_priv *priv, struct gve_tx_ring *tx,
+ u32 to_do)
+{
+ struct gve_tx_buffer_state *info;
+ u32 clean_end = tx->done + to_do;
+ u64 pkts = 0, bytes = 0;
+ size_t space_freed = 0;
+ u32 xsk_complete = 0;
+ u32 idx;
+
+ for (; tx->done < clean_end; tx->done++) {
+ idx = tx->done & tx->mask;
+ info = &tx->info[idx];
+
+ if (unlikely(!info->xdp.size))
+ continue;
+
+ bytes += info->xdp.size;
+ pkts++;
+ xsk_complete += info->xdp.is_xsk;
+
+ info->xdp.size = 0;
+ if (info->xdp_frame) {
+ xdp_return_frame(info->xdp_frame);
+ info->xdp_frame = NULL;
+ }
+ space_freed += gve_tx_clear_buffer_state(info);
+ }
+
+ gve_tx_free_fifo(&tx->tx_fifo, space_freed);
+ if (xsk_complete > 0 && tx->xsk_pool)
+ xsk_tx_completed(tx->xsk_pool, xsk_complete);
+ u64_stats_update_begin(&tx->statss);
+ tx->bytes_done += bytes;
+ tx->pkt_done += pkts;
+ u64_stats_update_end(&tx->statss);
+ return pkts;
+}
+
static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
u32 to_do, bool try_to_wake);
@@ -144,8 +205,12 @@ static void gve_tx_free_ring(struct gve_priv *priv, int idx)
gve_tx_remove_from_block(priv, idx);
slots = tx->mask + 1;
- gve_clean_tx_done(priv, tx, priv->tx_desc_cnt, false);
- netdev_tx_reset_queue(tx->netdev_txq);
+ if (tx->q_num < priv->tx_cfg.num_queues) {
+ gve_clean_tx_done(priv, tx, priv->tx_desc_cnt, false);
+ netdev_tx_reset_queue(tx->netdev_txq);
+ } else {
+ gve_clean_xdp_done(priv, tx, priv->tx_desc_cnt);
+ }
dma_free_coherent(hdev, sizeof(*tx->q_resources),
tx->q_resources, tx->q_resources_bus);
@@ -177,6 +242,7 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
/* Make sure everything is zeroed to start */
memset(tx, 0, sizeof(*tx));
spin_lock_init(&tx->clean_lock);
+ spin_lock_init(&tx->xdp_lock);
tx->q_num = idx;
tx->mask = slots - 1;
@@ -195,7 +261,7 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
tx->raw_addressing = priv->queue_format == GVE_GQI_RDA_FORMAT;
tx->dev = &priv->pdev->dev;
if (!tx->raw_addressing) {
- tx->tx_fifo.qpl = gve_assign_tx_qpl(priv);
+ tx->tx_fifo.qpl = gve_assign_tx_qpl(priv, idx);
if (!tx->tx_fifo.qpl)
goto abort_with_desc;
/* map Tx FIFO */
@@ -213,7 +279,8 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
netif_dbg(priv, drv, priv->dev, "tx[%d]->bus=%lx\n", idx,
(unsigned long)tx->bus);
- tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx);
+ if (idx < priv->tx_cfg.num_queues)
+ tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx);
gve_tx_add_to_block(priv, idx);
return 0;
@@ -233,12 +300,12 @@ abort_with_info:
return -ENOMEM;
}
-int gve_tx_alloc_rings(struct gve_priv *priv)
+int gve_tx_alloc_rings(struct gve_priv *priv, int start_id, int num_rings)
{
int err = 0;
int i;
- for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+ for (i = start_id; i < start_id + num_rings; i++) {
err = gve_tx_alloc_ring(priv, i);
if (err) {
netif_err(priv, drv, priv->dev,
@@ -251,17 +318,17 @@ int gve_tx_alloc_rings(struct gve_priv *priv)
if (err) {
int j;
- for (j = 0; j < i; j++)
+ for (j = start_id; j < i; j++)
gve_tx_free_ring(priv, j);
}
return err;
}
-void gve_tx_free_rings_gqi(struct gve_priv *priv)
+void gve_tx_free_rings_gqi(struct gve_priv *priv, int start_id, int num_rings)
{
int i;
- for (i = 0; i < priv->tx_cfg.num_queues; i++)
+ for (i = start_id; i < start_id + num_rings; i++)
gve_tx_free_ring(priv, i);
}
@@ -374,18 +441,18 @@ static int gve_maybe_stop_tx(struct gve_priv *priv, struct gve_tx_ring *tx,
}
static void gve_tx_fill_pkt_desc(union gve_tx_desc *pkt_desc,
- struct sk_buff *skb, bool is_gso,
+ u16 csum_offset, u8 ip_summed, bool is_gso,
int l4_hdr_offset, u32 desc_cnt,
- u16 hlen, u64 addr)
+ u16 hlen, u64 addr, u16 pkt_len)
{
/* l4_hdr_offset and csum_offset are in units of 16-bit words */
if (is_gso) {
pkt_desc->pkt.type_flags = GVE_TXD_TSO | GVE_TXF_L4CSUM;
- pkt_desc->pkt.l4_csum_offset = skb->csum_offset >> 1;
+ pkt_desc->pkt.l4_csum_offset = csum_offset >> 1;
pkt_desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1;
- } else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
+ } else if (likely(ip_summed == CHECKSUM_PARTIAL)) {
pkt_desc->pkt.type_flags = GVE_TXD_STD | GVE_TXF_L4CSUM;
- pkt_desc->pkt.l4_csum_offset = skb->csum_offset >> 1;
+ pkt_desc->pkt.l4_csum_offset = csum_offset >> 1;
pkt_desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1;
} else {
pkt_desc->pkt.type_flags = GVE_TXD_STD;
@@ -393,7 +460,7 @@ static void gve_tx_fill_pkt_desc(union gve_tx_desc *pkt_desc,
pkt_desc->pkt.l4_hdr_offset = 0;
}
pkt_desc->pkt.desc_cnt = desc_cnt;
- pkt_desc->pkt.len = cpu_to_be16(skb->len);
+ pkt_desc->pkt.len = cpu_to_be16(pkt_len);
pkt_desc->pkt.seg_len = cpu_to_be16(hlen);
pkt_desc->pkt.seg_addr = cpu_to_be64(addr);
}
@@ -412,15 +479,16 @@ static void gve_tx_fill_mtd_desc(union gve_tx_desc *mtd_desc,
}
static void gve_tx_fill_seg_desc(union gve_tx_desc *seg_desc,
- struct sk_buff *skb, bool is_gso,
+ u16 l3_offset, u16 gso_size,
+ bool is_gso_v6, bool is_gso,
u16 len, u64 addr)
{
seg_desc->seg.type_flags = GVE_TXD_SEG;
if (is_gso) {
- if (skb_is_gso_v6(skb))
+ if (is_gso_v6)
seg_desc->seg.type_flags |= GVE_TXSF_IPV6;
- seg_desc->seg.l3_offset = skb_network_offset(skb) >> 1;
- seg_desc->seg.mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
+ seg_desc->seg.l3_offset = l3_offset >> 1;
+ seg_desc->seg.mss = cpu_to_be16(gso_size);
}
seg_desc->seg.seg_len = cpu_to_be16(len);
seg_desc->seg.seg_addr = cpu_to_be64(addr);
@@ -473,9 +541,10 @@ static int gve_tx_add_skb_copy(struct gve_priv *priv, struct gve_tx_ring *tx, st
payload_nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, skb->len - hlen,
&info->iov[payload_iov]);
- gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset,
+ gve_tx_fill_pkt_desc(pkt_desc, skb->csum_offset, skb->ip_summed,
+ is_gso, l4_hdr_offset,
1 + mtd_desc_nr + payload_nfrags, hlen,
- info->iov[hdr_nfrags - 1].iov_offset);
+ info->iov[hdr_nfrags - 1].iov_offset, skb->len);
skb_copy_bits(skb, 0,
tx->tx_fifo.base + info->iov[hdr_nfrags - 1].iov_offset,
@@ -494,7 +563,9 @@ static int gve_tx_add_skb_copy(struct gve_priv *priv, struct gve_tx_ring *tx, st
next_idx = (tx->req + 1 + mtd_desc_nr + i - payload_iov) & tx->mask;
seg_desc = &tx->desc[next_idx];
- gve_tx_fill_seg_desc(seg_desc, skb, is_gso,
+ gve_tx_fill_seg_desc(seg_desc, skb_network_offset(skb),
+ skb_shinfo(skb)->gso_size,
+ skb_is_gso_v6(skb), is_gso,
info->iov[i].iov_len,
info->iov[i].iov_offset);
@@ -552,8 +623,9 @@ static int gve_tx_add_skb_no_copy(struct gve_priv *priv, struct gve_tx_ring *tx,
if (mtd_desc_nr)
num_descriptors++;
- gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset,
- num_descriptors, hlen, addr);
+ gve_tx_fill_pkt_desc(pkt_desc, skb->csum_offset, skb->ip_summed,
+ is_gso, l4_hdr_offset,
+ num_descriptors, hlen, addr, skb->len);
if (mtd_desc_nr) {
idx = (idx + 1) & tx->mask;
@@ -569,7 +641,9 @@ static int gve_tx_add_skb_no_copy(struct gve_priv *priv, struct gve_tx_ring *tx,
addr += hlen;
idx = (idx + 1) & tx->mask;
seg_desc = &tx->desc[idx];
- gve_tx_fill_seg_desc(seg_desc, skb, is_gso, len, addr);
+ gve_tx_fill_seg_desc(seg_desc, skb_network_offset(skb),
+ skb_shinfo(skb)->gso_size,
+ skb_is_gso_v6(skb), is_gso, len, addr);
}
for (i = 0; i < shinfo->nr_frags; i++) {
@@ -587,7 +661,9 @@ static int gve_tx_add_skb_no_copy(struct gve_priv *priv, struct gve_tx_ring *tx,
dma_unmap_len_set(&tx->info[idx], len, len);
dma_unmap_addr_set(&tx->info[idx], dma, addr);
- gve_tx_fill_seg_desc(seg_desc, skb, is_gso, len, addr);
+ gve_tx_fill_seg_desc(seg_desc, skb_network_offset(skb),
+ skb_shinfo(skb)->gso_size,
+ skb_is_gso_v6(skb), is_gso, len, addr);
}
return num_descriptors;
@@ -648,6 +724,103 @@ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
}
+static int gve_tx_fill_xdp(struct gve_priv *priv, struct gve_tx_ring *tx,
+ void *data, int len, void *frame_p, bool is_xsk)
+{
+ int pad, nfrags, ndescs, iovi, offset;
+ struct gve_tx_buffer_state *info;
+ u32 reqi = tx->req;
+
+ pad = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo, len);
+ if (pad >= GVE_TX_MAX_HEADER_SIZE)
+ pad = 0;
+ info = &tx->info[reqi & tx->mask];
+ info->xdp_frame = frame_p;
+ info->xdp.size = len;
+ info->xdp.is_xsk = is_xsk;
+
+ nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, pad + len,
+ &info->iov[0]);
+ iovi = pad > 0;
+ ndescs = nfrags - iovi;
+ offset = 0;
+
+ while (iovi < nfrags) {
+ if (!offset)
+ gve_tx_fill_pkt_desc(&tx->desc[reqi & tx->mask], 0,
+ CHECKSUM_NONE, false, 0, ndescs,
+ info->iov[iovi].iov_len,
+ info->iov[iovi].iov_offset, len);
+ else
+ gve_tx_fill_seg_desc(&tx->desc[reqi & tx->mask],
+ 0, 0, false, false,
+ info->iov[iovi].iov_len,
+ info->iov[iovi].iov_offset);
+
+ memcpy(tx->tx_fifo.base + info->iov[iovi].iov_offset,
+ data + offset, info->iov[iovi].iov_len);
+ gve_dma_sync_for_device(&priv->pdev->dev,
+ tx->tx_fifo.qpl->page_buses,
+ info->iov[iovi].iov_offset,
+ info->iov[iovi].iov_len);
+ offset += info->iov[iovi].iov_len;
+ iovi++;
+ reqi++;
+ }
+
+ return ndescs;
+}
+
+int gve_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+ struct gve_tx_ring *tx;
+ int i, err = 0, qid;
+
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
+
+ qid = gve_xdp_tx_queue_id(priv,
+ smp_processor_id() % priv->num_xdp_queues);
+
+ tx = &priv->tx[qid];
+
+ spin_lock(&tx->xdp_lock);
+ for (i = 0; i < n; i++) {
+ err = gve_xdp_xmit_one(priv, tx, frames[i]->data,
+ frames[i]->len, frames[i]);
+ if (err)
+ break;
+ }
+
+ if (flags & XDP_XMIT_FLUSH)
+ gve_tx_put_doorbell(priv, tx->q_resources, tx->req);
+
+ spin_unlock(&tx->xdp_lock);
+
+ u64_stats_update_begin(&tx->statss);
+ tx->xdp_xmit += n;
+ tx->xdp_xmit_errors += n - i;
+ u64_stats_update_end(&tx->statss);
+
+ return i ? i : err;
+}
+
+int gve_xdp_xmit_one(struct gve_priv *priv, struct gve_tx_ring *tx,
+ void *data, int len, void *frame_p)
+{
+ int nsegs;
+
+ if (!gve_can_tx(tx, len + GVE_TX_MAX_HEADER_SIZE - 1))
+ return -EBUSY;
+
+ nsegs = gve_tx_fill_xdp(priv, tx, data, len, frame_p, false);
+ tx->req += nsegs;
+
+ return 0;
+}
+
#define GVE_TX_START_THRESH PAGE_SIZE
static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
@@ -657,8 +830,8 @@ static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
u64 pkts = 0, bytes = 0;
size_t space_freed = 0;
struct sk_buff *skb;
- int i, j;
u32 idx;
+ int j;
for (j = 0; j < to_do; j++) {
idx = tx->done & tx->mask;
@@ -680,12 +853,7 @@ static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
dev_consume_skb_any(skb);
if (tx->raw_addressing)
continue;
- /* FIFO free */
- for (i = 0; i < ARRAY_SIZE(info->iov); i++) {
- space_freed += info->iov[i].iov_len + info->iov[i].iov_padding;
- info->iov[i].iov_len = 0;
- info->iov[i].iov_padding = 0;
- }
+ space_freed += gve_tx_clear_buffer_state(info);
}
}
@@ -720,6 +888,70 @@ u32 gve_tx_load_event_counter(struct gve_priv *priv,
return be32_to_cpu(counter);
}
+static int gve_xsk_tx(struct gve_priv *priv, struct gve_tx_ring *tx,
+ int budget)
+{
+ struct xdp_desc desc;
+ int sent = 0, nsegs;
+ void *data;
+
+ spin_lock(&tx->xdp_lock);
+ while (sent < budget) {
+ if (!gve_can_tx(tx, GVE_TX_START_THRESH))
+ goto out;
+
+ if (!xsk_tx_peek_desc(tx->xsk_pool, &desc)) {
+ tx->xdp_xsk_done = tx->xdp_xsk_wakeup;
+ goto out;
+ }
+
+ data = xsk_buff_raw_get_data(tx->xsk_pool, desc.addr);
+ nsegs = gve_tx_fill_xdp(priv, tx, data, desc.len, NULL, true);
+ tx->req += nsegs;
+ sent++;
+ }
+out:
+ if (sent > 0) {
+ gve_tx_put_doorbell(priv, tx->q_resources, tx->req);
+ xsk_tx_release(tx->xsk_pool);
+ }
+ spin_unlock(&tx->xdp_lock);
+ return sent;
+}
+
+bool gve_xdp_poll(struct gve_notify_block *block, int budget)
+{
+ struct gve_priv *priv = block->priv;
+ struct gve_tx_ring *tx = block->tx;
+ u32 nic_done;
+ bool repoll;
+ u32 to_do;
+
+ /* If budget is 0, do all the work */
+ if (budget == 0)
+ budget = INT_MAX;
+
+ /* Find out how much work there is to be done */
+ nic_done = gve_tx_load_event_counter(priv, tx);
+ to_do = min_t(u32, (nic_done - tx->done), budget);
+ gve_clean_xdp_done(priv, tx, to_do);
+ repoll = nic_done != tx->done;
+
+ if (tx->xsk_pool) {
+ int sent = gve_xsk_tx(priv, tx, budget);
+
+ u64_stats_update_begin(&tx->statss);
+ tx->xdp_xsk_sent += sent;
+ u64_stats_update_end(&tx->statss);
+ repoll |= (sent == budget);
+ if (xsk_uses_need_wakeup(tx->xsk_pool))
+ xsk_set_tx_need_wakeup(tx->xsk_pool);
+ }
+
+ /* If we still have work we want to repoll */
+ return repoll;
+}
+
bool gve_tx_poll(struct gve_notify_block *block, int budget)
{
struct gve_priv *priv = block->priv;
diff --git a/drivers/net/ethernet/google/gve/gve_utils.c b/drivers/net/ethernet/google/gve/gve_utils.c
index 6ba46adaaee3..26e08d753270 100644
--- a/drivers/net/ethernet/google/gve/gve_utils.c
+++ b/drivers/net/ethernet/google/gve/gve_utils.c
@@ -49,10 +49,10 @@ void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx)
}
struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi,
- struct gve_rx_slot_page_info *page_info, u16 len,
- u16 padding)
+ struct gve_rx_slot_page_info *page_info, u16 len)
{
- void *va = page_info->page_address + padding + page_info->page_offset;
+ void *va = page_info->page_address + page_info->page_offset +
+ page_info->pad;
struct sk_buff *skb;
skb = napi_alloc_skb(napi, len);
diff --git a/drivers/net/ethernet/google/gve/gve_utils.h b/drivers/net/ethernet/google/gve/gve_utils.h
index 79595940b351..324fd98a6112 100644
--- a/drivers/net/ethernet/google/gve/gve_utils.h
+++ b/drivers/net/ethernet/google/gve/gve_utils.h
@@ -18,8 +18,7 @@ void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx);
void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx);
struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi,
- struct gve_rx_slot_page_info *page_info, u16 len,
- u16 pad);
+ struct gve_rx_slot_page_info *page_info, u16 len);
/* Decrement pagecnt_bias. Set it back to INT_MAX if it reached zero. */
void gve_dec_pagecnt_bias(struct gve_rx_slot_page_info *page_info);