summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/google
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/google')
-rw-r--r--drivers/net/ethernet/google/Kconfig1
-rw-r--r--drivers/net/ethernet/google/gve/Makefile3
-rw-r--r--drivers/net/ethernet/google/gve/gve.h294
-rw-r--r--drivers/net/ethernet/google/gve/gve_adminq.c681
-rw-r--r--drivers/net/ethernet/google/gve/gve_adminq.h213
-rw-r--r--drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c316
-rw-r--r--drivers/net/ethernet/google/gve/gve_dqo.h6
-rw-r--r--drivers/net/ethernet/google/gve/gve_ethtool.c370
-rw-r--r--drivers/net/ethernet/google/gve/gve_flow_rule.c298
-rw-r--r--drivers/net/ethernet/google/gve/gve_main.c1222
-rw-r--r--drivers/net/ethernet/google/gve/gve_rx.c168
-rw-r--r--drivers/net/ethernet/google/gve/gve_rx_dqo.c528
-rw-r--r--drivers/net/ethernet/google/gve/gve_tx.c123
-rw-r--r--drivers/net/ethernet/google/gve/gve_tx_dqo.c109
-rw-r--r--drivers/net/ethernet/google/gve/gve_utils.c5
15 files changed, 3001 insertions, 1336 deletions
diff --git a/drivers/net/ethernet/google/Kconfig b/drivers/net/ethernet/google/Kconfig
index 8641a00f8e63..564862a57124 100644
--- a/drivers/net/ethernet/google/Kconfig
+++ b/drivers/net/ethernet/google/Kconfig
@@ -18,6 +18,7 @@ if NET_VENDOR_GOOGLE
config GVE
tristate "Google Virtual NIC (gVNIC) support"
depends on (PCI_MSI && (X86 || CPU_LITTLE_ENDIAN))
+ select PAGE_POOL
help
This driver supports Google Virtual NIC (gVNIC)"
diff --git a/drivers/net/ethernet/google/gve/Makefile b/drivers/net/ethernet/google/gve/Makefile
index b9a6be76531b..4520f1c07a63 100644
--- a/drivers/net/ethernet/google/gve/Makefile
+++ b/drivers/net/ethernet/google/gve/Makefile
@@ -1,4 +1,5 @@
# Makefile for the Google virtual Ethernet (gve) driver
obj-$(CONFIG_GVE) += gve.o
-gve-objs := gve_main.o gve_tx.o gve_tx_dqo.o gve_rx.o gve_rx_dqo.o gve_ethtool.o gve_adminq.o gve_utils.o
+gve-objs := gve_main.o gve_tx.o gve_tx_dqo.o gve_rx.o gve_rx_dqo.o gve_ethtool.o gve_adminq.o gve_utils.o gve_flow_rule.o \
+ gve_buffer_mgmt_dqo.o
diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index 4814c96d5fe7..2fab38c8ee78 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: (GPL-2.0 OR MIT)
* Google virtual Ethernet (gve) driver
*
- * Copyright (C) 2015-2021 Google, Inc.
+ * Copyright (C) 2015-2024 Google LLC
*/
#ifndef _GVE_H_
@@ -13,6 +13,7 @@
#include <linux/netdevice.h>
#include <linux/pci.h>
#include <linux/u64_stats_sync.h>
+#include <net/page_pool/helpers.h>
#include <net/xdp.h>
#include "gve_desc.h"
@@ -50,12 +51,28 @@
/* PTYPEs are always 10 bits. */
#define GVE_NUM_PTYPES 1024
+/* Default minimum ring size */
+#define GVE_DEFAULT_MIN_TX_RING_SIZE 256
+#define GVE_DEFAULT_MIN_RX_RING_SIZE 512
+
#define GVE_DEFAULT_RX_BUFFER_SIZE 2048
#define GVE_MAX_RX_BUFFER_SIZE 4096
+#define GVE_XDP_RX_BUFFER_SIZE_DQO 4096
+
#define GVE_DEFAULT_RX_BUFFER_OFFSET 2048
+#define GVE_PAGE_POOL_SIZE_MULTIPLIER 4
+
+#define GVE_FLOW_RULES_CACHE_SIZE \
+ (GVE_ADMINQ_BUFFER_SIZE / sizeof(struct gve_adminq_queried_flow_rule))
+#define GVE_FLOW_RULE_IDS_CACHE_SIZE \
+ (GVE_ADMINQ_BUFFER_SIZE / sizeof(((struct gve_adminq_queried_flow_rule *)0)->location))
+
+#define GVE_RSS_KEY_SIZE 40
+#define GVE_RSS_INDIR_SIZE 128
+
#define GVE_XDP_ACTIONS 5
#define GVE_GQ_TX_MIN_PKT_DESC_BYTES 182
@@ -63,7 +80,6 @@
#define GVE_DEFAULT_HEADER_BUFFER_SIZE 128
#define DQO_QPL_DEFAULT_TX_PAGES 512
-#define DQO_QPL_DEFAULT_RX_PAGES 2048
/* Maximum TSO size supported on DQO */
#define GVE_DQO_TX_MAX 0x3FFFF
@@ -91,9 +107,16 @@ struct gve_rx_desc_queue {
/* The page info for a single slot in the RX data queue */
struct gve_rx_slot_page_info {
- struct page *page;
+ /* netmem is used for DQO RDA mode
+ * page is used in all other modes
+ */
+ union {
+ struct page *page;
+ netmem_ref netmem;
+ };
void *page_address;
u32 page_offset; /* offset to write to in page */
+ unsigned int buf_size;
int pagecnt_bias; /* expected pagecnt if only the driver has a ref */
u16 pad; /* adjustment for rx padding */
u8 can_flip; /* tracks if the networking stack is using the page */
@@ -206,6 +229,11 @@ struct gve_rx_cnts {
/* Contains datapath state used to represent an RX queue. */
struct gve_rx_ring {
struct gve_priv *gve;
+
+ u16 packet_buffer_size; /* Size of buffer posted to NIC */
+ u16 packet_buffer_truesize; /* Total size of RX buffer */
+ u16 rx_headroom;
+
union {
/* GQI fields */
struct {
@@ -214,7 +242,6 @@ struct gve_rx_ring {
/* threshold for posting new buffs and descs */
u32 db_threshold;
- u16 packet_buffer_size;
u32 qpl_copy_pool_mask;
u32 qpl_copy_pool_head;
@@ -265,6 +292,8 @@ struct gve_rx_ring {
/* Address info of the buffers for header-split */
struct gve_header_buf hdr_bufs;
+
+ struct page_pool *page_pool;
} dqo;
};
@@ -590,8 +619,6 @@ struct gve_tx_ring {
dma_addr_t complq_bus_dqo; /* dma address of the dqo.compl_ring */
struct u64_stats_sync statss; /* sync stats for 32bit archs */
struct xsk_buff_pool *xsk_pool;
- u32 xdp_xsk_wakeup;
- u32 xdp_xsk_done;
u64 xdp_xsk_sent;
u64 xdp_xmit;
u64 xdp_xmit_errors;
@@ -607,12 +634,21 @@ struct gve_notify_block {
struct gve_priv *priv;
struct gve_tx_ring *tx; /* tx rings on this block */
struct gve_rx_ring *rx; /* rx rings on this block */
+ u32 irq;
+};
+
+/* Tracks allowed and current rx queue settings */
+struct gve_rx_queue_config {
+ u16 max_queues;
+ u16 num_queues;
+ u16 packet_buffer_size;
};
-/* Tracks allowed and current queue settings */
-struct gve_queue_config {
+/* Tracks allowed and current tx queue settings */
+struct gve_tx_queue_config {
u16 max_queues;
- u16 num_queues; /* current */
+ u16 num_queues; /* number of TX queues, excluding XDP queues */
+ u16 num_xdp_queues;
};
/* Tracks the available and used qpl IDs */
@@ -621,11 +657,6 @@ struct gve_qpl_config {
unsigned long *qpl_id_map; /* bitmap of used qpl ids */
};
-struct gve_options_dqo_rda {
- u16 tx_comp_ring_entries; /* number of tx_comp descriptors */
- u16 rx_buff_ring_entries; /* number of rx_buff descriptors */
-};
-
struct gve_irq_db {
__be32 index;
} ____cacheline_aligned;
@@ -639,31 +670,13 @@ struct gve_ptype_lut {
struct gve_ptype ptypes[GVE_NUM_PTYPES];
};
-/* Parameters for allocating queue page lists */
-struct gve_qpls_alloc_cfg {
- struct gve_qpl_config *qpl_cfg;
- struct gve_queue_config *tx_cfg;
- struct gve_queue_config *rx_cfg;
-
- u16 num_xdp_queues;
- bool raw_addressing;
- bool is_gqi;
-
- /* Allocated resources are returned here */
- struct gve_queue_page_list *qpls;
-};
-
/* Parameters for allocating resources for tx queues */
struct gve_tx_alloc_rings_cfg {
- struct gve_queue_config *qcfg;
+ struct gve_tx_queue_config *qcfg;
- /* qpls and qpl_cfg must already be allocated */
- struct gve_queue_page_list *qpls;
- struct gve_qpl_config *qpl_cfg;
+ u16 num_xdp_rings;
u16 ring_size;
- u16 start_idx;
- u16 num_rings;
bool raw_addressing;
/* Allocated resources are returned here */
@@ -673,17 +686,15 @@ struct gve_tx_alloc_rings_cfg {
/* Parameters for allocating resources for rx queues */
struct gve_rx_alloc_rings_cfg {
/* tx config is also needed to determine QPL ids */
- struct gve_queue_config *qcfg;
- struct gve_queue_config *qcfg_tx;
-
- /* qpls and qpl_cfg must already be allocated */
- struct gve_queue_page_list *qpls;
- struct gve_qpl_config *qpl_cfg;
+ struct gve_rx_queue_config *qcfg_rx;
+ struct gve_tx_queue_config *qcfg_tx;
u16 ring_size;
u16 packet_buffer_size;
bool raw_addressing;
bool enable_header_split;
+ bool reset_rss;
+ bool xdp;
/* Allocated resources are returned here */
struct gve_rx_ring *rx;
@@ -701,11 +712,48 @@ enum gve_queue_format {
GVE_DQO_QPL_FORMAT = 0x4,
};
+struct gve_flow_spec {
+ __be32 src_ip[4];
+ __be32 dst_ip[4];
+ union {
+ struct {
+ __be16 src_port;
+ __be16 dst_port;
+ };
+ __be32 spi;
+ };
+ union {
+ u8 tos;
+ u8 tclass;
+ };
+};
+
+struct gve_flow_rule {
+ u32 location;
+ u16 flow_type;
+ u16 action;
+ struct gve_flow_spec key;
+ struct gve_flow_spec mask;
+};
+
+struct gve_flow_rules_cache {
+ bool rules_cache_synced; /* False if the driver's rules_cache is outdated */
+ struct gve_adminq_queried_flow_rule *rules_cache;
+ __be32 *rule_ids_cache;
+ /* The total number of queried rules that stored in the caches */
+ u32 rules_cache_num;
+ u32 rule_ids_cache_num;
+};
+
+struct gve_rss_config {
+ u8 *hash_key;
+ u32 *hash_lut;
+};
+
struct gve_priv {
struct net_device *dev;
struct gve_tx_ring *tx; /* array of tx_cfg.num_queues */
struct gve_rx_ring *rx; /* array of rx_cfg.num_queues */
- struct gve_queue_page_list *qpls; /* array of num qpls */
struct gve_notify_block *ntfy_blocks; /* array of num_ntfy_blks */
struct gve_irq_db *irq_db_indices; /* array of num_ntfy_blks */
dma_addr_t irq_db_indices_bus;
@@ -718,19 +766,21 @@ struct gve_priv {
u16 num_event_counters;
u16 tx_desc_cnt; /* num desc per ring */
u16 rx_desc_cnt; /* num desc per ring */
+ u16 max_tx_desc_cnt;
+ u16 max_rx_desc_cnt;
+ u16 min_tx_desc_cnt;
+ u16 min_rx_desc_cnt;
+ bool modify_ring_size_enabled;
+ bool default_min_ring_size;
u16 tx_pages_per_qpl; /* Suggested number of pages per qpl for TX queues by NIC */
- u16 rx_pages_per_qpl; /* Suggested number of pages per qpl for RX queues by NIC */
- u16 rx_data_slot_cnt; /* rx buffer length */
u64 max_registered_pages;
u64 num_registered_pages; /* num pages registered with NIC */
struct bpf_prog *xdp_prog; /* XDP BPF program */
u32 rx_copybreak; /* copy packets smaller than this */
u16 default_num_queues; /* default num queues to set up */
- u16 num_xdp_queues;
- struct gve_queue_config tx_cfg;
- struct gve_queue_config rx_cfg;
- struct gve_qpl_config qpl_cfg; /* map used QPL ids */
+ struct gve_tx_queue_config tx_cfg;
+ struct gve_rx_queue_config rx_cfg;
u32 num_ntfy_blks; /* spilt between TX and RX so must be even */
struct gve_registers __iomem *reg_bar0; /* see gve_register.h */
@@ -745,6 +795,7 @@ struct gve_priv {
union gve_adminq_command *adminq;
dma_addr_t adminq_bus_addr;
struct dma_pool *adminq_pool;
+ struct mutex adminq_lock; /* Protects adminq command execution */
u32 adminq_mask; /* masks prod_cnt to adminq size */
u32 adminq_prod_cnt; /* free-running count of AQ cmds executed */
u32 adminq_cmd_fail; /* free-running count of AQ cmds failed */
@@ -764,6 +815,10 @@ struct gve_priv {
u32 adminq_report_link_speed_cnt;
u32 adminq_get_ptype_map_cnt;
u32 adminq_verify_driver_compatibility_cnt;
+ u32 adminq_query_flow_rules_cnt;
+ u32 adminq_cfg_flow_rule_cnt;
+ u32 adminq_cfg_rss_cnt;
+ u32 adminq_query_rss_cnt;
/* Global stats */
u32 interface_up_cnt; /* count of times interface turned up since last reset */
@@ -792,11 +847,9 @@ struct gve_priv {
u64 link_speed;
bool up_before_suspend; /* True if dev was up before suspend */
- struct gve_options_dqo_rda options_dqo_rda;
struct gve_ptype_lut *ptype_lut_dqo;
/* Must be a power of two. */
- u16 data_buffer_size_dqo;
u16 max_rx_buffer_size; /* device limit */
enum gve_queue_format queue_format;
@@ -807,6 +860,16 @@ struct gve_priv {
u16 header_buf_size; /* device configured, header-split supported if non-zero */
bool header_split_enabled; /* True if the header split is enabled by the user */
+
+ u32 max_flow_rules;
+ u32 num_flow_rules;
+
+ struct gve_flow_rules_cache flow_rules_cache;
+
+ u16 rss_key_size;
+ u16 rss_lut_size;
+ bool cache_rss_config;
+ struct gve_rss_config rss_config;
};
enum gve_service_task_flags_bit {
@@ -989,27 +1052,16 @@ static inline bool gve_is_qpl(struct gve_priv *priv)
}
/* Returns the number of tx queue page lists */
-static inline u32 gve_num_tx_qpls(const struct gve_queue_config *tx_cfg,
- int num_xdp_queues,
+static inline u32 gve_num_tx_qpls(const struct gve_tx_queue_config *tx_cfg,
bool is_qpl)
{
if (!is_qpl)
return 0;
- return tx_cfg->num_queues + num_xdp_queues;
-}
-
-/* Returns the number of XDP tx queue page lists
- */
-static inline u32 gve_num_xdp_qpls(struct gve_priv *priv)
-{
- if (priv->queue_format != GVE_GQI_QPL_FORMAT)
- return 0;
-
- return priv->num_xdp_queues;
+ return tx_cfg->num_queues + tx_cfg->num_xdp_queues;
}
/* Returns the number of rx queue page lists */
-static inline u32 gve_num_rx_qpls(const struct gve_queue_config *rx_cfg,
+static inline u32 gve_num_rx_qpls(const struct gve_rx_queue_config *rx_cfg,
bool is_qpl)
{
if (!is_qpl)
@@ -1027,8 +1079,8 @@ static inline u32 gve_rx_qpl_id(struct gve_priv *priv, int rx_qid)
return priv->tx_cfg.max_queues + rx_qid;
}
-/* Returns the index into priv->qpls where a certain rx queue's QPL resides */
-static inline u32 gve_get_rx_qpl_id(const struct gve_queue_config *tx_cfg, int rx_qid)
+static inline u32 gve_get_rx_qpl_id(const struct gve_tx_queue_config *tx_cfg,
+ int rx_qid)
{
return tx_cfg->max_queues + rx_qid;
}
@@ -1038,41 +1090,17 @@ static inline u32 gve_tx_start_qpl_id(struct gve_priv *priv)
return gve_tx_qpl_id(priv, 0);
}
-/* Returns the index into priv->qpls where the first rx queue's QPL resides */
-static inline u32 gve_rx_start_qpl_id(const struct gve_queue_config *tx_cfg)
+static inline u32 gve_rx_start_qpl_id(const struct gve_tx_queue_config *tx_cfg)
{
return gve_get_rx_qpl_id(tx_cfg, 0);
}
-/* Returns a pointer to the next available tx qpl in the list of qpls */
-static inline
-struct gve_queue_page_list *gve_assign_tx_qpl(struct gve_tx_alloc_rings_cfg *cfg,
- int tx_qid)
+static inline u32 gve_get_rx_pages_per_qpl_dqo(u32 rx_desc_cnt)
{
- /* QPL already in use */
- if (test_bit(tx_qid, cfg->qpl_cfg->qpl_id_map))
- return NULL;
- set_bit(tx_qid, cfg->qpl_cfg->qpl_id_map);
- return &cfg->qpls[tx_qid];
-}
-
-/* Returns a pointer to the next available rx qpl in the list of qpls */
-static inline
-struct gve_queue_page_list *gve_assign_rx_qpl(struct gve_rx_alloc_rings_cfg *cfg,
- int rx_qid)
-{
- int id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx_qid);
- /* QPL already in use */
- if (test_bit(id, cfg->qpl_cfg->qpl_id_map))
- return NULL;
- set_bit(id, cfg->qpl_cfg->qpl_id_map);
- return &cfg->qpls[id];
-}
-
-/* Unassigns the qpl with the given id */
-static inline void gve_unassign_qpl(struct gve_qpl_config *qpl_cfg, int id)
-{
- clear_bit(id, qpl_cfg->qpl_id_map);
+ /* For DQO, page count should be more than ring size for
+ * out-of-order completions. Set it to two times of ring size.
+ */
+ return 2 * rx_desc_cnt;
}
/* Returns the correct dma direction for tx and rx qpls */
@@ -1093,7 +1121,7 @@ static inline bool gve_is_gqi(struct gve_priv *priv)
static inline u32 gve_num_tx_queues(struct gve_priv *priv)
{
- return priv->tx_cfg.num_queues + priv->num_xdp_queues;
+ return priv->tx_cfg.num_queues + priv->tx_cfg.num_xdp_queues;
}
static inline u32 gve_xdp_tx_queue_id(struct gve_priv *priv, u32 queue_id)
@@ -1106,6 +1134,16 @@ static inline u32 gve_xdp_tx_start_queue_id(struct gve_priv *priv)
return gve_xdp_tx_queue_id(priv, 0);
}
+static inline bool gve_supports_xdp_xmit(struct gve_priv *priv)
+{
+ switch (priv->queue_format) {
+ case GVE_GQI_QPL_FORMAT:
+ return true;
+ default:
+ return false;
+ }
+}
+
/* gqi napi handler defined in gve_main.c */
int gve_napi_poll(struct napi_struct *napi, int budget);
@@ -1115,6 +1153,12 @@ int gve_alloc_page(struct gve_priv *priv, struct device *dev,
enum dma_data_direction, gfp_t gfp_flags);
void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
enum dma_data_direction);
+/* qpls */
+struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv,
+ u32 id, int pages);
+void gve_free_queue_page_list(struct gve_priv *priv,
+ struct gve_queue_page_list *qpl,
+ u32 id);
/* tx handling */
netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev);
int gve_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
@@ -1124,6 +1168,7 @@ int gve_xdp_xmit_one(struct gve_priv *priv, struct gve_tx_ring *tx,
void gve_xdp_tx_flush(struct gve_priv *priv, u32 xdp_qid);
bool gve_tx_poll(struct gve_notify_block *block, int budget);
bool gve_xdp_poll(struct gve_notify_block *block, int budget);
+int gve_xsk_tx_poll(struct gve_notify_block *block, int budget);
int gve_tx_alloc_rings_gqi(struct gve_priv *priv,
struct gve_tx_alloc_rings_cfg *cfg);
void gve_tx_free_rings_gqi(struct gve_priv *priv,
@@ -1137,7 +1182,12 @@ bool gve_tx_clean_pending(struct gve_priv *priv, struct gve_tx_ring *tx);
void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx);
int gve_rx_poll(struct gve_notify_block *block, int budget);
bool gve_rx_work_pending(struct gve_rx_ring *rx);
-int gve_rx_alloc_rings(struct gve_priv *priv);
+int gve_rx_alloc_ring_gqi(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg,
+ struct gve_rx_ring *rx,
+ int idx);
+void gve_rx_free_ring_gqi(struct gve_priv *priv, struct gve_rx_ring *rx,
+ struct gve_rx_alloc_rings_cfg *cfg);
int gve_rx_alloc_rings_gqi(struct gve_priv *priv,
struct gve_rx_alloc_rings_cfg *cfg);
void gve_rx_free_rings_gqi(struct gve_priv *priv,
@@ -1147,12 +1197,58 @@ void gve_rx_stop_ring_gqi(struct gve_priv *priv, int idx);
u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hplit);
bool gve_header_split_supported(const struct gve_priv *priv);
int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split);
+/* rx buffer handling */
+int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs);
+void gve_free_page_dqo(struct gve_priv *priv, struct gve_rx_buf_state_dqo *bs,
+ bool free_page);
+struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx);
+bool gve_buf_state_is_allocated(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state);
+void gve_free_buf_state(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state);
+struct gve_rx_buf_state_dqo *gve_dequeue_buf_state(struct gve_rx_ring *rx,
+ struct gve_index_list *list);
+void gve_enqueue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list,
+ struct gve_rx_buf_state_dqo *buf_state);
+struct gve_rx_buf_state_dqo *gve_get_recycled_buf_state(struct gve_rx_ring *rx);
+void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state);
+void gve_free_to_page_pool(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state,
+ bool allow_direct);
+int gve_alloc_qpl_page_dqo(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state);
+void gve_free_qpl_page_dqo(struct gve_rx_buf_state_dqo *buf_state);
+void gve_reuse_buffer(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state);
+void gve_free_buffer(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state);
+int gve_alloc_buffer(struct gve_rx_ring *rx, struct gve_rx_desc_dqo *desc);
+struct page_pool *gve_rx_create_page_pool(struct gve_priv *priv,
+ struct gve_rx_ring *rx,
+ bool xdp);
+
/* Reset */
void gve_schedule_reset(struct gve_priv *priv);
int gve_reset(struct gve_priv *priv, bool attempt_teardown);
+void gve_get_curr_alloc_cfgs(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
+ struct gve_rx_alloc_rings_cfg *rx_alloc_cfg);
+int gve_adjust_config(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
+ struct gve_rx_alloc_rings_cfg *rx_alloc_cfg);
int gve_adjust_queues(struct gve_priv *priv,
- struct gve_queue_config new_rx_config,
- struct gve_queue_config new_tx_config);
+ struct gve_rx_queue_config new_rx_config,
+ struct gve_tx_queue_config new_tx_config,
+ bool reset_rss);
+/* flow steering rule */
+int gve_get_flow_rule_entry(struct gve_priv *priv, struct ethtool_rxnfc *cmd);
+int gve_get_flow_rule_ids(struct gve_priv *priv, struct ethtool_rxnfc *cmd, u32 *rule_locs);
+int gve_add_flow_rule(struct gve_priv *priv, struct ethtool_rxnfc *cmd);
+int gve_del_flow_rule(struct gve_priv *priv, struct ethtool_rxnfc *cmd);
+int gve_flow_rules_reset(struct gve_priv *priv);
+/* RSS config */
+int gve_init_rss_config(struct gve_priv *priv, u16 num_queues);
/* report stats handling */
void gve_handle_report_stats(struct gve_priv *priv);
/* exported by ethtool.c */
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
index ae12ac38e18b..3e8fc33cc11f 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.c
+++ b/drivers/net/ethernet/google/gve/gve_adminq.c
@@ -32,6 +32,8 @@ struct gve_device_option *gve_get_next_option(struct gve_device_descriptor *desc
return option_end > descriptor_end ? NULL : (struct gve_device_option *)option_end;
}
+#define GVE_DEVICE_OPTION_NO_MIN_RING_SIZE 8
+
static
void gve_parse_device_option(struct gve_priv *priv,
struct gve_device_descriptor *device_descriptor,
@@ -41,7 +43,10 @@ void gve_parse_device_option(struct gve_priv *priv,
struct gve_device_option_dqo_rda **dev_op_dqo_rda,
struct gve_device_option_jumbo_frames **dev_op_jumbo_frames,
struct gve_device_option_dqo_qpl **dev_op_dqo_qpl,
- struct gve_device_option_buffer_sizes **dev_op_buffer_sizes)
+ struct gve_device_option_buffer_sizes **dev_op_buffer_sizes,
+ struct gve_device_option_flow_steering **dev_op_flow_steering,
+ struct gve_device_option_rss_config **dev_op_rss_config,
+ struct gve_device_option_modify_ring **dev_op_modify_ring)
{
u32 req_feat_mask = be32_to_cpu(option->required_features_mask);
u16 option_length = be16_to_cpu(option->option_length);
@@ -165,6 +170,61 @@ void gve_parse_device_option(struct gve_priv *priv,
"Buffer Sizes");
*dev_op_buffer_sizes = (void *)(option + 1);
break;
+ case GVE_DEV_OPT_ID_MODIFY_RING:
+ if (option_length < GVE_DEVICE_OPTION_NO_MIN_RING_SIZE ||
+ req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING) {
+ dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT,
+ "Modify Ring", (int)sizeof(**dev_op_modify_ring),
+ GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING,
+ option_length, req_feat_mask);
+ break;
+ }
+
+ if (option_length > sizeof(**dev_op_modify_ring)) {
+ dev_warn(&priv->pdev->dev,
+ GVE_DEVICE_OPTION_TOO_BIG_FMT, "Modify Ring");
+ }
+
+ *dev_op_modify_ring = (void *)(option + 1);
+
+ /* device has not provided min ring size */
+ if (option_length == GVE_DEVICE_OPTION_NO_MIN_RING_SIZE)
+ priv->default_min_ring_size = true;
+ break;
+ case GVE_DEV_OPT_ID_FLOW_STEERING:
+ if (option_length < sizeof(**dev_op_flow_steering) ||
+ req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_FLOW_STEERING) {
+ dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT,
+ "Flow Steering",
+ (int)sizeof(**dev_op_flow_steering),
+ GVE_DEV_OPT_REQ_FEAT_MASK_FLOW_STEERING,
+ option_length, req_feat_mask);
+ break;
+ }
+
+ if (option_length > sizeof(**dev_op_flow_steering))
+ dev_warn(&priv->pdev->dev,
+ GVE_DEVICE_OPTION_TOO_BIG_FMT,
+ "Flow Steering");
+ *dev_op_flow_steering = (void *)(option + 1);
+ break;
+ case GVE_DEV_OPT_ID_RSS_CONFIG:
+ if (option_length < sizeof(**dev_op_rss_config) ||
+ req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_RSS_CONFIG) {
+ dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT,
+ "RSS config",
+ (int)sizeof(**dev_op_rss_config),
+ GVE_DEV_OPT_REQ_FEAT_MASK_RSS_CONFIG,
+ option_length, req_feat_mask);
+ break;
+ }
+
+ if (option_length > sizeof(**dev_op_rss_config))
+ dev_warn(&priv->pdev->dev,
+ GVE_DEVICE_OPTION_TOO_BIG_FMT,
+ "RSS config");
+ *dev_op_rss_config = (void *)(option + 1);
+ break;
default:
/* If we don't recognize the option just continue
* without doing anything.
@@ -183,7 +243,10 @@ gve_process_device_options(struct gve_priv *priv,
struct gve_device_option_dqo_rda **dev_op_dqo_rda,
struct gve_device_option_jumbo_frames **dev_op_jumbo_frames,
struct gve_device_option_dqo_qpl **dev_op_dqo_qpl,
- struct gve_device_option_buffer_sizes **dev_op_buffer_sizes)
+ struct gve_device_option_buffer_sizes **dev_op_buffer_sizes,
+ struct gve_device_option_flow_steering **dev_op_flow_steering,
+ struct gve_device_option_rss_config **dev_op_rss_config,
+ struct gve_device_option_modify_ring **dev_op_modify_ring)
{
const int num_options = be16_to_cpu(descriptor->num_device_options);
struct gve_device_option *dev_opt;
@@ -204,7 +267,9 @@ gve_process_device_options(struct gve_priv *priv,
gve_parse_device_option(priv, descriptor, dev_opt,
dev_op_gqi_rda, dev_op_gqi_qpl,
dev_op_dqo_rda, dev_op_jumbo_frames,
- dev_op_dqo_qpl, dev_op_buffer_sizes);
+ dev_op_dqo_qpl, dev_op_buffer_sizes,
+ dev_op_flow_steering, dev_op_rss_config,
+ dev_op_modify_ring);
dev_opt = next_opt;
}
@@ -242,6 +307,10 @@ int gve_adminq_alloc(struct device *dev, struct gve_priv *priv)
priv->adminq_report_stats_cnt = 0;
priv->adminq_report_link_speed_cnt = 0;
priv->adminq_get_ptype_map_cnt = 0;
+ priv->adminq_query_flow_rules_cnt = 0;
+ priv->adminq_cfg_flow_rule_cnt = 0;
+ priv->adminq_cfg_rss_cnt = 0;
+ priv->adminq_query_rss_cnt = 0;
/* Setup Admin queue with the device */
if (priv->pdev->revision < 0x1) {
@@ -258,6 +327,7 @@ int gve_adminq_alloc(struct device *dev, struct gve_priv *priv)
&priv->reg_bar0->adminq_base_address_lo);
iowrite32be(GVE_DRIVER_STATUS_RUN_MASK, &priv->reg_bar0->driver_status);
}
+ mutex_init(&priv->adminq_lock);
gve_set_admin_queue_ok(priv);
return 0;
}
@@ -434,6 +504,8 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv,
memcpy(cmd, cmd_orig, sizeof(*cmd_orig));
opcode = be32_to_cpu(READ_ONCE(cmd->opcode));
+ if (opcode == GVE_ADMINQ_EXTENDED_COMMAND)
+ opcode = be32_to_cpu(cmd->extended_command.inner_opcode);
switch (opcode) {
case GVE_ADMINQ_DESCRIBE_DEVICE:
@@ -478,6 +550,18 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv,
case GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY:
priv->adminq_verify_driver_compatibility_cnt++;
break;
+ case GVE_ADMINQ_QUERY_FLOW_RULES:
+ priv->adminq_query_flow_rules_cnt++;
+ break;
+ case GVE_ADMINQ_CONFIGURE_FLOW_RULE:
+ priv->adminq_cfg_flow_rule_cnt++;
+ break;
+ case GVE_ADMINQ_CONFIGURE_RSS:
+ priv->adminq_cfg_rss_cnt++;
+ break;
+ case GVE_ADMINQ_QUERY_RSS:
+ priv->adminq_query_rss_cnt++;
+ break;
default:
dev_err(&priv->pdev->dev, "unknown AQ command opcode %d\n", opcode);
}
@@ -485,28 +569,58 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv,
return 0;
}
-/* This function is not threadsafe - the caller is responsible for any
- * necessary locks.
- * The caller is also responsible for making sure there are no commands
- * waiting to be executed.
- */
static int gve_adminq_execute_cmd(struct gve_priv *priv,
union gve_adminq_command *cmd_orig)
{
u32 tail, head;
int err;
+ mutex_lock(&priv->adminq_lock);
tail = ioread32be(&priv->reg_bar0->adminq_event_counter);
head = priv->adminq_prod_cnt;
- if (tail != head)
- // This is not a valid path
- return -EINVAL;
+ if (tail != head) {
+ err = -EINVAL;
+ goto out;
+ }
err = gve_adminq_issue_cmd(priv, cmd_orig);
if (err)
- return err;
+ goto out;
- return gve_adminq_kick_and_wait(priv);
+ err = gve_adminq_kick_and_wait(priv);
+
+out:
+ mutex_unlock(&priv->adminq_lock);
+ return err;
+}
+
+static int gve_adminq_execute_extended_cmd(struct gve_priv *priv, u32 opcode,
+ size_t cmd_size, void *cmd_orig)
+{
+ union gve_adminq_command cmd;
+ dma_addr_t inner_cmd_bus;
+ void *inner_cmd;
+ int err;
+
+ inner_cmd = dma_alloc_coherent(&priv->pdev->dev, cmd_size,
+ &inner_cmd_bus, GFP_KERNEL);
+ if (!inner_cmd)
+ return -ENOMEM;
+
+ memcpy(inner_cmd, cmd_orig, cmd_size);
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_EXTENDED_COMMAND);
+ cmd.extended_command = (struct gve_adminq_extended_command) {
+ .inner_opcode = cpu_to_be32(opcode),
+ .inner_length = cpu_to_be32(cmd_size),
+ .inner_command_addr = cpu_to_be64(inner_cmd_bus),
+ };
+
+ err = gve_adminq_execute_cmd(priv, &cmd);
+
+ dma_free_coherent(&priv->pdev->dev, cmd_size, inner_cmd, inner_cmd_bus);
+ return err;
}
/* The device specifies that the management vector can either be the first irq
@@ -565,6 +679,7 @@ static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index)
cpu_to_be64(tx->q_resources_bus),
.tx_ring_addr = cpu_to_be64(tx->bus),
.ntfy_id = cpu_to_be32(tx->ntfy_id),
+ .tx_ring_size = cpu_to_be16(priv->tx_desc_cnt),
};
if (gve_is_gqi(priv)) {
@@ -573,24 +688,17 @@ static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index)
cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
} else {
- u16 comp_ring_size;
u32 qpl_id = 0;
- if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
+ if (priv->queue_format == GVE_DQO_RDA_FORMAT)
qpl_id = GVE_RAW_ADDRESSING_QPL_ID;
- comp_ring_size =
- priv->options_dqo_rda.tx_comp_ring_entries;
- } else {
+ else
qpl_id = tx->dqo.qpl->id;
- comp_ring_size = priv->tx_desc_cnt;
- }
cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
- cmd.create_tx_queue.tx_ring_size =
- cpu_to_be16(priv->tx_desc_cnt);
cmd.create_tx_queue.tx_comp_ring_addr =
cpu_to_be64(tx->complq_bus_dqo);
cmd.create_tx_queue.tx_comp_ring_size =
- cpu_to_be16(comp_ring_size);
+ cpu_to_be16(priv->tx_desc_cnt);
}
return gve_adminq_issue_cmd(priv, &cmd);
@@ -610,63 +718,71 @@ int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_que
return gve_adminq_kick_and_wait(priv);
}
-static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index)
+static void gve_adminq_get_create_rx_queue_cmd(struct gve_priv *priv,
+ union gve_adminq_command *cmd,
+ u32 queue_index)
{
struct gve_rx_ring *rx = &priv->rx[queue_index];
- union gve_adminq_command cmd;
- memset(&cmd, 0, sizeof(cmd));
- cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_RX_QUEUE);
- cmd.create_rx_queue = (struct gve_adminq_create_rx_queue) {
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->opcode = cpu_to_be32(GVE_ADMINQ_CREATE_RX_QUEUE);
+ cmd->create_rx_queue = (struct gve_adminq_create_rx_queue) {
.queue_id = cpu_to_be32(queue_index),
.ntfy_id = cpu_to_be32(rx->ntfy_id),
.queue_resources_addr = cpu_to_be64(rx->q_resources_bus),
+ .rx_ring_size = cpu_to_be16(priv->rx_desc_cnt),
+ .packet_buffer_size = cpu_to_be16(rx->packet_buffer_size),
};
if (gve_is_gqi(priv)) {
u32 qpl_id = priv->queue_format == GVE_GQI_RDA_FORMAT ?
GVE_RAW_ADDRESSING_QPL_ID : rx->data.qpl->id;
- cmd.create_rx_queue.rx_desc_ring_addr =
- cpu_to_be64(rx->desc.bus),
- cmd.create_rx_queue.rx_data_ring_addr =
- cpu_to_be64(rx->data.data_bus),
- cmd.create_rx_queue.index = cpu_to_be32(queue_index);
- cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
- cmd.create_rx_queue.packet_buffer_size = cpu_to_be16(rx->packet_buffer_size);
+ cmd->create_rx_queue.rx_desc_ring_addr =
+ cpu_to_be64(rx->desc.bus);
+ cmd->create_rx_queue.rx_data_ring_addr =
+ cpu_to_be64(rx->data.data_bus);
+ cmd->create_rx_queue.index = cpu_to_be32(queue_index);
+ cmd->create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
} else {
- u16 rx_buff_ring_entries;
u32 qpl_id = 0;
- if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
+ if (priv->queue_format == GVE_DQO_RDA_FORMAT)
qpl_id = GVE_RAW_ADDRESSING_QPL_ID;
- rx_buff_ring_entries =
- priv->options_dqo_rda.rx_buff_ring_entries;
- } else {
+ else
qpl_id = rx->dqo.qpl->id;
- rx_buff_ring_entries = priv->rx_desc_cnt;
- }
- cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
- cmd.create_rx_queue.rx_ring_size =
- cpu_to_be16(priv->rx_desc_cnt);
- cmd.create_rx_queue.rx_desc_ring_addr =
+ cmd->create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
+ cmd->create_rx_queue.rx_desc_ring_addr =
cpu_to_be64(rx->dqo.complq.bus);
- cmd.create_rx_queue.rx_data_ring_addr =
+ cmd->create_rx_queue.rx_data_ring_addr =
cpu_to_be64(rx->dqo.bufq.bus);
- cmd.create_rx_queue.packet_buffer_size =
- cpu_to_be16(priv->data_buffer_size_dqo);
- cmd.create_rx_queue.rx_buff_ring_size =
- cpu_to_be16(rx_buff_ring_entries);
- cmd.create_rx_queue.enable_rsc =
+ cmd->create_rx_queue.rx_buff_ring_size =
+ cpu_to_be16(priv->rx_desc_cnt);
+ cmd->create_rx_queue.enable_rsc =
!!(priv->dev->features & NETIF_F_LRO);
if (priv->header_split_enabled)
- cmd.create_rx_queue.header_buffer_size =
+ cmd->create_rx_queue.header_buffer_size =
cpu_to_be16(priv->header_buf_size);
}
+}
+
+static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index)
+{
+ union gve_adminq_command cmd;
+ gve_adminq_get_create_rx_queue_cmd(priv, &cmd, queue_index);
return gve_adminq_issue_cmd(priv, &cmd);
}
+/* Unlike gve_adminq_create_rx_queue, this actually rings the doorbell */
+int gve_adminq_create_single_rx_queue(struct gve_priv *priv, u32 queue_index)
+{
+ union gve_adminq_command cmd;
+
+ gve_adminq_get_create_rx_queue_cmd(priv, &cmd, queue_index);
+ return gve_adminq_execute_cmd(priv, &cmd);
+}
+
int gve_adminq_create_rx_queues(struct gve_priv *priv, u32 num_queues)
{
int err;
@@ -713,22 +829,31 @@ int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_qu
return gve_adminq_kick_and_wait(priv);
}
+static void gve_adminq_make_destroy_rx_queue_cmd(union gve_adminq_command *cmd,
+ u32 queue_index)
+{
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_RX_QUEUE);
+ cmd->destroy_rx_queue = (struct gve_adminq_destroy_rx_queue) {
+ .queue_id = cpu_to_be32(queue_index),
+ };
+}
+
static int gve_adminq_destroy_rx_queue(struct gve_priv *priv, u32 queue_index)
{
union gve_adminq_command cmd;
- int err;
- memset(&cmd, 0, sizeof(cmd));
- cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_RX_QUEUE);
- cmd.destroy_rx_queue = (struct gve_adminq_destroy_rx_queue) {
- .queue_id = cpu_to_be32(queue_index),
- };
+ gve_adminq_make_destroy_rx_queue_cmd(&cmd, queue_index);
+ return gve_adminq_issue_cmd(priv, &cmd);
+}
- err = gve_adminq_issue_cmd(priv, &cmd);
- if (err)
- return err;
+/* Unlike gve_adminq_destroy_rx_queue, this actually rings the doorbell */
+int gve_adminq_destroy_single_rx_queue(struct gve_priv *priv, u32 queue_index)
+{
+ union gve_adminq_command cmd;
- return 0;
+ gve_adminq_make_destroy_rx_queue_cmd(&cmd, queue_index);
+ return gve_adminq_execute_cmd(priv, &cmd);
}
int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 num_queues)
@@ -745,31 +870,26 @@ int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 num_queues)
return gve_adminq_kick_and_wait(priv);
}
-static int gve_set_desc_cnt(struct gve_priv *priv,
- struct gve_device_descriptor *descriptor)
+static void gve_set_default_desc_cnt(struct gve_priv *priv,
+ const struct gve_device_descriptor *descriptor)
{
priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries);
priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries);
- return 0;
+
+ /* set default ranges */
+ priv->max_tx_desc_cnt = priv->tx_desc_cnt;
+ priv->max_rx_desc_cnt = priv->rx_desc_cnt;
+ priv->min_tx_desc_cnt = priv->tx_desc_cnt;
+ priv->min_rx_desc_cnt = priv->rx_desc_cnt;
}
-static int
-gve_set_desc_cnt_dqo(struct gve_priv *priv,
- const struct gve_device_descriptor *descriptor,
- const struct gve_device_option_dqo_rda *dev_op_dqo_rda)
+static void gve_set_default_rss_sizes(struct gve_priv *priv)
{
- priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries);
- priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries);
-
- if (priv->queue_format == GVE_DQO_QPL_FORMAT)
- return 0;
-
- priv->options_dqo_rda.tx_comp_ring_entries =
- be16_to_cpu(dev_op_dqo_rda->tx_comp_ring_entries);
- priv->options_dqo_rda.rx_buff_ring_entries =
- be16_to_cpu(dev_op_dqo_rda->rx_buff_ring_entries);
-
- return 0;
+ if (!gve_is_gqi(priv)) {
+ priv->rss_key_size = GVE_RSS_KEY_SIZE;
+ priv->rss_lut_size = GVE_RSS_INDIR_SIZE;
+ priv->cache_rss_config = true;
+ }
}
static void gve_enable_supported_features(struct gve_priv *priv,
@@ -779,7 +899,13 @@ static void gve_enable_supported_features(struct gve_priv *priv,
const struct gve_device_option_dqo_qpl
*dev_op_dqo_qpl,
const struct gve_device_option_buffer_sizes
- *dev_op_buffer_sizes)
+ *dev_op_buffer_sizes,
+ const struct gve_device_option_flow_steering
+ *dev_op_flow_steering,
+ const struct gve_device_option_rss_config
+ *dev_op_rss_config,
+ const struct gve_device_option_modify_ring
+ *dev_op_modify_ring)
{
/* Before control reaches this point, the page-size-capped max MTU from
* the gve_device_descriptor field has already been stored in
@@ -796,12 +922,8 @@ static void gve_enable_supported_features(struct gve_priv *priv,
if (dev_op_dqo_qpl) {
priv->tx_pages_per_qpl =
be16_to_cpu(dev_op_dqo_qpl->tx_pages_per_qpl);
- priv->rx_pages_per_qpl =
- be16_to_cpu(dev_op_dqo_qpl->rx_pages_per_qpl);
if (priv->tx_pages_per_qpl == 0)
priv->tx_pages_per_qpl = DQO_QPL_DEFAULT_TX_PAGES;
- if (priv->rx_pages_per_qpl == 0)
- priv->rx_pages_per_qpl = DQO_QPL_DEFAULT_RX_PAGES;
}
if (dev_op_buffer_sizes &&
@@ -814,12 +936,59 @@ static void gve_enable_supported_features(struct gve_priv *priv,
"BUFFER SIZES device option enabled with max_rx_buffer_size of %u, header_buf_size of %u.\n",
priv->max_rx_buffer_size, priv->header_buf_size);
}
+
+ /* Read and store ring size ranges given by device */
+ if (dev_op_modify_ring &&
+ (supported_features_mask & GVE_SUP_MODIFY_RING_MASK)) {
+ priv->modify_ring_size_enabled = true;
+
+ /* max ring size for DQO QPL should not be overwritten because of device limit */
+ if (priv->queue_format != GVE_DQO_QPL_FORMAT) {
+ priv->max_rx_desc_cnt = be16_to_cpu(dev_op_modify_ring->max_rx_ring_size);
+ priv->max_tx_desc_cnt = be16_to_cpu(dev_op_modify_ring->max_tx_ring_size);
+ }
+ if (priv->default_min_ring_size) {
+ /* If device hasn't provided minimums, use default minimums */
+ priv->min_tx_desc_cnt = GVE_DEFAULT_MIN_TX_RING_SIZE;
+ priv->min_rx_desc_cnt = GVE_DEFAULT_MIN_RX_RING_SIZE;
+ } else {
+ priv->min_rx_desc_cnt = be16_to_cpu(dev_op_modify_ring->min_rx_ring_size);
+ priv->min_tx_desc_cnt = be16_to_cpu(dev_op_modify_ring->min_tx_ring_size);
+ }
+ }
+
+ if (dev_op_flow_steering &&
+ (supported_features_mask & GVE_SUP_FLOW_STEERING_MASK)) {
+ if (dev_op_flow_steering->max_flow_rules) {
+ priv->max_flow_rules =
+ be32_to_cpu(dev_op_flow_steering->max_flow_rules);
+ priv->dev->hw_features |= NETIF_F_NTUPLE;
+ dev_info(&priv->pdev->dev,
+ "FLOW STEERING device option enabled with max rule limit of %u.\n",
+ priv->max_flow_rules);
+ }
+ }
+
+ if (dev_op_rss_config &&
+ (supported_features_mask & GVE_SUP_RSS_CONFIG_MASK)) {
+ priv->rss_key_size =
+ be16_to_cpu(dev_op_rss_config->hash_key_size);
+ priv->rss_lut_size =
+ be16_to_cpu(dev_op_rss_config->hash_lut_size);
+ priv->cache_rss_config = false;
+ dev_dbg(&priv->pdev->dev,
+ "RSS device option enabled with key size of %u, lut size of %u.\n",
+ priv->rss_key_size, priv->rss_lut_size);
+ }
}
int gve_adminq_describe_device(struct gve_priv *priv)
{
+ struct gve_device_option_flow_steering *dev_op_flow_steering = NULL;
struct gve_device_option_buffer_sizes *dev_op_buffer_sizes = NULL;
struct gve_device_option_jumbo_frames *dev_op_jumbo_frames = NULL;
+ struct gve_device_option_modify_ring *dev_op_modify_ring = NULL;
+ struct gve_device_option_rss_config *dev_op_rss_config = NULL;
struct gve_device_option_gqi_rda *dev_op_gqi_rda = NULL;
struct gve_device_option_gqi_qpl *dev_op_gqi_qpl = NULL;
struct gve_device_option_dqo_rda *dev_op_dqo_rda = NULL;
@@ -851,9 +1020,11 @@ int gve_adminq_describe_device(struct gve_priv *priv)
err = gve_process_device_options(priv, descriptor, &dev_op_gqi_rda,
&dev_op_gqi_qpl, &dev_op_dqo_rda,
- &dev_op_jumbo_frames,
- &dev_op_dqo_qpl,
- &dev_op_buffer_sizes);
+ &dev_op_jumbo_frames, &dev_op_dqo_qpl,
+ &dev_op_buffer_sizes,
+ &dev_op_flow_steering,
+ &dev_op_rss_config,
+ &dev_op_modify_ring);
if (err)
goto free_device_descriptor;
@@ -888,15 +1059,15 @@ int gve_adminq_describe_device(struct gve_priv *priv)
dev_info(&priv->pdev->dev,
"Driver is running with GQI QPL queue format.\n");
}
- if (gve_is_gqi(priv)) {
- err = gve_set_desc_cnt(priv, descriptor);
- } else {
- /* DQO supports LRO. */
+
+ /* set default descriptor counts */
+ gve_set_default_desc_cnt(priv, descriptor);
+
+ gve_set_default_rss_sizes(priv);
+
+ /* DQO supports LRO. */
+ if (!gve_is_gqi(priv))
priv->dev->hw_features |= NETIF_F_LRO;
- err = gve_set_desc_cnt_dqo(priv, descriptor, dev_op_dqo_rda);
- }
- if (err)
- goto free_device_descriptor;
priv->max_registered_pages =
be64_to_cpu(descriptor->max_registered_pages);
@@ -912,18 +1083,12 @@ int gve_adminq_describe_device(struct gve_priv *priv)
mac = descriptor->mac;
dev_info(&priv->pdev->dev, "MAC addr: %pM\n", mac);
priv->tx_pages_per_qpl = be16_to_cpu(descriptor->tx_pages_per_qpl);
- priv->rx_data_slot_cnt = be16_to_cpu(descriptor->rx_pages_per_qpl);
-
- if (gve_is_gqi(priv) && priv->rx_data_slot_cnt < priv->rx_desc_cnt) {
- dev_err(&priv->pdev->dev, "rx_data_slot_cnt cannot be smaller than rx_desc_cnt, setting rx_desc_cnt down to %d.\n",
- priv->rx_data_slot_cnt);
- priv->rx_desc_cnt = priv->rx_data_slot_cnt;
- }
priv->default_num_queues = be16_to_cpu(descriptor->default_num_queues);
gve_enable_supported_features(priv, supported_features_mask,
dev_op_jumbo_frames, dev_op_dqo_qpl,
- dev_op_buffer_sizes);
+ dev_op_buffer_sizes, dev_op_flow_steering,
+ dev_op_rss_config, dev_op_modify_ring);
free_device_descriptor:
dma_pool_free(priv->adminq_pool, descriptor, descriptor_bus);
@@ -976,20 +1141,6 @@ int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id)
return gve_adminq_execute_cmd(priv, &cmd);
}
-int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu)
-{
- union gve_adminq_command cmd;
-
- memset(&cmd, 0, sizeof(cmd));
- cmd.opcode = cpu_to_be32(GVE_ADMINQ_SET_DRIVER_PARAMETER);
- cmd.set_driver_param = (struct gve_adminq_set_driver_parameter) {
- .parameter_type = cpu_to_be32(GVE_SET_PARAM_MTU),
- .parameter_value = cpu_to_be64(mtu),
- };
-
- return gve_adminq_execute_cmd(priv, &cmd);
-}
-
int gve_adminq_report_stats(struct gve_priv *priv, u64 stats_report_len,
dma_addr_t stats_report_addr, u64 interval)
{
@@ -1086,3 +1237,293 @@ err:
ptype_map_bus);
return err;
}
+
+static int
+gve_adminq_configure_flow_rule(struct gve_priv *priv,
+ struct gve_adminq_configure_flow_rule *flow_rule_cmd)
+{
+ int err = gve_adminq_execute_extended_cmd(priv,
+ GVE_ADMINQ_CONFIGURE_FLOW_RULE,
+ sizeof(struct gve_adminq_configure_flow_rule),
+ flow_rule_cmd);
+
+ if (err == -ETIME) {
+ dev_err(&priv->pdev->dev, "Timeout to configure the flow rule, trigger reset");
+ gve_reset(priv, true);
+ } else if (!err) {
+ priv->flow_rules_cache.rules_cache_synced = false;
+ }
+
+ return err;
+}
+
+int gve_adminq_add_flow_rule(struct gve_priv *priv, struct gve_adminq_flow_rule *rule, u32 loc)
+{
+ struct gve_adminq_configure_flow_rule flow_rule_cmd = {
+ .opcode = cpu_to_be16(GVE_FLOW_RULE_CFG_ADD),
+ .location = cpu_to_be32(loc),
+ .rule = *rule,
+ };
+
+ return gve_adminq_configure_flow_rule(priv, &flow_rule_cmd);
+}
+
+int gve_adminq_del_flow_rule(struct gve_priv *priv, u32 loc)
+{
+ struct gve_adminq_configure_flow_rule flow_rule_cmd = {
+ .opcode = cpu_to_be16(GVE_FLOW_RULE_CFG_DEL),
+ .location = cpu_to_be32(loc),
+ };
+
+ return gve_adminq_configure_flow_rule(priv, &flow_rule_cmd);
+}
+
+int gve_adminq_reset_flow_rules(struct gve_priv *priv)
+{
+ struct gve_adminq_configure_flow_rule flow_rule_cmd = {
+ .opcode = cpu_to_be16(GVE_FLOW_RULE_CFG_RESET),
+ };
+
+ return gve_adminq_configure_flow_rule(priv, &flow_rule_cmd);
+}
+
+int gve_adminq_configure_rss(struct gve_priv *priv, struct ethtool_rxfh_param *rxfh)
+{
+ const u32 *hash_lut_to_config = NULL;
+ const u8 *hash_key_to_config = NULL;
+ dma_addr_t lut_bus = 0, key_bus = 0;
+ union gve_adminq_command cmd;
+ __be32 *lut = NULL;
+ u8 hash_alg = 0;
+ u8 *key = NULL;
+ int err = 0;
+ u16 i;
+
+ switch (rxfh->hfunc) {
+ case ETH_RSS_HASH_NO_CHANGE:
+ fallthrough;
+ case ETH_RSS_HASH_TOP:
+ hash_alg = ETH_RSS_HASH_TOP;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (rxfh->indir) {
+ if (rxfh->indir_size != priv->rss_lut_size)
+ return -EINVAL;
+
+ hash_lut_to_config = rxfh->indir;
+ } else if (priv->cache_rss_config) {
+ hash_lut_to_config = priv->rss_config.hash_lut;
+ }
+
+ if (hash_lut_to_config) {
+ lut = dma_alloc_coherent(&priv->pdev->dev,
+ priv->rss_lut_size * sizeof(*lut),
+ &lut_bus, GFP_KERNEL);
+ if (!lut)
+ return -ENOMEM;
+
+ for (i = 0; i < priv->rss_lut_size; i++)
+ lut[i] = cpu_to_be32(hash_lut_to_config[i]);
+ }
+
+ if (rxfh->key) {
+ if (rxfh->key_size != priv->rss_key_size) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ hash_key_to_config = rxfh->key;
+ } else if (priv->cache_rss_config) {
+ hash_key_to_config = priv->rss_config.hash_key;
+ }
+
+ if (hash_key_to_config) {
+ key = dma_alloc_coherent(&priv->pdev->dev,
+ priv->rss_key_size,
+ &key_bus, GFP_KERNEL);
+ if (!key) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ memcpy(key, hash_key_to_config, priv->rss_key_size);
+ }
+
+ /* Zero-valued fields in the cmd.configure_rss instruct the device to
+ * not update those fields.
+ */
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_CONFIGURE_RSS);
+ cmd.configure_rss = (struct gve_adminq_configure_rss) {
+ .hash_types = cpu_to_be16(BIT(GVE_RSS_HASH_TCPV4) |
+ BIT(GVE_RSS_HASH_UDPV4) |
+ BIT(GVE_RSS_HASH_TCPV6) |
+ BIT(GVE_RSS_HASH_UDPV6)),
+ .hash_alg = hash_alg,
+ .hash_key_size =
+ cpu_to_be16((key_bus) ? priv->rss_key_size : 0),
+ .hash_lut_size =
+ cpu_to_be16((lut_bus) ? priv->rss_lut_size : 0),
+ .hash_key_addr = cpu_to_be64(key_bus),
+ .hash_lut_addr = cpu_to_be64(lut_bus),
+ };
+
+ err = gve_adminq_execute_cmd(priv, &cmd);
+
+out:
+ if (lut)
+ dma_free_coherent(&priv->pdev->dev,
+ priv->rss_lut_size * sizeof(*lut),
+ lut, lut_bus);
+ if (key)
+ dma_free_coherent(&priv->pdev->dev,
+ priv->rss_key_size, key, key_bus);
+ return err;
+}
+
+/* In the dma memory that the driver allocated for the device to query the flow rules, the device
+ * will first write it with a struct of gve_query_flow_rules_descriptor. Next to it, the device
+ * will write an array of rules or rule ids with the count that specified in the descriptor.
+ * For GVE_FLOW_RULE_QUERY_STATS, the device will only write the descriptor.
+ */
+static int gve_adminq_process_flow_rules_query(struct gve_priv *priv, u16 query_opcode,
+ struct gve_query_flow_rules_descriptor *descriptor)
+{
+ struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;
+ u32 num_queried_rules, total_memory_len, rule_info_len;
+ void *rule_info;
+
+ total_memory_len = be32_to_cpu(descriptor->total_length);
+ num_queried_rules = be32_to_cpu(descriptor->num_queried_rules);
+ rule_info = (void *)(descriptor + 1);
+
+ switch (query_opcode) {
+ case GVE_FLOW_RULE_QUERY_RULES:
+ rule_info_len = num_queried_rules * sizeof(*flow_rules_cache->rules_cache);
+ if (sizeof(*descriptor) + rule_info_len != total_memory_len) {
+ dev_err(&priv->dev->dev, "flow rules query is out of memory.\n");
+ return -ENOMEM;
+ }
+
+ memcpy(flow_rules_cache->rules_cache, rule_info, rule_info_len);
+ flow_rules_cache->rules_cache_num = num_queried_rules;
+ break;
+ case GVE_FLOW_RULE_QUERY_IDS:
+ rule_info_len = num_queried_rules * sizeof(*flow_rules_cache->rule_ids_cache);
+ if (sizeof(*descriptor) + rule_info_len != total_memory_len) {
+ dev_err(&priv->dev->dev, "flow rule ids query is out of memory.\n");
+ return -ENOMEM;
+ }
+
+ memcpy(flow_rules_cache->rule_ids_cache, rule_info, rule_info_len);
+ flow_rules_cache->rule_ids_cache_num = num_queried_rules;
+ break;
+ case GVE_FLOW_RULE_QUERY_STATS:
+ priv->num_flow_rules = be32_to_cpu(descriptor->num_flow_rules);
+ priv->max_flow_rules = be32_to_cpu(descriptor->max_flow_rules);
+ return 0;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int gve_adminq_query_flow_rules(struct gve_priv *priv, u16 query_opcode, u32 starting_loc)
+{
+ struct gve_query_flow_rules_descriptor *descriptor;
+ union gve_adminq_command cmd;
+ dma_addr_t descriptor_bus;
+ int err = 0;
+
+ memset(&cmd, 0, sizeof(cmd));
+ descriptor = dma_pool_alloc(priv->adminq_pool, GFP_KERNEL, &descriptor_bus);
+ if (!descriptor)
+ return -ENOMEM;
+
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_QUERY_FLOW_RULES);
+ cmd.query_flow_rules = (struct gve_adminq_query_flow_rules) {
+ .opcode = cpu_to_be16(query_opcode),
+ .starting_rule_id = cpu_to_be32(starting_loc),
+ .available_length = cpu_to_be64(GVE_ADMINQ_BUFFER_SIZE),
+ .rule_descriptor_addr = cpu_to_be64(descriptor_bus),
+ };
+ err = gve_adminq_execute_cmd(priv, &cmd);
+ if (err)
+ goto out;
+
+ err = gve_adminq_process_flow_rules_query(priv, query_opcode, descriptor);
+
+out:
+ dma_pool_free(priv->adminq_pool, descriptor, descriptor_bus);
+ return err;
+}
+
+static int gve_adminq_process_rss_query(struct gve_priv *priv,
+ struct gve_query_rss_descriptor *descriptor,
+ struct ethtool_rxfh_param *rxfh)
+{
+ u32 total_memory_length;
+ u16 hash_lut_length;
+ void *rss_info_addr;
+ __be32 *lut;
+ u16 i;
+
+ total_memory_length = be32_to_cpu(descriptor->total_length);
+ hash_lut_length = priv->rss_lut_size * sizeof(*rxfh->indir);
+
+ if (sizeof(*descriptor) + priv->rss_key_size + hash_lut_length != total_memory_length) {
+ dev_err(&priv->dev->dev,
+ "rss query desc from device has invalid length parameter.\n");
+ return -EINVAL;
+ }
+
+ rxfh->hfunc = descriptor->hash_alg;
+
+ rss_info_addr = (void *)(descriptor + 1);
+ if (rxfh->key) {
+ rxfh->key_size = priv->rss_key_size;
+ memcpy(rxfh->key, rss_info_addr, priv->rss_key_size);
+ }
+
+ rss_info_addr += priv->rss_key_size;
+ lut = (__be32 *)rss_info_addr;
+ if (rxfh->indir) {
+ rxfh->indir_size = priv->rss_lut_size;
+ for (i = 0; i < priv->rss_lut_size; i++)
+ rxfh->indir[i] = be32_to_cpu(lut[i]);
+ }
+
+ return 0;
+}
+
+int gve_adminq_query_rss_config(struct gve_priv *priv, struct ethtool_rxfh_param *rxfh)
+{
+ struct gve_query_rss_descriptor *descriptor;
+ union gve_adminq_command cmd;
+ dma_addr_t descriptor_bus;
+ int err = 0;
+
+ descriptor = dma_pool_alloc(priv->adminq_pool, GFP_KERNEL, &descriptor_bus);
+ if (!descriptor)
+ return -ENOMEM;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_QUERY_RSS);
+ cmd.query_rss = (struct gve_adminq_query_rss) {
+ .available_length = cpu_to_be64(GVE_ADMINQ_BUFFER_SIZE),
+ .rss_descriptor_addr = cpu_to_be64(descriptor_bus),
+ };
+ err = gve_adminq_execute_cmd(priv, &cmd);
+ if (err)
+ goto out;
+
+ err = gve_adminq_process_rss_query(priv, descriptor, rxfh);
+
+out:
+ dma_pool_free(priv->adminq_pool, descriptor, descriptor_bus);
+ return err;
+}
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h
index 5ac972e45ff8..228217458275 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.h
+++ b/drivers/net/ethernet/google/gve/gve_adminq.h
@@ -20,11 +20,26 @@ enum gve_adminq_opcodes {
GVE_ADMINQ_DESTROY_TX_QUEUE = 0x7,
GVE_ADMINQ_DESTROY_RX_QUEUE = 0x8,
GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES = 0x9,
+ GVE_ADMINQ_CONFIGURE_RSS = 0xA,
GVE_ADMINQ_SET_DRIVER_PARAMETER = 0xB,
GVE_ADMINQ_REPORT_STATS = 0xC,
GVE_ADMINQ_REPORT_LINK_SPEED = 0xD,
GVE_ADMINQ_GET_PTYPE_MAP = 0xE,
GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY = 0xF,
+ GVE_ADMINQ_QUERY_FLOW_RULES = 0x10,
+ GVE_ADMINQ_QUERY_RSS = 0x12,
+
+ /* For commands that are larger than 56 bytes */
+ GVE_ADMINQ_EXTENDED_COMMAND = 0xFF,
+};
+
+/* The normal adminq command is restricted to be 56 bytes at maximum. For the
+ * longer adminq command, it is wrapped by GVE_ADMINQ_EXTENDED_COMMAND with
+ * inner opcode of gve_adminq_extended_cmd_opcodes specified. The inner command
+ * is written in the dma memory allocated by GVE_ADMINQ_EXTENDED_COMMAND.
+ */
+enum gve_adminq_extended_cmd_opcodes {
+ GVE_ADMINQ_CONFIGURE_FLOW_RULE = 0x101,
};
/* Admin queue status codes */
@@ -103,8 +118,7 @@ static_assert(sizeof(struct gve_device_option_gqi_qpl) == 4);
struct gve_device_option_dqo_rda {
__be32 supported_features_mask;
- __be16 tx_comp_ring_entries;
- __be16 rx_buff_ring_entries;
+ __be32 reserved;
};
static_assert(sizeof(struct gve_device_option_dqo_rda) == 8);
@@ -134,6 +148,32 @@ struct gve_device_option_buffer_sizes {
static_assert(sizeof(struct gve_device_option_buffer_sizes) == 8);
+struct gve_device_option_modify_ring {
+ __be32 supported_featured_mask;
+ __be16 max_rx_ring_size;
+ __be16 max_tx_ring_size;
+ __be16 min_rx_ring_size;
+ __be16 min_tx_ring_size;
+};
+
+static_assert(sizeof(struct gve_device_option_modify_ring) == 12);
+
+struct gve_device_option_flow_steering {
+ __be32 supported_features_mask;
+ __be32 reserved;
+ __be32 max_flow_rules;
+};
+
+static_assert(sizeof(struct gve_device_option_flow_steering) == 12);
+
+struct gve_device_option_rss_config {
+ __be32 supported_features_mask;
+ __be16 hash_key_size;
+ __be16 hash_lut_size;
+};
+
+static_assert(sizeof(struct gve_device_option_rss_config) == 8);
+
/* Terminology:
*
* RDA - Raw DMA Addressing - Buffers associated with SKBs are directly DMA
@@ -143,28 +183,37 @@ static_assert(sizeof(struct gve_device_option_buffer_sizes) == 8);
* the device for read/write and data is copied from/to SKBs.
*/
enum gve_dev_opt_id {
- GVE_DEV_OPT_ID_GQI_RAW_ADDRESSING = 0x1,
- GVE_DEV_OPT_ID_GQI_RDA = 0x2,
- GVE_DEV_OPT_ID_GQI_QPL = 0x3,
- GVE_DEV_OPT_ID_DQO_RDA = 0x4,
- GVE_DEV_OPT_ID_DQO_QPL = 0x7,
- GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8,
- GVE_DEV_OPT_ID_BUFFER_SIZES = 0xa,
+ GVE_DEV_OPT_ID_GQI_RAW_ADDRESSING = 0x1,
+ GVE_DEV_OPT_ID_GQI_RDA = 0x2,
+ GVE_DEV_OPT_ID_GQI_QPL = 0x3,
+ GVE_DEV_OPT_ID_DQO_RDA = 0x4,
+ GVE_DEV_OPT_ID_MODIFY_RING = 0x6,
+ GVE_DEV_OPT_ID_DQO_QPL = 0x7,
+ GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8,
+ GVE_DEV_OPT_ID_BUFFER_SIZES = 0xa,
+ GVE_DEV_OPT_ID_FLOW_STEERING = 0xb,
+ GVE_DEV_OPT_ID_RSS_CONFIG = 0xe,
};
enum gve_dev_opt_req_feat_mask {
- GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RAW_ADDRESSING = 0x0,
- GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RDA = 0x0,
- GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL = 0x0,
- GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA = 0x0,
- GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES = 0x0,
- GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL = 0x0,
- GVE_DEV_OPT_REQ_FEAT_MASK_BUFFER_SIZES = 0x0,
+ GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RAW_ADDRESSING = 0x0,
+ GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RDA = 0x0,
+ GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL = 0x0,
+ GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA = 0x0,
+ GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES = 0x0,
+ GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL = 0x0,
+ GVE_DEV_OPT_REQ_FEAT_MASK_BUFFER_SIZES = 0x0,
+ GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING = 0x0,
+ GVE_DEV_OPT_REQ_FEAT_MASK_FLOW_STEERING = 0x0,
+ GVE_DEV_OPT_REQ_FEAT_MASK_RSS_CONFIG = 0x0,
};
enum gve_sup_feature_mask {
- GVE_SUP_JUMBO_FRAMES_MASK = 1 << 2,
- GVE_SUP_BUFFER_SIZES_MASK = 1 << 4,
+ GVE_SUP_MODIFY_RING_MASK = 1 << 0,
+ GVE_SUP_JUMBO_FRAMES_MASK = 1 << 2,
+ GVE_SUP_BUFFER_SIZES_MASK = 1 << 4,
+ GVE_SUP_FLOW_STEERING_MASK = 1 << 5,
+ GVE_SUP_RSS_CONFIG_MASK = 1 << 7,
};
#define GVE_DEV_OPT_LEN_GQI_RAW_ADDRESSING 0x0
@@ -178,6 +227,7 @@ enum gve_driver_capbility {
gve_driver_capability_dqo_rda = 3,
gve_driver_capability_alt_miss_compl = 4,
gve_driver_capability_flexible_buffer_size = 5,
+ gve_driver_capability_flexible_rss_size = 6,
};
#define GVE_CAP1(a) BIT((int)a)
@@ -190,12 +240,21 @@ enum gve_driver_capbility {
GVE_CAP1(gve_driver_capability_gqi_rda) | \
GVE_CAP1(gve_driver_capability_dqo_rda) | \
GVE_CAP1(gve_driver_capability_alt_miss_compl) | \
- GVE_CAP1(gve_driver_capability_flexible_buffer_size))
+ GVE_CAP1(gve_driver_capability_flexible_buffer_size) | \
+ GVE_CAP1(gve_driver_capability_flexible_rss_size))
#define GVE_DRIVER_CAPABILITY_FLAGS2 0x0
#define GVE_DRIVER_CAPABILITY_FLAGS3 0x0
#define GVE_DRIVER_CAPABILITY_FLAGS4 0x0
+struct gve_adminq_extended_command {
+ __be32 inner_opcode;
+ __be32 inner_length;
+ __be64 inner_command_addr;
+};
+
+static_assert(sizeof(struct gve_adminq_extended_command) == 16);
+
struct gve_driver_info {
u8 os_type; /* 0x01 = Linux */
u8 driver_major;
@@ -400,6 +459,109 @@ struct gve_adminq_get_ptype_map {
__be64 ptype_map_addr;
};
+/* Flow-steering related definitions */
+enum gve_adminq_flow_rule_cfg_opcode {
+ GVE_FLOW_RULE_CFG_ADD = 0,
+ GVE_FLOW_RULE_CFG_DEL = 1,
+ GVE_FLOW_RULE_CFG_RESET = 2,
+};
+
+enum gve_adminq_flow_rule_query_opcode {
+ GVE_FLOW_RULE_QUERY_RULES = 0,
+ GVE_FLOW_RULE_QUERY_IDS = 1,
+ GVE_FLOW_RULE_QUERY_STATS = 2,
+};
+
+enum gve_adminq_flow_type {
+ GVE_FLOW_TYPE_TCPV4,
+ GVE_FLOW_TYPE_UDPV4,
+ GVE_FLOW_TYPE_SCTPV4,
+ GVE_FLOW_TYPE_AHV4,
+ GVE_FLOW_TYPE_ESPV4,
+ GVE_FLOW_TYPE_TCPV6,
+ GVE_FLOW_TYPE_UDPV6,
+ GVE_FLOW_TYPE_SCTPV6,
+ GVE_FLOW_TYPE_AHV6,
+ GVE_FLOW_TYPE_ESPV6,
+};
+
+/* Flow-steering command */
+struct gve_adminq_flow_rule {
+ __be16 flow_type;
+ __be16 action; /* RX queue id */
+ struct gve_flow_spec key;
+ struct gve_flow_spec mask;
+};
+
+struct gve_adminq_configure_flow_rule {
+ __be16 opcode;
+ u8 padding[2];
+ struct gve_adminq_flow_rule rule;
+ __be32 location;
+};
+
+static_assert(sizeof(struct gve_adminq_configure_flow_rule) == 92);
+
+struct gve_query_flow_rules_descriptor {
+ __be32 num_flow_rules;
+ __be32 max_flow_rules;
+ __be32 num_queried_rules;
+ __be32 total_length;
+};
+
+struct gve_adminq_queried_flow_rule {
+ __be32 location;
+ struct gve_adminq_flow_rule flow_rule;
+};
+
+struct gve_adminq_query_flow_rules {
+ __be16 opcode;
+ u8 padding[2];
+ __be32 starting_rule_id;
+ __be64 available_length; /* The dma memory length that the driver allocated */
+ __be64 rule_descriptor_addr; /* The dma memory address */
+};
+
+static_assert(sizeof(struct gve_adminq_query_flow_rules) == 24);
+
+enum gve_rss_hash_type {
+ GVE_RSS_HASH_IPV4,
+ GVE_RSS_HASH_TCPV4,
+ GVE_RSS_HASH_IPV6,
+ GVE_RSS_HASH_IPV6_EX,
+ GVE_RSS_HASH_TCPV6,
+ GVE_RSS_HASH_TCPV6_EX,
+ GVE_RSS_HASH_UDPV4,
+ GVE_RSS_HASH_UDPV6,
+ GVE_RSS_HASH_UDPV6_EX,
+};
+
+struct gve_adminq_configure_rss {
+ __be16 hash_types;
+ u8 hash_alg;
+ u8 reserved;
+ __be16 hash_key_size;
+ __be16 hash_lut_size;
+ __be64 hash_key_addr;
+ __be64 hash_lut_addr;
+};
+
+static_assert(sizeof(struct gve_adminq_configure_rss) == 24);
+
+struct gve_query_rss_descriptor {
+ __be32 total_length;
+ __be16 hash_types;
+ u8 hash_alg;
+ u8 reserved;
+};
+
+struct gve_adminq_query_rss {
+ __be64 available_length;
+ __be64 rss_descriptor_addr;
+};
+
+static_assert(sizeof(struct gve_adminq_query_rss) == 16);
+
union gve_adminq_command {
struct {
__be32 opcode;
@@ -420,6 +582,10 @@ union gve_adminq_command {
struct gve_adminq_get_ptype_map get_ptype_map;
struct gve_adminq_verify_driver_compatibility
verify_driver_compatibility;
+ struct gve_adminq_query_flow_rules query_flow_rules;
+ struct gve_adminq_configure_rss configure_rss;
+ struct gve_adminq_query_rss query_rss;
+ struct gve_adminq_extended_command extended_command;
};
};
u8 reserved[64];
@@ -439,18 +605,25 @@ int gve_adminq_configure_device_resources(struct gve_priv *priv,
int gve_adminq_deconfigure_device_resources(struct gve_priv *priv);
int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_queues);
int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_queues);
+int gve_adminq_create_single_rx_queue(struct gve_priv *priv, u32 queue_index);
int gve_adminq_create_rx_queues(struct gve_priv *priv, u32 num_queues);
+int gve_adminq_destroy_single_rx_queue(struct gve_priv *priv, u32 queue_index);
int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 queue_id);
int gve_adminq_register_page_list(struct gve_priv *priv,
struct gve_queue_page_list *qpl);
int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id);
-int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu);
int gve_adminq_report_stats(struct gve_priv *priv, u64 stats_report_len,
dma_addr_t stats_report_addr, u64 interval);
int gve_adminq_verify_driver_compatibility(struct gve_priv *priv,
u64 driver_info_len,
dma_addr_t driver_info_addr);
int gve_adminq_report_link_speed(struct gve_priv *priv);
+int gve_adminq_add_flow_rule(struct gve_priv *priv, struct gve_adminq_flow_rule *rule, u32 loc);
+int gve_adminq_del_flow_rule(struct gve_priv *priv, u32 loc);
+int gve_adminq_reset_flow_rules(struct gve_priv *priv);
+int gve_adminq_query_flow_rules(struct gve_priv *priv, u16 query_opcode, u32 starting_loc);
+int gve_adminq_configure_rss(struct gve_priv *priv, struct ethtool_rxfh_param *rxfh);
+int gve_adminq_query_rss_config(struct gve_priv *priv, struct ethtool_rxfh_param *rxfh);
struct gve_ptype_lut;
int gve_adminq_get_ptype_map_dqo(struct gve_priv *priv,
diff --git a/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c
new file mode 100644
index 000000000000..a71883e1d920
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2024 Google, Inc.
+ */
+
+#include "gve.h"
+#include "gve_utils.h"
+
+int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs)
+{
+ return page_count(bs->page_info.page) - bs->page_info.pagecnt_bias;
+}
+
+struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx)
+{
+ struct gve_rx_buf_state_dqo *buf_state;
+ s16 buffer_id;
+
+ buffer_id = rx->dqo.free_buf_states;
+ if (unlikely(buffer_id == -1))
+ return NULL;
+
+ buf_state = &rx->dqo.buf_states[buffer_id];
+
+ /* Remove buf_state from free list */
+ rx->dqo.free_buf_states = buf_state->next;
+
+ /* Point buf_state to itself to mark it as allocated */
+ buf_state->next = buffer_id;
+
+ return buf_state;
+}
+
+bool gve_buf_state_is_allocated(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state)
+{
+ s16 buffer_id = buf_state - rx->dqo.buf_states;
+
+ return buf_state->next == buffer_id;
+}
+
+void gve_free_buf_state(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state)
+{
+ s16 buffer_id = buf_state - rx->dqo.buf_states;
+
+ buf_state->next = rx->dqo.free_buf_states;
+ rx->dqo.free_buf_states = buffer_id;
+}
+
+struct gve_rx_buf_state_dqo *gve_dequeue_buf_state(struct gve_rx_ring *rx,
+ struct gve_index_list *list)
+{
+ struct gve_rx_buf_state_dqo *buf_state;
+ s16 buffer_id;
+
+ buffer_id = list->head;
+ if (unlikely(buffer_id == -1))
+ return NULL;
+
+ buf_state = &rx->dqo.buf_states[buffer_id];
+
+ /* Remove buf_state from list */
+ list->head = buf_state->next;
+ if (buf_state->next == -1)
+ list->tail = -1;
+
+ /* Point buf_state to itself to mark it as allocated */
+ buf_state->next = buffer_id;
+
+ return buf_state;
+}
+
+void gve_enqueue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list,
+ struct gve_rx_buf_state_dqo *buf_state)
+{
+ s16 buffer_id = buf_state - rx->dqo.buf_states;
+
+ buf_state->next = -1;
+
+ if (list->head == -1) {
+ list->head = buffer_id;
+ list->tail = buffer_id;
+ } else {
+ int tail = list->tail;
+
+ rx->dqo.buf_states[tail].next = buffer_id;
+ list->tail = buffer_id;
+ }
+}
+
+struct gve_rx_buf_state_dqo *gve_get_recycled_buf_state(struct gve_rx_ring *rx)
+{
+ struct gve_rx_buf_state_dqo *buf_state;
+ int i;
+
+ /* Recycled buf states are immediately usable. */
+ buf_state = gve_dequeue_buf_state(rx, &rx->dqo.recycled_buf_states);
+ if (likely(buf_state))
+ return buf_state;
+
+ if (unlikely(rx->dqo.used_buf_states.head == -1))
+ return NULL;
+
+ /* Used buf states are only usable when ref count reaches 0, which means
+ * no SKBs refer to them.
+ *
+ * Search a limited number before giving up.
+ */
+ for (i = 0; i < 5; i++) {
+ buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
+ if (gve_buf_ref_cnt(buf_state) == 0) {
+ rx->dqo.used_buf_states_cnt--;
+ return buf_state;
+ }
+
+ gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
+ }
+
+ return NULL;
+}
+
+int gve_alloc_qpl_page_dqo(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state)
+{
+ struct gve_priv *priv = rx->gve;
+ u32 idx;
+
+ idx = rx->dqo.next_qpl_page_idx;
+ if (idx >= gve_get_rx_pages_per_qpl_dqo(priv->rx_desc_cnt)) {
+ net_err_ratelimited("%s: Out of QPL pages\n",
+ priv->dev->name);
+ return -ENOMEM;
+ }
+ buf_state->page_info.page = rx->dqo.qpl->pages[idx];
+ buf_state->addr = rx->dqo.qpl->page_buses[idx];
+ rx->dqo.next_qpl_page_idx++;
+ buf_state->page_info.page_offset = 0;
+ buf_state->page_info.page_address =
+ page_address(buf_state->page_info.page);
+ buf_state->page_info.buf_size = rx->packet_buffer_truesize;
+ buf_state->page_info.pad = rx->rx_headroom;
+ buf_state->last_single_ref_offset = 0;
+
+ /* The page already has 1 ref. */
+ page_ref_add(buf_state->page_info.page, INT_MAX - 1);
+ buf_state->page_info.pagecnt_bias = INT_MAX;
+
+ return 0;
+}
+
+void gve_free_qpl_page_dqo(struct gve_rx_buf_state_dqo *buf_state)
+{
+ if (!buf_state->page_info.page)
+ return;
+
+ page_ref_sub(buf_state->page_info.page,
+ buf_state->page_info.pagecnt_bias - 1);
+ buf_state->page_info.page = NULL;
+}
+
+void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state)
+{
+ const u16 data_buffer_size = rx->packet_buffer_truesize;
+ int pagecount;
+
+ /* Can't reuse if we only fit one buffer per page */
+ if (data_buffer_size * 2 > PAGE_SIZE)
+ goto mark_used;
+
+ pagecount = gve_buf_ref_cnt(buf_state);
+
+ /* Record the offset when we have a single remaining reference.
+ *
+ * When this happens, we know all of the other offsets of the page are
+ * usable.
+ */
+ if (pagecount == 1) {
+ buf_state->last_single_ref_offset =
+ buf_state->page_info.page_offset;
+ }
+
+ /* Use the next buffer sized chunk in the page. */
+ buf_state->page_info.page_offset += data_buffer_size;
+ buf_state->page_info.page_offset &= (PAGE_SIZE - 1);
+
+ /* If we wrap around to the same offset without ever dropping to 1
+ * reference, then we don't know if this offset was ever freed.
+ */
+ if (buf_state->page_info.page_offset ==
+ buf_state->last_single_ref_offset) {
+ goto mark_used;
+ }
+
+ gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
+ return;
+
+mark_used:
+ gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
+ rx->dqo.used_buf_states_cnt++;
+}
+
+void gve_free_to_page_pool(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state,
+ bool allow_direct)
+{
+ netmem_ref netmem = buf_state->page_info.netmem;
+
+ if (!netmem)
+ return;
+
+ page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, allow_direct);
+ buf_state->page_info.netmem = 0;
+}
+
+static int gve_alloc_from_page_pool(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state)
+{
+ netmem_ref netmem;
+
+ buf_state->page_info.buf_size = rx->packet_buffer_truesize;
+ netmem = page_pool_alloc_netmem(rx->dqo.page_pool,
+ &buf_state->page_info.page_offset,
+ &buf_state->page_info.buf_size,
+ GFP_ATOMIC);
+
+ if (!netmem)
+ return -ENOMEM;
+
+ buf_state->page_info.netmem = netmem;
+ buf_state->page_info.page_address = netmem_address(netmem);
+ buf_state->addr = page_pool_get_dma_addr_netmem(netmem);
+ buf_state->page_info.pad = rx->dqo.page_pool->p.offset;
+
+ return 0;
+}
+
+struct page_pool *gve_rx_create_page_pool(struct gve_priv *priv,
+ struct gve_rx_ring *rx,
+ bool xdp)
+{
+ u32 ntfy_id = gve_rx_idx_to_ntfy(priv, rx->q_num);
+ struct page_pool_params pp = {
+ .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
+ .order = 0,
+ .pool_size = GVE_PAGE_POOL_SIZE_MULTIPLIER * priv->rx_desc_cnt,
+ .dev = &priv->pdev->dev,
+ .netdev = priv->dev,
+ .napi = &priv->ntfy_blocks[ntfy_id].napi,
+ .max_len = PAGE_SIZE,
+ .dma_dir = xdp ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE,
+ .offset = xdp ? XDP_PACKET_HEADROOM : 0,
+ };
+
+ return page_pool_create(&pp);
+}
+
+void gve_free_buffer(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state)
+{
+ if (rx->dqo.page_pool) {
+ gve_free_to_page_pool(rx, buf_state, true);
+ gve_free_buf_state(rx, buf_state);
+ } else {
+ gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
+ buf_state);
+ }
+}
+
+void gve_reuse_buffer(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state)
+{
+ if (rx->dqo.page_pool) {
+ buf_state->page_info.netmem = 0;
+ gve_free_buf_state(rx, buf_state);
+ } else {
+ gve_dec_pagecnt_bias(&buf_state->page_info);
+ gve_try_recycle_buf(rx->gve, rx, buf_state);
+ }
+}
+
+int gve_alloc_buffer(struct gve_rx_ring *rx, struct gve_rx_desc_dqo *desc)
+{
+ struct gve_rx_buf_state_dqo *buf_state;
+
+ if (rx->dqo.page_pool) {
+ buf_state = gve_alloc_buf_state(rx);
+ if (WARN_ON_ONCE(!buf_state))
+ return -ENOMEM;
+
+ if (gve_alloc_from_page_pool(rx, buf_state))
+ goto free_buf_state;
+ } else {
+ buf_state = gve_get_recycled_buf_state(rx);
+ if (unlikely(!buf_state)) {
+ buf_state = gve_alloc_buf_state(rx);
+ if (unlikely(!buf_state))
+ return -ENOMEM;
+
+ if (unlikely(gve_alloc_qpl_page_dqo(rx, buf_state)))
+ goto free_buf_state;
+ }
+ }
+ desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states);
+ desc->buf_addr = cpu_to_le64(buf_state->addr +
+ buf_state->page_info.page_offset +
+ buf_state->page_info.pad);
+
+ return 0;
+
+free_buf_state:
+ gve_free_buf_state(rx, buf_state);
+ return -ENOMEM;
+}
diff --git a/drivers/net/ethernet/google/gve/gve_dqo.h b/drivers/net/ethernet/google/gve/gve_dqo.h
index b81584829c40..e83773fb891f 100644
--- a/drivers/net/ethernet/google/gve/gve_dqo.h
+++ b/drivers/net/ethernet/google/gve/gve_dqo.h
@@ -44,6 +44,12 @@ void gve_tx_free_rings_dqo(struct gve_priv *priv,
struct gve_tx_alloc_rings_cfg *cfg);
void gve_tx_start_ring_dqo(struct gve_priv *priv, int idx);
void gve_tx_stop_ring_dqo(struct gve_priv *priv, int idx);
+int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg,
+ struct gve_rx_ring *rx,
+ int idx);
+void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
+ struct gve_rx_alloc_rings_cfg *cfg);
int gve_rx_alloc_rings_dqo(struct gve_priv *priv,
struct gve_rx_alloc_rings_cfg *cfg);
void gve_rx_free_rings_dqo(struct gve_priv *priv,
diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c
index 9aebfb843d9d..3c1da0cf3f61 100644
--- a/drivers/net/ethernet/google/gve/gve_ethtool.c
+++ b/drivers/net/ethernet/google/gve/gve_ethtool.c
@@ -1,13 +1,14 @@
// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
*
- * Copyright (C) 2015-2021 Google, Inc.
+ * Copyright (C) 2015-2024 Google LLC
*/
#include <linux/rtnetlink.h>
#include "gve.h"
#include "gve_adminq.h"
#include "gve_dqo.h"
+#include "gve_utils.h"
static void gve_get_drvinfo(struct net_device *netdev,
struct ethtool_drvinfo *info)
@@ -62,18 +63,20 @@ static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = {
static const char gve_gstrings_tx_stats[][ETH_GSTRING_LEN] = {
"tx_posted_desc[%u]", "tx_completed_desc[%u]", "tx_consumed_desc[%u]", "tx_bytes[%u]",
"tx_wake[%u]", "tx_stop[%u]", "tx_event_counter[%u]",
- "tx_dma_mapping_error[%u]", "tx_xsk_wakeup[%u]",
- "tx_xsk_done[%u]", "tx_xsk_sent[%u]", "tx_xdp_xmit[%u]", "tx_xdp_xmit_errors[%u]"
+ "tx_dma_mapping_error[%u]",
+ "tx_xsk_sent[%u]", "tx_xdp_xmit[%u]", "tx_xdp_xmit_errors[%u]"
};
-static const char gve_gstrings_adminq_stats[][ETH_GSTRING_LEN] = {
+static const char gve_gstrings_adminq_stats[][ETH_GSTRING_LEN] __nonstring_array = {
"adminq_prod_cnt", "adminq_cmd_fail", "adminq_timeouts",
"adminq_describe_device_cnt", "adminq_cfg_device_resources_cnt",
"adminq_register_page_list_cnt", "adminq_unregister_page_list_cnt",
"adminq_create_tx_queue_cnt", "adminq_create_rx_queue_cnt",
"adminq_destroy_tx_queue_cnt", "adminq_destroy_rx_queue_cnt",
"adminq_dcfg_device_resources_cnt", "adminq_set_driver_parameter_cnt",
- "adminq_report_stats_cnt", "adminq_report_link_speed_cnt"
+ "adminq_report_stats_cnt", "adminq_report_link_speed_cnt", "adminq_get_ptype_map_cnt",
+ "adminq_query_flow_rules", "adminq_cfg_flow_rule", "adminq_cfg_rss_cnt",
+ "adminq_query_rss_cnt",
};
static const char gve_gstrings_priv_flags[][ETH_GSTRING_LEN] = {
@@ -89,42 +92,34 @@ static const char gve_gstrings_priv_flags[][ETH_GSTRING_LEN] = {
static void gve_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
{
struct gve_priv *priv = netdev_priv(netdev);
- char *s = (char *)data;
+ u8 *s = (char *)data;
int num_tx_queues;
int i, j;
num_tx_queues = gve_num_tx_queues(priv);
switch (stringset) {
case ETH_SS_STATS:
- memcpy(s, *gve_gstrings_main_stats,
- sizeof(gve_gstrings_main_stats));
- s += sizeof(gve_gstrings_main_stats);
-
- for (i = 0; i < priv->rx_cfg.num_queues; i++) {
- for (j = 0; j < NUM_GVE_RX_CNTS; j++) {
- snprintf(s, ETH_GSTRING_LEN,
- gve_gstrings_rx_stats[j], i);
- s += ETH_GSTRING_LEN;
- }
- }
+ for (i = 0; i < ARRAY_SIZE(gve_gstrings_main_stats); i++)
+ ethtool_puts(&s, gve_gstrings_main_stats[i]);
- for (i = 0; i < num_tx_queues; i++) {
- for (j = 0; j < NUM_GVE_TX_CNTS; j++) {
- snprintf(s, ETH_GSTRING_LEN,
- gve_gstrings_tx_stats[j], i);
- s += ETH_GSTRING_LEN;
- }
- }
+ for (i = 0; i < priv->rx_cfg.num_queues; i++)
+ for (j = 0; j < NUM_GVE_RX_CNTS; j++)
+ ethtool_sprintf(&s, gve_gstrings_rx_stats[j],
+ i);
+
+ for (i = 0; i < num_tx_queues; i++)
+ for (j = 0; j < NUM_GVE_TX_CNTS; j++)
+ ethtool_sprintf(&s, gve_gstrings_tx_stats[j],
+ i);
+
+ for (i = 0; i < ARRAY_SIZE(gve_gstrings_adminq_stats); i++)
+ ethtool_cpy(&s, gve_gstrings_adminq_stats[i]);
- memcpy(s, *gve_gstrings_adminq_stats,
- sizeof(gve_gstrings_adminq_stats));
- s += sizeof(gve_gstrings_adminq_stats);
break;
case ETH_SS_PRIV_FLAGS:
- memcpy(s, *gve_gstrings_priv_flags,
- sizeof(gve_gstrings_priv_flags));
- s += sizeof(gve_gstrings_priv_flags);
+ for (i = 0; i < ARRAY_SIZE(gve_gstrings_priv_flags); i++)
+ ethtool_puts(&s, gve_gstrings_priv_flags[i]);
break;
default:
@@ -165,6 +160,8 @@ gve_get_ethtool_stats(struct net_device *netdev,
struct stats *report_stats;
int *rx_qid_to_stats_idx;
int *tx_qid_to_stats_idx;
+ int num_stopped_rxqs = 0;
+ int num_stopped_txqs = 0;
struct gve_priv *priv;
bool skip_nic_stats;
unsigned int start;
@@ -181,12 +178,23 @@ gve_get_ethtool_stats(struct net_device *netdev,
sizeof(int), GFP_KERNEL);
if (!rx_qid_to_stats_idx)
return;
+ for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
+ rx_qid_to_stats_idx[ring] = -1;
+ if (!gve_rx_was_added_to_block(priv, ring))
+ num_stopped_rxqs++;
+ }
tx_qid_to_stats_idx = kmalloc_array(num_tx_queues,
sizeof(int), GFP_KERNEL);
if (!tx_qid_to_stats_idx) {
kfree(rx_qid_to_stats_idx);
return;
}
+ for (ring = 0; ring < num_tx_queues; ring++) {
+ tx_qid_to_stats_idx[ring] = -1;
+ if (!gve_tx_was_added_to_block(priv, ring))
+ num_stopped_txqs++;
+ }
+
for (rx_pkts = 0, rx_bytes = 0, rx_hsplit_pkt = 0,
rx_skb_alloc_fail = 0, rx_buf_alloc_fail = 0,
rx_desc_err_dropped_pkt = 0, rx_hsplit_unsplit_pkt = 0,
@@ -260,7 +268,13 @@ gve_get_ethtool_stats(struct net_device *netdev,
/* For rx cross-reporting stats, start from nic rx stats in report */
base_stats_idx = GVE_TX_STATS_REPORT_NUM * num_tx_queues +
GVE_RX_STATS_REPORT_NUM * priv->rx_cfg.num_queues;
- max_stats_idx = NIC_RX_STATS_REPORT_NUM * priv->rx_cfg.num_queues +
+ /* The boundary between driver stats and NIC stats shifts if there are
+ * stopped queues.
+ */
+ base_stats_idx += NIC_RX_STATS_REPORT_NUM * num_stopped_rxqs +
+ NIC_TX_STATS_REPORT_NUM * num_stopped_txqs;
+ max_stats_idx = NIC_RX_STATS_REPORT_NUM *
+ (priv->rx_cfg.num_queues - num_stopped_rxqs) +
base_stats_idx;
/* Preprocess the stats report for rx, map queue id to start index */
skip_nic_stats = false;
@@ -274,6 +288,10 @@ gve_get_ethtool_stats(struct net_device *netdev,
skip_nic_stats = true;
break;
}
+ if (queue_id < 0 || queue_id >= priv->rx_cfg.num_queues) {
+ net_err_ratelimited("Invalid rxq id in NIC stats\n");
+ continue;
+ }
rx_qid_to_stats_idx[queue_id] = stats_idx;
}
/* walk RX rings */
@@ -308,11 +326,11 @@ gve_get_ethtool_stats(struct net_device *netdev,
data[i++] = rx->rx_copybreak_pkt;
data[i++] = rx->rx_copied_pkt;
/* stats from NIC */
- if (skip_nic_stats) {
+ stats_idx = rx_qid_to_stats_idx[ring];
+ if (skip_nic_stats || stats_idx < 0) {
/* skip NIC rx stats */
i += NIC_RX_STATS_REPORT_NUM;
} else {
- stats_idx = rx_qid_to_stats_idx[ring];
for (j = 0; j < NIC_RX_STATS_REPORT_NUM; j++) {
u64 value =
be64_to_cpu(report_stats[stats_idx + j].value);
@@ -338,7 +356,8 @@ gve_get_ethtool_stats(struct net_device *netdev,
/* For tx cross-reporting stats, start from nic tx stats in report */
base_stats_idx = max_stats_idx;
- max_stats_idx = NIC_TX_STATS_REPORT_NUM * num_tx_queues +
+ max_stats_idx = NIC_TX_STATS_REPORT_NUM *
+ (num_tx_queues - num_stopped_txqs) +
max_stats_idx;
/* Preprocess the stats report for tx, map queue id to start index */
skip_nic_stats = false;
@@ -352,6 +371,10 @@ gve_get_ethtool_stats(struct net_device *netdev,
skip_nic_stats = true;
break;
}
+ if (queue_id < 0 || queue_id >= num_tx_queues) {
+ net_err_ratelimited("Invalid txq id in NIC stats\n");
+ continue;
+ }
tx_qid_to_stats_idx[queue_id] = stats_idx;
}
/* walk TX rings */
@@ -369,7 +392,9 @@ gve_get_ethtool_stats(struct net_device *netdev,
*/
data[i++] = 0;
data[i++] = 0;
- data[i++] = tx->dqo_tx.tail - tx->dqo_tx.head;
+ data[i++] =
+ (tx->dqo_tx.tail - tx->dqo_tx.head) &
+ tx->mask;
}
do {
start =
@@ -383,20 +408,18 @@ gve_get_ethtool_stats(struct net_device *netdev,
data[i++] = gve_tx_load_event_counter(priv, tx);
data[i++] = tx->dma_mapping_error;
/* stats from NIC */
- if (skip_nic_stats) {
+ stats_idx = tx_qid_to_stats_idx[ring];
+ if (skip_nic_stats || stats_idx < 0) {
/* skip NIC tx stats */
i += NIC_TX_STATS_REPORT_NUM;
} else {
- stats_idx = tx_qid_to_stats_idx[ring];
for (j = 0; j < NIC_TX_STATS_REPORT_NUM; j++) {
u64 value =
be64_to_cpu(report_stats[stats_idx + j].value);
data[i++] = value;
}
}
- /* XDP xsk counters */
- data[i++] = tx->xdp_xsk_wakeup;
- data[i++] = tx->xdp_xsk_done;
+ /* XDP counters */
do {
start = u64_stats_fetch_begin(&priv->tx[ring].statss);
data[i] = tx->xdp_xsk_sent;
@@ -428,6 +451,11 @@ gve_get_ethtool_stats(struct net_device *netdev,
data[i++] = priv->adminq_set_driver_parameter_cnt;
data[i++] = priv->adminq_report_stats_cnt;
data[i++] = priv->adminq_report_link_speed_cnt;
+ data[i++] = priv->adminq_get_ptype_map_cnt;
+ data[i++] = priv->adminq_query_flow_rules_cnt;
+ data[i++] = priv->adminq_cfg_flow_rule_cnt;
+ data[i++] = priv->adminq_cfg_rss_cnt;
+ data[i++] = priv->adminq_query_rss_cnt;
}
static void gve_get_channels(struct net_device *netdev,
@@ -449,11 +477,12 @@ static int gve_set_channels(struct net_device *netdev,
struct ethtool_channels *cmd)
{
struct gve_priv *priv = netdev_priv(netdev);
- struct gve_queue_config new_tx_cfg = priv->tx_cfg;
- struct gve_queue_config new_rx_cfg = priv->rx_cfg;
+ struct gve_tx_queue_config new_tx_cfg = priv->tx_cfg;
+ struct gve_rx_queue_config new_rx_cfg = priv->rx_cfg;
struct ethtool_channels old_settings;
int new_tx = cmd->tx_count;
int new_rx = cmd->rx_count;
+ bool reset_rss = false;
gve_get_channels(netdev, &old_settings);
@@ -464,22 +493,27 @@ static int gve_set_channels(struct net_device *netdev,
if (!new_rx || !new_tx)
return -EINVAL;
- if (priv->num_xdp_queues &&
- (new_tx != new_rx || (2 * new_tx > priv->tx_cfg.max_queues))) {
- dev_err(&priv->pdev->dev, "XDP load failed: The number of configured RX queues should be equal to the number of configured TX queues and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues");
- return -EINVAL;
- }
+ if (priv->xdp_prog) {
+ if (new_tx != new_rx ||
+ (2 * new_tx > priv->tx_cfg.max_queues)) {
+ dev_err(&priv->pdev->dev, "The number of configured RX queues should be equal to the number of configured TX queues and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues when XDP program is installed");
+ return -EINVAL;
+ }
- if (!netif_carrier_ok(netdev)) {
- priv->tx_cfg.num_queues = new_tx;
- priv->rx_cfg.num_queues = new_rx;
- return 0;
+ /* One XDP TX queue per RX queue. */
+ new_tx_cfg.num_xdp_queues = new_rx;
+ } else {
+ new_tx_cfg.num_xdp_queues = 0;
}
+ if (new_rx != priv->rx_cfg.num_queues &&
+ priv->cache_rss_config && !netif_is_rxfh_configured(netdev))
+ reset_rss = true;
+
new_tx_cfg.num_queues = new_tx;
new_rx_cfg.num_queues = new_rx;
- return gve_adjust_queues(priv, new_rx_cfg, new_tx_cfg);
+ return gve_adjust_queues(priv, new_rx_cfg, new_tx_cfg, reset_rss);
}
static void gve_get_ringparam(struct net_device *netdev,
@@ -489,8 +523,8 @@ static void gve_get_ringparam(struct net_device *netdev,
{
struct gve_priv *priv = netdev_priv(netdev);
- cmd->rx_max_pending = priv->rx_desc_cnt;
- cmd->tx_max_pending = priv->tx_desc_cnt;
+ cmd->rx_max_pending = priv->max_rx_desc_cnt;
+ cmd->tx_max_pending = priv->max_tx_desc_cnt;
cmd->rx_pending = priv->rx_desc_cnt;
cmd->tx_pending = priv->tx_desc_cnt;
@@ -502,20 +536,81 @@ static void gve_get_ringparam(struct net_device *netdev,
kernel_cmd->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_DISABLED;
}
+static int gve_adjust_ring_sizes(struct gve_priv *priv,
+ u16 new_tx_desc_cnt,
+ u16 new_rx_desc_cnt)
+{
+ struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
+ struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
+ int err;
+
+ /* get current queue configuration */
+ gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
+
+ /* copy over the new ring_size from ethtool */
+ tx_alloc_cfg.ring_size = new_tx_desc_cnt;
+ rx_alloc_cfg.ring_size = new_rx_desc_cnt;
+
+ if (netif_running(priv->dev)) {
+ err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
+ if (err)
+ return err;
+ }
+
+ /* Set new ring_size for the next up */
+ priv->tx_desc_cnt = new_tx_desc_cnt;
+ priv->rx_desc_cnt = new_rx_desc_cnt;
+
+ return 0;
+}
+
+static int gve_validate_req_ring_size(struct gve_priv *priv, u16 new_tx_desc_cnt,
+ u16 new_rx_desc_cnt)
+{
+ /* check for valid range */
+ if (new_tx_desc_cnt < priv->min_tx_desc_cnt ||
+ new_tx_desc_cnt > priv->max_tx_desc_cnt ||
+ new_rx_desc_cnt < priv->min_rx_desc_cnt ||
+ new_rx_desc_cnt > priv->max_rx_desc_cnt) {
+ dev_err(&priv->pdev->dev, "Requested descriptor count out of range\n");
+ return -EINVAL;
+ }
+
+ if (!is_power_of_2(new_tx_desc_cnt) || !is_power_of_2(new_rx_desc_cnt)) {
+ dev_err(&priv->pdev->dev, "Requested descriptor count has to be a power of 2\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
static int gve_set_ringparam(struct net_device *netdev,
struct ethtool_ringparam *cmd,
struct kernel_ethtool_ringparam *kernel_cmd,
struct netlink_ext_ack *extack)
{
struct gve_priv *priv = netdev_priv(netdev);
+ u16 new_tx_cnt, new_rx_cnt;
+ int err;
- if (priv->tx_desc_cnt != cmd->tx_pending ||
- priv->rx_desc_cnt != cmd->rx_pending) {
- dev_info(&priv->pdev->dev, "Modify ring size is not supported.\n");
+ err = gve_set_hsplit_config(priv, kernel_cmd->tcp_data_split);
+ if (err)
+ return err;
+
+ if (cmd->tx_pending == priv->tx_desc_cnt && cmd->rx_pending == priv->rx_desc_cnt)
+ return 0;
+
+ if (!priv->modify_ring_size_enabled) {
+ dev_err(&priv->pdev->dev, "Modify ring size is not supported.\n");
return -EOPNOTSUPP;
}
- return gve_set_hsplit_config(priv, kernel_cmd->tcp_data_split);
+ new_tx_cnt = cmd->tx_pending;
+ new_rx_cnt = cmd->rx_pending;
+
+ if (gve_validate_req_ring_size(priv, new_tx_cnt, new_rx_cnt))
+ return -EINVAL;
+
+ return gve_adjust_ring_sizes(priv, new_tx_cnt, new_rx_cnt);
}
static int gve_user_reset(struct net_device *netdev, u32 *flags)
@@ -554,8 +649,7 @@ static int gve_set_tunable(struct net_device *netdev,
switch (etuna->id) {
case ETHTOOL_RX_COPYBREAK:
{
- u32 max_copybreak = gve_is_gqi(priv) ?
- GVE_DEFAULT_RX_BUFFER_SIZE : priv->data_buffer_size_dqo;
+ u32 max_copybreak = priv->rx_cfg.packet_buffer_size;
len = *(u32 *)value;
if (len > max_copybreak)
@@ -611,7 +705,7 @@ static int gve_set_priv_flags(struct net_device *netdev, u32 flags)
memset(priv->stats_report->stats, 0, (tx_stats_num + rx_stats_num) *
sizeof(struct stats));
- del_timer_sync(&priv->stats_report_timer);
+ timer_delete_sync(&priv->stats_report_timer);
}
return 0;
}
@@ -689,6 +783,151 @@ static int gve_set_coalesce(struct net_device *netdev,
return 0;
}
+static int gve_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+ int err = 0;
+
+ if (!(netdev->features & NETIF_F_NTUPLE))
+ return -EOPNOTSUPP;
+
+ switch (cmd->cmd) {
+ case ETHTOOL_SRXCLSRLINS:
+ err = gve_add_flow_rule(priv, cmd);
+ break;
+ case ETHTOOL_SRXCLSRLDEL:
+ err = gve_del_flow_rule(priv, cmd);
+ break;
+ case ETHTOOL_SRXFH:
+ err = -EOPNOTSUPP;
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ return err;
+}
+
+static int gve_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, u32 *rule_locs)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+ int err = 0;
+
+ switch (cmd->cmd) {
+ case ETHTOOL_GRXRINGS:
+ cmd->data = priv->rx_cfg.num_queues;
+ break;
+ case ETHTOOL_GRXCLSRLCNT:
+ if (!priv->max_flow_rules)
+ return -EOPNOTSUPP;
+
+ err = gve_adminq_query_flow_rules(priv, GVE_FLOW_RULE_QUERY_STATS, 0);
+ if (err)
+ return err;
+
+ cmd->rule_cnt = priv->num_flow_rules;
+ cmd->data = priv->max_flow_rules;
+ break;
+ case ETHTOOL_GRXCLSRULE:
+ err = gve_get_flow_rule_entry(priv, cmd);
+ break;
+ case ETHTOOL_GRXCLSRLALL:
+ err = gve_get_flow_rule_ids(priv, cmd, (u32 *)rule_locs);
+ break;
+ case ETHTOOL_GRXFH:
+ err = -EOPNOTSUPP;
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ return err;
+}
+
+static u32 gve_get_rxfh_key_size(struct net_device *netdev)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+
+ return priv->rss_key_size;
+}
+
+static u32 gve_get_rxfh_indir_size(struct net_device *netdev)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+
+ return priv->rss_lut_size;
+}
+
+static void gve_get_rss_config_cache(struct gve_priv *priv,
+ struct ethtool_rxfh_param *rxfh)
+{
+ struct gve_rss_config *rss_config = &priv->rss_config;
+
+ rxfh->hfunc = ETH_RSS_HASH_TOP;
+
+ if (rxfh->key) {
+ rxfh->key_size = priv->rss_key_size;
+ memcpy(rxfh->key, rss_config->hash_key, priv->rss_key_size);
+ }
+
+ if (rxfh->indir) {
+ rxfh->indir_size = priv->rss_lut_size;
+ memcpy(rxfh->indir, rss_config->hash_lut,
+ priv->rss_lut_size * sizeof(*rxfh->indir));
+ }
+}
+
+static int gve_get_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+
+ if (!priv->rss_key_size || !priv->rss_lut_size)
+ return -EOPNOTSUPP;
+
+ if (priv->cache_rss_config) {
+ gve_get_rss_config_cache(priv, rxfh);
+ return 0;
+ }
+
+ return gve_adminq_query_rss_config(priv, rxfh);
+}
+
+static void gve_set_rss_config_cache(struct gve_priv *priv,
+ struct ethtool_rxfh_param *rxfh)
+{
+ struct gve_rss_config *rss_config = &priv->rss_config;
+
+ if (rxfh->key)
+ memcpy(rss_config->hash_key, rxfh->key, priv->rss_key_size);
+
+ if (rxfh->indir)
+ memcpy(rss_config->hash_lut, rxfh->indir,
+ priv->rss_lut_size * sizeof(*rxfh->indir));
+}
+
+static int gve_set_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh,
+ struct netlink_ext_ack *extack)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+ int err;
+
+ if (!priv->rss_key_size || !priv->rss_lut_size)
+ return -EOPNOTSUPP;
+
+ err = gve_adminq_configure_rss(priv, rxfh);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Fail to configure RSS config");
+ return err;
+ }
+
+ if (priv->cache_rss_config)
+ gve_set_rss_config_cache(priv, rxfh);
+
+ return 0;
+}
+
const struct ethtool_ops gve_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_USECS,
.supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT,
@@ -700,6 +939,12 @@ const struct ethtool_ops gve_ethtool_ops = {
.get_msglevel = gve_get_msglevel,
.set_channels = gve_set_channels,
.get_channels = gve_get_channels,
+ .set_rxnfc = gve_set_rxnfc,
+ .get_rxnfc = gve_get_rxnfc,
+ .get_rxfh_indir_size = gve_get_rxfh_indir_size,
+ .get_rxfh_key_size = gve_get_rxfh_key_size,
+ .get_rxfh = gve_get_rxfh,
+ .set_rxfh = gve_set_rxfh,
.get_link = ethtool_op_get_link,
.get_coalesce = gve_get_coalesce,
.set_coalesce = gve_set_coalesce,
@@ -710,5 +955,6 @@ const struct ethtool_ops gve_ethtool_ops = {
.set_tunable = gve_set_tunable,
.get_priv_flags = gve_get_priv_flags,
.set_priv_flags = gve_set_priv_flags,
- .get_link_ksettings = gve_get_link_ksettings
+ .get_link_ksettings = gve_get_link_ksettings,
+ .get_ts_info = ethtool_op_get_ts_info,
};
diff --git a/drivers/net/ethernet/google/gve/gve_flow_rule.c b/drivers/net/ethernet/google/gve/gve_flow_rule.c
new file mode 100644
index 000000000000..0bb8cd1876a3
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_flow_rule.c
@@ -0,0 +1,298 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2024 Google LLC
+ */
+
+#include "gve.h"
+#include "gve_adminq.h"
+
+static
+int gve_fill_ethtool_flow_spec(struct ethtool_rx_flow_spec *fsp,
+ struct gve_adminq_queried_flow_rule *rule)
+{
+ struct gve_adminq_flow_rule *flow_rule = &rule->flow_rule;
+ static const u16 flow_type_lut[] = {
+ [GVE_FLOW_TYPE_TCPV4] = TCP_V4_FLOW,
+ [GVE_FLOW_TYPE_UDPV4] = UDP_V4_FLOW,
+ [GVE_FLOW_TYPE_SCTPV4] = SCTP_V4_FLOW,
+ [GVE_FLOW_TYPE_AHV4] = AH_V4_FLOW,
+ [GVE_FLOW_TYPE_ESPV4] = ESP_V4_FLOW,
+ [GVE_FLOW_TYPE_TCPV6] = TCP_V6_FLOW,
+ [GVE_FLOW_TYPE_UDPV6] = UDP_V6_FLOW,
+ [GVE_FLOW_TYPE_SCTPV6] = SCTP_V6_FLOW,
+ [GVE_FLOW_TYPE_AHV6] = AH_V6_FLOW,
+ [GVE_FLOW_TYPE_ESPV6] = ESP_V6_FLOW,
+ };
+
+ if (be16_to_cpu(flow_rule->flow_type) >= ARRAY_SIZE(flow_type_lut))
+ return -EINVAL;
+
+ fsp->flow_type = flow_type_lut[be16_to_cpu(flow_rule->flow_type)];
+
+ memset(&fsp->h_u, 0, sizeof(fsp->h_u));
+ memset(&fsp->h_ext, 0, sizeof(fsp->h_ext));
+ memset(&fsp->m_u, 0, sizeof(fsp->m_u));
+ memset(&fsp->m_ext, 0, sizeof(fsp->m_ext));
+
+ switch (fsp->flow_type) {
+ case TCP_V4_FLOW:
+ case UDP_V4_FLOW:
+ case SCTP_V4_FLOW:
+ fsp->h_u.tcp_ip4_spec.ip4src = flow_rule->key.src_ip[0];
+ fsp->h_u.tcp_ip4_spec.ip4dst = flow_rule->key.dst_ip[0];
+ fsp->h_u.tcp_ip4_spec.psrc = flow_rule->key.src_port;
+ fsp->h_u.tcp_ip4_spec.pdst = flow_rule->key.dst_port;
+ fsp->h_u.tcp_ip4_spec.tos = flow_rule->key.tos;
+ fsp->m_u.tcp_ip4_spec.ip4src = flow_rule->mask.src_ip[0];
+ fsp->m_u.tcp_ip4_spec.ip4dst = flow_rule->mask.dst_ip[0];
+ fsp->m_u.tcp_ip4_spec.psrc = flow_rule->mask.src_port;
+ fsp->m_u.tcp_ip4_spec.pdst = flow_rule->mask.dst_port;
+ fsp->m_u.tcp_ip4_spec.tos = flow_rule->mask.tos;
+ break;
+ case AH_V4_FLOW:
+ case ESP_V4_FLOW:
+ fsp->h_u.ah_ip4_spec.ip4src = flow_rule->key.src_ip[0];
+ fsp->h_u.ah_ip4_spec.ip4dst = flow_rule->key.dst_ip[0];
+ fsp->h_u.ah_ip4_spec.spi = flow_rule->key.spi;
+ fsp->h_u.ah_ip4_spec.tos = flow_rule->key.tos;
+ fsp->m_u.ah_ip4_spec.ip4src = flow_rule->mask.src_ip[0];
+ fsp->m_u.ah_ip4_spec.ip4dst = flow_rule->mask.dst_ip[0];
+ fsp->m_u.ah_ip4_spec.spi = flow_rule->mask.spi;
+ fsp->m_u.ah_ip4_spec.tos = flow_rule->mask.tos;
+ break;
+ case TCP_V6_FLOW:
+ case UDP_V6_FLOW:
+ case SCTP_V6_FLOW:
+ memcpy(fsp->h_u.tcp_ip6_spec.ip6src, &flow_rule->key.src_ip,
+ sizeof(struct in6_addr));
+ memcpy(fsp->h_u.tcp_ip6_spec.ip6dst, &flow_rule->key.dst_ip,
+ sizeof(struct in6_addr));
+ fsp->h_u.tcp_ip6_spec.psrc = flow_rule->key.src_port;
+ fsp->h_u.tcp_ip6_spec.pdst = flow_rule->key.dst_port;
+ fsp->h_u.tcp_ip6_spec.tclass = flow_rule->key.tclass;
+ memcpy(fsp->m_u.tcp_ip6_spec.ip6src, &flow_rule->mask.src_ip,
+ sizeof(struct in6_addr));
+ memcpy(fsp->m_u.tcp_ip6_spec.ip6dst, &flow_rule->mask.dst_ip,
+ sizeof(struct in6_addr));
+ fsp->m_u.tcp_ip6_spec.psrc = flow_rule->mask.src_port;
+ fsp->m_u.tcp_ip6_spec.pdst = flow_rule->mask.dst_port;
+ fsp->m_u.tcp_ip6_spec.tclass = flow_rule->mask.tclass;
+ break;
+ case AH_V6_FLOW:
+ case ESP_V6_FLOW:
+ memcpy(fsp->h_u.ah_ip6_spec.ip6src, &flow_rule->key.src_ip,
+ sizeof(struct in6_addr));
+ memcpy(fsp->h_u.ah_ip6_spec.ip6dst, &flow_rule->key.dst_ip,
+ sizeof(struct in6_addr));
+ fsp->h_u.ah_ip6_spec.spi = flow_rule->key.spi;
+ fsp->h_u.ah_ip6_spec.tclass = flow_rule->key.tclass;
+ memcpy(fsp->m_u.ah_ip6_spec.ip6src, &flow_rule->mask.src_ip,
+ sizeof(struct in6_addr));
+ memcpy(fsp->m_u.ah_ip6_spec.ip6dst, &flow_rule->mask.dst_ip,
+ sizeof(struct in6_addr));
+ fsp->m_u.ah_ip6_spec.spi = flow_rule->mask.spi;
+ fsp->m_u.ah_ip6_spec.tclass = flow_rule->mask.tclass;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ fsp->ring_cookie = be16_to_cpu(flow_rule->action);
+
+ return 0;
+}
+
+static int gve_generate_flow_rule(struct gve_priv *priv, struct ethtool_rx_flow_spec *fsp,
+ struct gve_adminq_flow_rule *rule)
+{
+ static const u16 flow_type_lut[] = {
+ [TCP_V4_FLOW] = GVE_FLOW_TYPE_TCPV4,
+ [UDP_V4_FLOW] = GVE_FLOW_TYPE_UDPV4,
+ [SCTP_V4_FLOW] = GVE_FLOW_TYPE_SCTPV4,
+ [AH_V4_FLOW] = GVE_FLOW_TYPE_AHV4,
+ [ESP_V4_FLOW] = GVE_FLOW_TYPE_ESPV4,
+ [TCP_V6_FLOW] = GVE_FLOW_TYPE_TCPV6,
+ [UDP_V6_FLOW] = GVE_FLOW_TYPE_UDPV6,
+ [SCTP_V6_FLOW] = GVE_FLOW_TYPE_SCTPV6,
+ [AH_V6_FLOW] = GVE_FLOW_TYPE_AHV6,
+ [ESP_V6_FLOW] = GVE_FLOW_TYPE_ESPV6,
+ };
+ u32 flow_type;
+
+ if (fsp->ring_cookie == RX_CLS_FLOW_DISC)
+ return -EOPNOTSUPP;
+
+ if (fsp->ring_cookie >= priv->rx_cfg.num_queues)
+ return -EINVAL;
+
+ rule->action = cpu_to_be16(fsp->ring_cookie);
+
+ flow_type = fsp->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS);
+ if (!flow_type || flow_type >= ARRAY_SIZE(flow_type_lut))
+ return -EINVAL;
+
+ rule->flow_type = cpu_to_be16(flow_type_lut[flow_type]);
+
+ switch (flow_type) {
+ case TCP_V4_FLOW:
+ case UDP_V4_FLOW:
+ case SCTP_V4_FLOW:
+ rule->key.src_ip[0] = fsp->h_u.tcp_ip4_spec.ip4src;
+ rule->key.dst_ip[0] = fsp->h_u.tcp_ip4_spec.ip4dst;
+ rule->key.src_port = fsp->h_u.tcp_ip4_spec.psrc;
+ rule->key.dst_port = fsp->h_u.tcp_ip4_spec.pdst;
+ rule->mask.src_ip[0] = fsp->m_u.tcp_ip4_spec.ip4src;
+ rule->mask.dst_ip[0] = fsp->m_u.tcp_ip4_spec.ip4dst;
+ rule->mask.src_port = fsp->m_u.tcp_ip4_spec.psrc;
+ rule->mask.dst_port = fsp->m_u.tcp_ip4_spec.pdst;
+ break;
+ case AH_V4_FLOW:
+ case ESP_V4_FLOW:
+ rule->key.src_ip[0] = fsp->h_u.tcp_ip4_spec.ip4src;
+ rule->key.dst_ip[0] = fsp->h_u.tcp_ip4_spec.ip4dst;
+ rule->key.spi = fsp->h_u.ah_ip4_spec.spi;
+ rule->mask.src_ip[0] = fsp->m_u.tcp_ip4_spec.ip4src;
+ rule->mask.dst_ip[0] = fsp->m_u.tcp_ip4_spec.ip4dst;
+ rule->mask.spi = fsp->m_u.ah_ip4_spec.spi;
+ break;
+ case TCP_V6_FLOW:
+ case UDP_V6_FLOW:
+ case SCTP_V6_FLOW:
+ memcpy(&rule->key.src_ip, fsp->h_u.tcp_ip6_spec.ip6src,
+ sizeof(struct in6_addr));
+ memcpy(&rule->key.dst_ip, fsp->h_u.tcp_ip6_spec.ip6dst,
+ sizeof(struct in6_addr));
+ rule->key.src_port = fsp->h_u.tcp_ip6_spec.psrc;
+ rule->key.dst_port = fsp->h_u.tcp_ip6_spec.pdst;
+ memcpy(&rule->mask.src_ip, fsp->m_u.tcp_ip6_spec.ip6src,
+ sizeof(struct in6_addr));
+ memcpy(&rule->mask.dst_ip, fsp->m_u.tcp_ip6_spec.ip6dst,
+ sizeof(struct in6_addr));
+ rule->mask.src_port = fsp->m_u.tcp_ip6_spec.psrc;
+ rule->mask.dst_port = fsp->m_u.tcp_ip6_spec.pdst;
+ break;
+ case AH_V6_FLOW:
+ case ESP_V6_FLOW:
+ memcpy(&rule->key.src_ip, fsp->h_u.usr_ip6_spec.ip6src,
+ sizeof(struct in6_addr));
+ memcpy(&rule->key.dst_ip, fsp->h_u.usr_ip6_spec.ip6dst,
+ sizeof(struct in6_addr));
+ rule->key.spi = fsp->h_u.ah_ip6_spec.spi;
+ memcpy(&rule->mask.src_ip, fsp->m_u.usr_ip6_spec.ip6src,
+ sizeof(struct in6_addr));
+ memcpy(&rule->mask.dst_ip, fsp->m_u.usr_ip6_spec.ip6dst,
+ sizeof(struct in6_addr));
+ rule->key.spi = fsp->h_u.ah_ip6_spec.spi;
+ break;
+ default:
+ /* not doing un-parsed flow types */
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int gve_get_flow_rule_entry(struct gve_priv *priv, struct ethtool_rxnfc *cmd)
+{
+ struct gve_adminq_queried_flow_rule *rules_cache = priv->flow_rules_cache.rules_cache;
+ struct ethtool_rx_flow_spec *fsp = (struct ethtool_rx_flow_spec *)&cmd->fs;
+ u32 *cache_num = &priv->flow_rules_cache.rules_cache_num;
+ struct gve_adminq_queried_flow_rule *rule = NULL;
+ int err = 0;
+ u32 i;
+
+ if (!priv->max_flow_rules)
+ return -EOPNOTSUPP;
+
+ if (!priv->flow_rules_cache.rules_cache_synced ||
+ fsp->location < be32_to_cpu(rules_cache[0].location) ||
+ fsp->location > be32_to_cpu(rules_cache[*cache_num - 1].location)) {
+ err = gve_adminq_query_flow_rules(priv, GVE_FLOW_RULE_QUERY_RULES, fsp->location);
+ if (err)
+ return err;
+
+ priv->flow_rules_cache.rules_cache_synced = true;
+ }
+
+ for (i = 0; i < *cache_num; i++) {
+ if (fsp->location == be32_to_cpu(rules_cache[i].location)) {
+ rule = &rules_cache[i];
+ break;
+ }
+ }
+
+ if (!rule)
+ return -EINVAL;
+
+ err = gve_fill_ethtool_flow_spec(fsp, rule);
+
+ return err;
+}
+
+int gve_get_flow_rule_ids(struct gve_priv *priv, struct ethtool_rxnfc *cmd, u32 *rule_locs)
+{
+ __be32 *rule_ids_cache = priv->flow_rules_cache.rule_ids_cache;
+ u32 *cache_num = &priv->flow_rules_cache.rule_ids_cache_num;
+ u32 starting_rule_id = 0;
+ u32 i = 0, j = 0;
+ int err = 0;
+
+ if (!priv->max_flow_rules)
+ return -EOPNOTSUPP;
+
+ do {
+ err = gve_adminq_query_flow_rules(priv, GVE_FLOW_RULE_QUERY_IDS,
+ starting_rule_id);
+ if (err)
+ return err;
+
+ for (i = 0; i < *cache_num; i++) {
+ if (j >= cmd->rule_cnt)
+ return -EMSGSIZE;
+
+ rule_locs[j++] = be32_to_cpu(rule_ids_cache[i]);
+ starting_rule_id = be32_to_cpu(rule_ids_cache[i]) + 1;
+ }
+ } while (*cache_num != 0);
+ cmd->data = priv->max_flow_rules;
+
+ return err;
+}
+
+int gve_add_flow_rule(struct gve_priv *priv, struct ethtool_rxnfc *cmd)
+{
+ struct ethtool_rx_flow_spec *fsp = &cmd->fs;
+ struct gve_adminq_flow_rule *rule = NULL;
+ int err;
+
+ if (!priv->max_flow_rules)
+ return -EOPNOTSUPP;
+
+ rule = kvzalloc(sizeof(*rule), GFP_KERNEL);
+ if (!rule)
+ return -ENOMEM;
+
+ err = gve_generate_flow_rule(priv, fsp, rule);
+ if (err)
+ goto out;
+
+ err = gve_adminq_add_flow_rule(priv, rule, fsp->location);
+
+out:
+ kvfree(rule);
+ if (err)
+ dev_err(&priv->pdev->dev, "Failed to add the flow rule: %u", fsp->location);
+
+ return err;
+}
+
+int gve_del_flow_rule(struct gve_priv *priv, struct ethtool_rxnfc *cmd)
+{
+ struct ethtool_rx_flow_spec *fsp = (struct ethtool_rx_flow_spec *)&cmd->fs;
+
+ if (!priv->max_flow_rules)
+ return -EOPNOTSUPP;
+
+ return gve_adminq_del_flow_rule(priv, fsp->location);
+}
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 166bd827a6d7..e1ffbd561fac 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
*
- * Copyright (C) 2015-2021 Google, Inc.
+ * Copyright (C) 2015-2024 Google LLC
*/
#include <linux/bpf.h>
@@ -9,6 +9,7 @@
#include <linux/etherdevice.h>
#include <linux/filter.h>
#include <linux/interrupt.h>
+#include <linux/irq.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/sched.h>
@@ -16,6 +17,7 @@
#include <linux/workqueue.h>
#include <linux/utsname.h>
#include <linux/version.h>
+#include <net/netdev_queues.h>
#include <net/sch_generic.h>
#include <net/xdp_sock_drv.h>
#include "gve.h"
@@ -139,6 +141,86 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
}
}
+static int gve_alloc_flow_rule_caches(struct gve_priv *priv)
+{
+ struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;
+ int err = 0;
+
+ if (!priv->max_flow_rules)
+ return 0;
+
+ flow_rules_cache->rules_cache =
+ kvcalloc(GVE_FLOW_RULES_CACHE_SIZE, sizeof(*flow_rules_cache->rules_cache),
+ GFP_KERNEL);
+ if (!flow_rules_cache->rules_cache) {
+ dev_err(&priv->pdev->dev, "Cannot alloc flow rules cache\n");
+ return -ENOMEM;
+ }
+
+ flow_rules_cache->rule_ids_cache =
+ kvcalloc(GVE_FLOW_RULE_IDS_CACHE_SIZE, sizeof(*flow_rules_cache->rule_ids_cache),
+ GFP_KERNEL);
+ if (!flow_rules_cache->rule_ids_cache) {
+ dev_err(&priv->pdev->dev, "Cannot alloc flow rule ids cache\n");
+ err = -ENOMEM;
+ goto free_rules_cache;
+ }
+
+ return 0;
+
+free_rules_cache:
+ kvfree(flow_rules_cache->rules_cache);
+ flow_rules_cache->rules_cache = NULL;
+ return err;
+}
+
+static void gve_free_flow_rule_caches(struct gve_priv *priv)
+{
+ struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;
+
+ kvfree(flow_rules_cache->rule_ids_cache);
+ flow_rules_cache->rule_ids_cache = NULL;
+ kvfree(flow_rules_cache->rules_cache);
+ flow_rules_cache->rules_cache = NULL;
+}
+
+static int gve_alloc_rss_config_cache(struct gve_priv *priv)
+{
+ struct gve_rss_config *rss_config = &priv->rss_config;
+
+ if (!priv->cache_rss_config)
+ return 0;
+
+ rss_config->hash_key = kcalloc(priv->rss_key_size,
+ sizeof(rss_config->hash_key[0]),
+ GFP_KERNEL);
+ if (!rss_config->hash_key)
+ return -ENOMEM;
+
+ rss_config->hash_lut = kcalloc(priv->rss_lut_size,
+ sizeof(rss_config->hash_lut[0]),
+ GFP_KERNEL);
+ if (!rss_config->hash_lut)
+ goto free_rss_key_cache;
+
+ return 0;
+
+free_rss_key_cache:
+ kfree(rss_config->hash_key);
+ rss_config->hash_key = NULL;
+ return -ENOMEM;
+}
+
+static void gve_free_rss_config_cache(struct gve_priv *priv)
+{
+ struct gve_rss_config *rss_config = &priv->rss_config;
+
+ kfree(rss_config->hash_key);
+ kfree(rss_config->hash_lut);
+
+ memset(rss_config, 0, sizeof(*rss_config));
+}
+
static int gve_alloc_counter_array(struct gve_priv *priv)
{
priv->counter_array =
@@ -220,7 +302,7 @@ static void gve_free_stats_report(struct gve_priv *priv)
if (!priv->stats_report)
return;
- del_timer_sync(&priv->stats_report_timer);
+ timer_delete_sync(&priv->stats_report_timer);
dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
priv->stats_report, priv->stats_report_bus);
priv->stats_report = NULL;
@@ -253,6 +335,18 @@ static irqreturn_t gve_intr_dqo(int irq, void *arg)
return IRQ_HANDLED;
}
+static int gve_is_napi_on_home_cpu(struct gve_priv *priv, u32 irq)
+{
+ int cpu_curr = smp_processor_id();
+ const struct cpumask *aff_mask;
+
+ aff_mask = irq_get_effective_affinity_mask(irq);
+ if (unlikely(!aff_mask))
+ return 1;
+
+ return cpumask_test_cpu(cpu_curr, aff_mask);
+}
+
int gve_napi_poll(struct napi_struct *napi, int budget)
{
struct gve_notify_block *block;
@@ -276,6 +370,14 @@ int gve_napi_poll(struct napi_struct *napi, int budget)
if (block->rx) {
work_done = gve_rx_poll(block, budget);
+
+ /* Poll XSK TX as part of RX NAPI. Setup re-poll based on max of
+ * TX and RX work done.
+ */
+ if (priv->xdp_prog)
+ work_done = max_t(int, work_done,
+ gve_xsk_tx_poll(block, budget));
+
reschedule |= work_done == budget;
}
@@ -322,8 +424,21 @@ int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
reschedule |= work_done == budget;
}
- if (reschedule)
- return budget;
+ if (reschedule) {
+ /* Reschedule by returning budget only if already on the correct
+ * cpu.
+ */
+ if (likely(gve_is_napi_on_home_cpu(priv, block->irq)))
+ return budget;
+
+ /* If not on the cpu with which this queue's irq has affinity
+ * with, we avoid rescheduling napi and arm the irq instead so
+ * that napi gets rescheduled back eventually onto the right
+ * cpu.
+ */
+ if (work_done == budget)
+ work_done--;
+ }
if (likely(napi_complete_done(napi, work_done))) {
/* Enable interrupts again.
@@ -428,6 +543,7 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
"Failed to receive msix vector %d\n", i);
goto abort_with_some_ntfy_blocks;
}
+ block->irq = priv->msix_vectors[msix_idx].vector;
irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
get_cpu_mask(i % active_cpus));
block->irq_db_index = &priv->irq_db_indices[i].index;
@@ -441,6 +557,7 @@ abort_with_some_ntfy_blocks:
irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
NULL);
free_irq(priv->msix_vectors[msix_idx].vector, block);
+ block->irq = 0;
}
kvfree(priv->ntfy_blocks);
priv->ntfy_blocks = NULL;
@@ -474,6 +591,7 @@ static void gve_free_notify_blocks(struct gve_priv *priv)
irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
NULL);
free_irq(priv->msix_vectors[msix_idx].vector, block);
+ block->irq = 0;
}
free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
kvfree(priv->ntfy_blocks);
@@ -491,9 +609,15 @@ static int gve_setup_device_resources(struct gve_priv *priv)
{
int err;
- err = gve_alloc_counter_array(priv);
+ err = gve_alloc_flow_rule_caches(priv);
if (err)
return err;
+ err = gve_alloc_rss_config_cache(priv);
+ if (err)
+ goto abort_with_flow_rule_caches;
+ err = gve_alloc_counter_array(priv);
+ if (err)
+ goto abort_with_rss_config_cache;
err = gve_alloc_notify_blocks(priv);
if (err)
goto abort_with_counter;
@@ -527,6 +651,12 @@ static int gve_setup_device_resources(struct gve_priv *priv)
}
}
+ err = gve_init_rss_config(priv, priv->rx_cfg.num_queues);
+ if (err) {
+ dev_err(&priv->pdev->dev, "Failed to init RSS config");
+ goto abort_with_ptype_lut;
+ }
+
err = gve_adminq_report_stats(priv, priv->stats_report_len,
priv->stats_report_bus,
GVE_STATS_REPORT_TIMER_PERIOD);
@@ -545,6 +675,10 @@ abort_with_ntfy_blocks:
gve_free_notify_blocks(priv);
abort_with_counter:
gve_free_counter_array(priv);
+abort_with_rss_config_cache:
+ gve_free_rss_config_cache(priv);
+abort_with_flow_rule_caches:
+ gve_free_flow_rule_caches(priv);
return err;
}
@@ -557,6 +691,12 @@ static void gve_teardown_device_resources(struct gve_priv *priv)
/* Tell device its resources are being freed */
if (gve_get_device_resources_ok(priv)) {
+ err = gve_flow_rules_reset(priv);
+ if (err) {
+ dev_err(&priv->pdev->dev,
+ "Failed to reset flow rules: err=%d\n", err);
+ gve_trigger_reset(priv);
+ }
/* detach the stats report */
err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
if (err) {
@@ -576,43 +716,44 @@ static void gve_teardown_device_resources(struct gve_priv *priv)
kvfree(priv->ptype_lut_dqo);
priv->ptype_lut_dqo = NULL;
+ gve_free_flow_rule_caches(priv);
+ gve_free_rss_config_cache(priv);
gve_free_counter_array(priv);
gve_free_notify_blocks(priv);
gve_free_stats_report(priv);
gve_clear_device_resources_ok(priv);
}
-static int gve_unregister_qpl(struct gve_priv *priv, u32 i)
+static int gve_unregister_qpl(struct gve_priv *priv,
+ struct gve_queue_page_list *qpl)
{
int err;
- err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
+ if (!qpl)
+ return 0;
+
+ err = gve_adminq_unregister_page_list(priv, qpl->id);
if (err) {
netif_err(priv, drv, priv->dev,
"Failed to unregister queue page list %d\n",
- priv->qpls[i].id);
+ qpl->id);
return err;
}
- priv->num_registered_pages -= priv->qpls[i].num_entries;
+ priv->num_registered_pages -= qpl->num_entries;
return 0;
}
-static int gve_register_qpl(struct gve_priv *priv, u32 i)
+static int gve_register_qpl(struct gve_priv *priv,
+ struct gve_queue_page_list *qpl)
{
- int num_rx_qpls;
int pages;
int err;
- /* Rx QPLs succeed Tx QPLs in the priv->qpls array. */
- num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));
- if (i >= gve_rx_start_qpl_id(&priv->tx_cfg) + num_rx_qpls) {
- netif_err(priv, drv, priv->dev,
- "Cannot register nonexisting QPL at index %d\n", i);
- return -EINVAL;
- }
+ if (!qpl)
+ return 0;
- pages = priv->qpls[i].num_entries;
+ pages = qpl->num_entries;
if (pages + priv->num_registered_pages > priv->max_registered_pages) {
netif_err(priv, drv, priv->dev,
@@ -622,14 +763,11 @@ static int gve_register_qpl(struct gve_priv *priv, u32 i)
return -EINVAL;
}
- err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
+ err = gve_adminq_register_page_list(priv, qpl);
if (err) {
netif_err(priv, drv, priv->dev,
"failed to register queue page list %d\n",
- priv->qpls[i].id);
- /* This failure will trigger a reset - no need to clean
- * up
- */
+ qpl->id);
return err;
}
@@ -637,43 +775,43 @@ static int gve_register_qpl(struct gve_priv *priv, u32 i)
return 0;
}
-static int gve_register_xdp_qpls(struct gve_priv *priv)
+static struct gve_queue_page_list *gve_tx_get_qpl(struct gve_priv *priv, int idx)
{
- int start_id;
- int err;
- int i;
+ struct gve_tx_ring *tx = &priv->tx[idx];
- start_id = gve_xdp_tx_start_queue_id(priv);
- for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
- err = gve_register_qpl(priv, i);
- /* This failure will trigger a reset - no need to clean up */
- if (err)
- return err;
- }
- return 0;
+ if (gve_is_gqi(priv))
+ return tx->tx_fifo.qpl;
+ else
+ return tx->dqo.qpl;
+}
+
+static struct gve_queue_page_list *gve_rx_get_qpl(struct gve_priv *priv, int idx)
+{
+ struct gve_rx_ring *rx = &priv->rx[idx];
+
+ if (gve_is_gqi(priv))
+ return rx->data.qpl;
+ else
+ return rx->dqo.qpl;
}
static int gve_register_qpls(struct gve_priv *priv)
{
int num_tx_qpls, num_rx_qpls;
- int start_id;
int err;
int i;
- num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv),
- gve_is_qpl(priv));
+ num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv));
num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));
for (i = 0; i < num_tx_qpls; i++) {
- err = gve_register_qpl(priv, i);
+ err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i));
if (err)
return err;
}
- /* there might be a gap between the tx and rx qpl ids */
- start_id = gve_rx_start_qpl_id(&priv->tx_cfg);
for (i = 0; i < num_rx_qpls; i++) {
- err = gve_register_qpl(priv, start_id + i);
+ err = gve_register_qpl(priv, gve_rx_get_qpl(priv, i));
if (err)
return err;
}
@@ -681,43 +819,24 @@ static int gve_register_qpls(struct gve_priv *priv)
return 0;
}
-static int gve_unregister_xdp_qpls(struct gve_priv *priv)
-{
- int start_id;
- int err;
- int i;
-
- start_id = gve_xdp_tx_start_queue_id(priv);
- for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
- err = gve_unregister_qpl(priv, i);
- /* This failure will trigger a reset - no need to clean */
- if (err)
- return err;
- }
- return 0;
-}
-
static int gve_unregister_qpls(struct gve_priv *priv)
{
int num_tx_qpls, num_rx_qpls;
- int start_id;
int err;
int i;
- num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv),
- gve_is_qpl(priv));
+ num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv));
num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));
for (i = 0; i < num_tx_qpls; i++) {
- err = gve_unregister_qpl(priv, i);
+ err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i));
/* This failure will trigger a reset - no need to clean */
if (err)
return err;
}
- start_id = gve_rx_start_qpl_id(&priv->tx_cfg);
for (i = 0; i < num_rx_qpls; i++) {
- err = gve_unregister_qpl(priv, start_id + i);
+ err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, i));
/* This failure will trigger a reset - no need to clean */
if (err)
return err;
@@ -725,27 +844,6 @@ static int gve_unregister_qpls(struct gve_priv *priv)
return 0;
}
-static int gve_create_xdp_rings(struct gve_priv *priv)
-{
- int err;
-
- err = gve_adminq_create_tx_queues(priv,
- gve_xdp_tx_start_queue_id(priv),
- priv->num_xdp_queues);
- if (err) {
- netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n",
- priv->num_xdp_queues);
- /* This failure will trigger a reset - no need to clean
- * up
- */
- return err;
- }
- netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n",
- priv->num_xdp_queues);
-
- return 0;
-}
-
static int gve_create_rings(struct gve_priv *priv)
{
int num_tx_queues = gve_num_tx_queues(priv);
@@ -801,7 +899,7 @@ static void init_xdp_sync_stats(struct gve_priv *priv)
int i;
/* Init stats */
- for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
+ for (i = start_id; i < start_id + priv->tx_cfg.num_xdp_queues; i++) {
int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
u64_stats_init(&priv->tx[i].statss);
@@ -828,22 +926,19 @@ static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv,
{
cfg->qcfg = &priv->tx_cfg;
cfg->raw_addressing = !gve_is_qpl(priv);
- cfg->qpls = priv->qpls;
- cfg->qpl_cfg = &priv->qpl_cfg;
cfg->ring_size = priv->tx_desc_cnt;
- cfg->start_idx = 0;
- cfg->num_rings = gve_num_tx_queues(priv);
+ cfg->num_xdp_rings = cfg->qcfg->num_xdp_queues;
cfg->tx = priv->tx;
}
-static void gve_tx_stop_rings(struct gve_priv *priv, int start_id, int num_rings)
+static void gve_tx_stop_rings(struct gve_priv *priv, int num_rings)
{
int i;
if (!priv->tx)
return;
- for (i = start_id; i < start_id + num_rings; i++) {
+ for (i = 0; i < num_rings; i++) {
if (gve_is_gqi(priv))
gve_tx_stop_ring_gqi(priv, i);
else
@@ -851,12 +946,11 @@ static void gve_tx_stop_rings(struct gve_priv *priv, int start_id, int num_rings
}
}
-static void gve_tx_start_rings(struct gve_priv *priv, int start_id,
- int num_rings)
+static void gve_tx_start_rings(struct gve_priv *priv, int num_rings)
{
int i;
- for (i = start_id; i < start_id + num_rings; i++) {
+ for (i = 0; i < num_rings; i++) {
if (gve_is_gqi(priv))
gve_tx_start_ring_gqi(priv, i);
else
@@ -864,31 +958,9 @@ static void gve_tx_start_rings(struct gve_priv *priv, int start_id,
}
}
-static int gve_alloc_xdp_rings(struct gve_priv *priv)
-{
- struct gve_tx_alloc_rings_cfg cfg = {0};
- int err = 0;
-
- if (!priv->num_xdp_queues)
- return 0;
-
- gve_tx_get_curr_alloc_cfg(priv, &cfg);
- cfg.start_idx = gve_xdp_tx_start_queue_id(priv);
- cfg.num_rings = priv->num_xdp_queues;
-
- err = gve_tx_alloc_rings_gqi(priv, &cfg);
- if (err)
- return err;
-
- gve_tx_start_rings(priv, cfg.start_idx, cfg.num_rings);
- init_xdp_sync_stats(priv);
-
- return 0;
-}
-
-static int gve_alloc_rings(struct gve_priv *priv,
- struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
- struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
+static int gve_queues_mem_alloc(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
+ struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
{
int err;
@@ -916,26 +988,6 @@ free_tx:
return err;
}
-static int gve_destroy_xdp_rings(struct gve_priv *priv)
-{
- int start_id;
- int err;
-
- start_id = gve_xdp_tx_start_queue_id(priv);
- err = gve_adminq_destroy_tx_queues(priv,
- start_id,
- priv->num_xdp_queues);
- if (err) {
- netif_err(priv, drv, priv->dev,
- "failed to destroy XDP queues\n");
- /* This failure will trigger a reset - no need to clean up */
- return err;
- }
- netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n");
-
- return 0;
-}
-
static int gve_destroy_rings(struct gve_priv *priv)
{
int num_tx_queues = gve_num_tx_queues(priv);
@@ -960,23 +1012,9 @@ static int gve_destroy_rings(struct gve_priv *priv)
return 0;
}
-static void gve_free_xdp_rings(struct gve_priv *priv)
-{
- struct gve_tx_alloc_rings_cfg cfg = {0};
-
- gve_tx_get_curr_alloc_cfg(priv, &cfg);
- cfg.start_idx = gve_xdp_tx_start_queue_id(priv);
- cfg.num_rings = priv->num_xdp_queues;
-
- if (priv->tx) {
- gve_tx_stop_rings(priv, cfg.start_idx, cfg.num_rings);
- gve_tx_free_rings_gqi(priv, &cfg);
- }
-}
-
-static void gve_free_rings(struct gve_priv *priv,
- struct gve_tx_alloc_rings_cfg *tx_cfg,
- struct gve_rx_alloc_rings_cfg *rx_cfg)
+static void gve_queues_mem_free(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *tx_cfg,
+ struct gve_rx_alloc_rings_cfg *rx_cfg)
{
if (gve_is_gqi(priv)) {
gve_tx_free_rings_gqi(priv, tx_cfg);
@@ -1005,35 +1043,41 @@ int gve_alloc_page(struct gve_priv *priv, struct device *dev,
return 0;
}
-static int gve_alloc_queue_page_list(struct gve_priv *priv,
- struct gve_queue_page_list *qpl,
- u32 id, int pages)
+struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv,
+ u32 id, int pages)
{
+ struct gve_queue_page_list *qpl;
int err;
int i;
+ qpl = kvzalloc(sizeof(*qpl), GFP_KERNEL);
+ if (!qpl)
+ return NULL;
+
qpl->id = id;
qpl->num_entries = 0;
qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL);
- /* caller handles clean up */
if (!qpl->pages)
- return -ENOMEM;
+ goto abort;
+
qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL);
- /* caller handles clean up */
if (!qpl->page_buses)
- return -ENOMEM;
+ goto abort;
for (i = 0; i < pages; i++) {
err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
&qpl->page_buses[i],
gve_qpl_dma_dir(priv, id), GFP_KERNEL);
- /* caller handles clean up */
if (err)
- return -ENOMEM;
+ goto abort;
qpl->num_entries++;
}
- return 0;
+ return qpl;
+
+abort:
+ gve_free_queue_page_list(priv, qpl, id);
+ return NULL;
}
void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
@@ -1045,14 +1089,16 @@ void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
put_page(page);
}
-static void gve_free_queue_page_list(struct gve_priv *priv,
- struct gve_queue_page_list *qpl,
- int id)
+void gve_free_queue_page_list(struct gve_priv *priv,
+ struct gve_queue_page_list *qpl,
+ u32 id)
{
int i;
- if (!qpl->pages)
+ if (!qpl)
return;
+ if (!qpl->pages)
+ goto free_qpl;
if (!qpl->page_buses)
goto free_pages;
@@ -1065,120 +1111,8 @@ static void gve_free_queue_page_list(struct gve_priv *priv,
free_pages:
kvfree(qpl->pages);
qpl->pages = NULL;
-}
-
-static void gve_free_n_qpls(struct gve_priv *priv,
- struct gve_queue_page_list *qpls,
- int start_id,
- int num_qpls)
-{
- int i;
-
- for (i = start_id; i < start_id + num_qpls; i++)
- gve_free_queue_page_list(priv, &qpls[i], i);
-}
-
-static int gve_alloc_n_qpls(struct gve_priv *priv,
- struct gve_queue_page_list *qpls,
- int page_count,
- int start_id,
- int num_qpls)
-{
- int err;
- int i;
-
- for (i = start_id; i < start_id + num_qpls; i++) {
- err = gve_alloc_queue_page_list(priv, &qpls[i], i, page_count);
- if (err)
- goto free_qpls;
- }
-
- return 0;
-
-free_qpls:
- /* Must include the failing QPL too for gve_alloc_queue_page_list fails
- * without cleaning up.
- */
- gve_free_n_qpls(priv, qpls, start_id, i - start_id + 1);
- return err;
-}
-
-static int gve_alloc_qpls(struct gve_priv *priv,
- struct gve_qpls_alloc_cfg *cfg)
-{
- int max_queues = cfg->tx_cfg->max_queues + cfg->rx_cfg->max_queues;
- int rx_start_id, tx_num_qpls, rx_num_qpls;
- struct gve_queue_page_list *qpls;
- int page_count;
- int err;
-
- if (cfg->raw_addressing)
- return 0;
-
- qpls = kvcalloc(max_queues, sizeof(*qpls), GFP_KERNEL);
- if (!qpls)
- return -ENOMEM;
-
- cfg->qpl_cfg->qpl_map_size = BITS_TO_LONGS(max_queues) *
- sizeof(unsigned long) * BITS_PER_BYTE;
- cfg->qpl_cfg->qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues),
- sizeof(unsigned long), GFP_KERNEL);
- if (!cfg->qpl_cfg->qpl_id_map) {
- err = -ENOMEM;
- goto free_qpl_array;
- }
-
- /* Allocate TX QPLs */
- page_count = priv->tx_pages_per_qpl;
- tx_num_qpls = gve_num_tx_qpls(cfg->tx_cfg, cfg->num_xdp_queues,
- gve_is_qpl(priv));
- err = gve_alloc_n_qpls(priv, qpls, page_count, 0, tx_num_qpls);
- if (err)
- goto free_qpl_map;
-
- /* Allocate RX QPLs */
- rx_start_id = gve_rx_start_qpl_id(cfg->tx_cfg);
- /* For GQI_QPL number of pages allocated have 1:1 relationship with
- * number of descriptors. For DQO, number of pages required are
- * more than descriptors (because of out of order completions).
- */
- page_count = cfg->is_gqi ? priv->rx_data_slot_cnt : priv->rx_pages_per_qpl;
- rx_num_qpls = gve_num_rx_qpls(cfg->rx_cfg, gve_is_qpl(priv));
- err = gve_alloc_n_qpls(priv, qpls, page_count, rx_start_id, rx_num_qpls);
- if (err)
- goto free_tx_qpls;
-
- cfg->qpls = qpls;
- return 0;
-
-free_tx_qpls:
- gve_free_n_qpls(priv, qpls, 0, tx_num_qpls);
-free_qpl_map:
- kvfree(cfg->qpl_cfg->qpl_id_map);
- cfg->qpl_cfg->qpl_id_map = NULL;
-free_qpl_array:
- kvfree(qpls);
- return err;
-}
-
-static void gve_free_qpls(struct gve_priv *priv,
- struct gve_qpls_alloc_cfg *cfg)
-{
- int max_queues = cfg->tx_cfg->max_queues + cfg->rx_cfg->max_queues;
- struct gve_queue_page_list *qpls = cfg->qpls;
- int i;
-
- if (!qpls)
- return;
-
- kvfree(cfg->qpl_cfg->qpl_id_map);
- cfg->qpl_cfg->qpl_id_map = NULL;
-
- for (i = 0; i < max_queues; i++)
- gve_free_queue_page_list(priv, &qpls[i], i);
-
- kvfree(qpls);
- cfg->qpls = NULL;
+free_qpl:
+ kvfree(qpl);
}
/* Use this to schedule a reset when the device is capable of continuing
@@ -1204,7 +1138,7 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
int i, j;
u32 tx_qid;
- if (!priv->num_xdp_queues)
+ if (!priv->tx_cfg.num_xdp_queues)
return 0;
for (i = 0; i < priv->rx_cfg.num_queues; i++) {
@@ -1215,8 +1149,14 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
napi->napi_id);
if (err)
goto err;
- err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
- MEM_TYPE_PAGE_SHARED, NULL);
+ if (gve_is_qpl(priv))
+ err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
+ MEM_TYPE_PAGE_SHARED,
+ NULL);
+ else
+ err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
+ MEM_TYPE_PAGE_POOL,
+ rx->dqo.page_pool);
if (err)
goto err;
rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
@@ -1234,7 +1174,7 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
}
}
- for (i = 0; i < priv->num_xdp_queues; i++) {
+ for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) {
tx_qid = gve_xdp_tx_queue_id(priv, i);
priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i);
}
@@ -1255,7 +1195,7 @@ static void gve_unreg_xdp_info(struct gve_priv *priv)
{
int i, tx_qid;
- if (!priv->num_xdp_queues)
+ if (!priv->tx_cfg.num_xdp_queues || !priv->rx || !priv->tx)
return;
for (i = 0; i < priv->rx_cfg.num_queues; i++) {
@@ -1268,7 +1208,7 @@ static void gve_unreg_xdp_info(struct gve_priv *priv)
}
}
- for (i = 0; i < priv->num_xdp_queues; i++) {
+ for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) {
tx_qid = gve_xdp_tx_queue_id(priv, i);
priv->tx[tx_qid].xsk_pool = NULL;
}
@@ -1282,118 +1222,69 @@ static void gve_drain_page_cache(struct gve_priv *priv)
page_frag_cache_drain(&priv->rx[i].page_cache);
}
-static void gve_qpls_get_curr_alloc_cfg(struct gve_priv *priv,
- struct gve_qpls_alloc_cfg *cfg)
-{
- cfg->raw_addressing = !gve_is_qpl(priv);
- cfg->is_gqi = gve_is_gqi(priv);
- cfg->num_xdp_queues = priv->num_xdp_queues;
- cfg->qpl_cfg = &priv->qpl_cfg;
- cfg->tx_cfg = &priv->tx_cfg;
- cfg->rx_cfg = &priv->rx_cfg;
- cfg->qpls = priv->qpls;
-}
-
static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv,
struct gve_rx_alloc_rings_cfg *cfg)
{
- cfg->qcfg = &priv->rx_cfg;
+ cfg->qcfg_rx = &priv->rx_cfg;
cfg->qcfg_tx = &priv->tx_cfg;
cfg->raw_addressing = !gve_is_qpl(priv);
cfg->enable_header_split = priv->header_split_enabled;
- cfg->qpls = priv->qpls;
- cfg->qpl_cfg = &priv->qpl_cfg;
cfg->ring_size = priv->rx_desc_cnt;
- cfg->packet_buffer_size = gve_is_gqi(priv) ?
- GVE_DEFAULT_RX_BUFFER_SIZE :
- priv->data_buffer_size_dqo;
+ cfg->packet_buffer_size = priv->rx_cfg.packet_buffer_size;
cfg->rx = priv->rx;
+ cfg->xdp = !!cfg->qcfg_tx->num_xdp_queues;
}
-static void gve_get_curr_alloc_cfgs(struct gve_priv *priv,
- struct gve_qpls_alloc_cfg *qpls_alloc_cfg,
- struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
- struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
+void gve_get_curr_alloc_cfgs(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
+ struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
{
- gve_qpls_get_curr_alloc_cfg(priv, qpls_alloc_cfg);
gve_tx_get_curr_alloc_cfg(priv, tx_alloc_cfg);
gve_rx_get_curr_alloc_cfg(priv, rx_alloc_cfg);
}
-static void gve_rx_start_rings(struct gve_priv *priv, int num_rings)
+static void gve_rx_start_ring(struct gve_priv *priv, int i)
{
- int i;
-
- for (i = 0; i < num_rings; i++) {
- if (gve_is_gqi(priv))
- gve_rx_start_ring_gqi(priv, i);
- else
- gve_rx_start_ring_dqo(priv, i);
- }
+ if (gve_is_gqi(priv))
+ gve_rx_start_ring_gqi(priv, i);
+ else
+ gve_rx_start_ring_dqo(priv, i);
}
-static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings)
+static void gve_rx_start_rings(struct gve_priv *priv, int num_rings)
{
int i;
- if (!priv->rx)
- return;
-
- for (i = 0; i < num_rings; i++) {
- if (gve_is_gqi(priv))
- gve_rx_stop_ring_gqi(priv, i);
- else
- gve_rx_stop_ring_dqo(priv, i);
- }
+ for (i = 0; i < num_rings; i++)
+ gve_rx_start_ring(priv, i);
}
-static void gve_queues_mem_free(struct gve_priv *priv,
- struct gve_qpls_alloc_cfg *qpls_alloc_cfg,
- struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
- struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
+static void gve_rx_stop_ring(struct gve_priv *priv, int i)
{
- gve_free_rings(priv, tx_alloc_cfg, rx_alloc_cfg);
- gve_free_qpls(priv, qpls_alloc_cfg);
+ if (gve_is_gqi(priv))
+ gve_rx_stop_ring_gqi(priv, i);
+ else
+ gve_rx_stop_ring_dqo(priv, i);
}
-static int gve_queues_mem_alloc(struct gve_priv *priv,
- struct gve_qpls_alloc_cfg *qpls_alloc_cfg,
- struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
- struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
+static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings)
{
- int err;
-
- err = gve_alloc_qpls(priv, qpls_alloc_cfg);
- if (err) {
- netif_err(priv, drv, priv->dev, "Failed to alloc QPLs\n");
- return err;
- }
- tx_alloc_cfg->qpls = qpls_alloc_cfg->qpls;
- rx_alloc_cfg->qpls = qpls_alloc_cfg->qpls;
- err = gve_alloc_rings(priv, tx_alloc_cfg, rx_alloc_cfg);
- if (err) {
- netif_err(priv, drv, priv->dev, "Failed to alloc rings\n");
- goto free_qpls;
- }
+ int i;
- return 0;
+ if (!priv->rx)
+ return;
-free_qpls:
- gve_free_qpls(priv, qpls_alloc_cfg);
- return err;
+ for (i = 0; i < num_rings; i++)
+ gve_rx_stop_ring(priv, i);
}
static void gve_queues_mem_remove(struct gve_priv *priv)
{
struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
- struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0};
- gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg,
- &tx_alloc_cfg, &rx_alloc_cfg);
- gve_queues_mem_free(priv, &qpls_alloc_cfg,
- &tx_alloc_cfg, &rx_alloc_cfg);
- priv->qpls = NULL;
+ gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
+ gve_queues_mem_free(priv, &tx_alloc_cfg, &rx_alloc_cfg);
priv->tx = NULL;
priv->rx = NULL;
}
@@ -1402,7 +1293,6 @@ static void gve_queues_mem_remove(struct gve_priv *priv)
* No memory is allocated. Passed-in memory is freed on errors.
*/
static int gve_queues_start(struct gve_priv *priv,
- struct gve_qpls_alloc_cfg *qpls_alloc_cfg,
struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
{
@@ -1410,24 +1300,18 @@ static int gve_queues_start(struct gve_priv *priv,
int err;
/* Record new resources into priv */
- priv->qpls = qpls_alloc_cfg->qpls;
priv->tx = tx_alloc_cfg->tx;
priv->rx = rx_alloc_cfg->rx;
/* Record new configs into priv */
- priv->qpl_cfg = *qpls_alloc_cfg->qpl_cfg;
priv->tx_cfg = *tx_alloc_cfg->qcfg;
- priv->rx_cfg = *rx_alloc_cfg->qcfg;
+ priv->tx_cfg.num_xdp_queues = tx_alloc_cfg->num_xdp_rings;
+ priv->rx_cfg = *rx_alloc_cfg->qcfg_rx;
priv->tx_desc_cnt = tx_alloc_cfg->ring_size;
priv->rx_desc_cnt = rx_alloc_cfg->ring_size;
- if (priv->xdp_prog)
- priv->num_xdp_queues = priv->rx_cfg.num_queues;
- else
- priv->num_xdp_queues = 0;
-
- gve_tx_start_rings(priv, 0, tx_alloc_cfg->num_rings);
- gve_rx_start_rings(priv, rx_alloc_cfg->qcfg->num_queues);
+ gve_tx_start_rings(priv, gve_num_tx_queues(priv));
+ gve_rx_start_rings(priv, rx_alloc_cfg->qcfg_rx->num_queues);
gve_init_sync_stats(priv);
err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
@@ -1441,12 +1325,18 @@ static int gve_queues_start(struct gve_priv *priv,
if (err)
goto stop_and_free_rings;
+ if (rx_alloc_cfg->reset_rss) {
+ err = gve_init_rss_config(priv, priv->rx_cfg.num_queues);
+ if (err)
+ goto reset;
+ }
+
err = gve_register_qpls(priv);
if (err)
goto reset;
priv->header_split_enabled = rx_alloc_cfg->enable_header_split;
- priv->data_buffer_size_dqo = rx_alloc_cfg->packet_buffer_size;
+ priv->rx_cfg.packet_buffer_size = rx_alloc_cfg->packet_buffer_size;
err = gve_create_rings(priv);
if (err)
@@ -1473,7 +1363,7 @@ reset:
/* return the original error */
return err;
stop_and_free_rings:
- gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv));
+ gve_tx_stop_rings(priv, gve_num_tx_queues(priv));
gve_rx_stop_rings(priv, priv->rx_cfg.num_queues);
gve_queues_mem_remove(priv);
return err;
@@ -1483,23 +1373,19 @@ static int gve_open(struct net_device *dev)
{
struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
- struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0};
struct gve_priv *priv = netdev_priv(dev);
int err;
- gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg,
- &tx_alloc_cfg, &rx_alloc_cfg);
+ gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
- err = gve_queues_mem_alloc(priv, &qpls_alloc_cfg,
- &tx_alloc_cfg, &rx_alloc_cfg);
+ err = gve_queues_mem_alloc(priv, &tx_alloc_cfg, &rx_alloc_cfg);
if (err)
return err;
/* No need to free on error: ownership of resources is lost after
* calling gve_queues_start.
*/
- err = gve_queues_start(priv, &qpls_alloc_cfg,
- &tx_alloc_cfg, &rx_alloc_cfg);
+ err = gve_queues_start(priv, &tx_alloc_cfg, &rx_alloc_cfg);
if (err)
return err;
@@ -1522,11 +1408,11 @@ static int gve_queues_stop(struct gve_priv *priv)
goto err;
gve_clear_device_rings_ok(priv);
}
- del_timer_sync(&priv->stats_report_timer);
+ timer_delete_sync(&priv->stats_report_timer);
gve_unreg_xdp_info(priv);
- gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv));
+ gve_tx_stop_rings(priv, gve_num_tx_queues(priv));
gve_rx_stop_rings(priv, priv->rx_cfg.num_queues);
priv->interface_down_cnt++;
@@ -1556,69 +1442,6 @@ static int gve_close(struct net_device *dev)
return 0;
}
-static int gve_remove_xdp_queues(struct gve_priv *priv)
-{
- int qpl_start_id;
- int err;
-
- qpl_start_id = gve_xdp_tx_start_queue_id(priv);
-
- err = gve_destroy_xdp_rings(priv);
- if (err)
- return err;
-
- err = gve_unregister_xdp_qpls(priv);
- if (err)
- return err;
-
- gve_unreg_xdp_info(priv);
- gve_free_xdp_rings(priv);
-
- gve_free_n_qpls(priv, priv->qpls, qpl_start_id, gve_num_xdp_qpls(priv));
- priv->num_xdp_queues = 0;
- return 0;
-}
-
-static int gve_add_xdp_queues(struct gve_priv *priv)
-{
- int start_id;
- int err;
-
- priv->num_xdp_queues = priv->rx_cfg.num_queues;
-
- start_id = gve_xdp_tx_start_queue_id(priv);
- err = gve_alloc_n_qpls(priv, priv->qpls, priv->tx_pages_per_qpl,
- start_id, gve_num_xdp_qpls(priv));
- if (err)
- goto err;
-
- err = gve_alloc_xdp_rings(priv);
- if (err)
- goto free_xdp_qpls;
-
- err = gve_reg_xdp_info(priv, priv->dev);
- if (err)
- goto free_xdp_rings;
-
- err = gve_register_xdp_qpls(priv);
- if (err)
- goto free_xdp_rings;
-
- err = gve_create_xdp_rings(priv);
- if (err)
- goto free_xdp_rings;
-
- return 0;
-
-free_xdp_rings:
- gve_free_xdp_rings(priv);
-free_xdp_qpls:
- gve_free_n_qpls(priv, priv->qpls, start_id, gve_num_xdp_qpls(priv));
-err:
- priv->num_xdp_queues = 0;
- return err;
-}
-
static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
{
if (!gve_get_napi_enabled(priv))
@@ -1636,6 +1459,19 @@ static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
}
}
+static int gve_configure_rings_xdp(struct gve_priv *priv,
+ u16 num_xdp_rings)
+{
+ struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
+ struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
+
+ gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
+ tx_alloc_cfg.num_xdp_rings = num_xdp_rings;
+
+ rx_alloc_cfg.xdp = !!num_xdp_rings;
+ return gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
+}
+
static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
struct netlink_ext_ack *extack)
{
@@ -1644,33 +1480,30 @@ static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
u32 status;
old_prog = READ_ONCE(priv->xdp_prog);
- if (!netif_carrier_ok(priv->dev)) {
+ if (!netif_running(priv->dev)) {
WRITE_ONCE(priv->xdp_prog, prog);
if (old_prog)
bpf_prog_put(old_prog);
+
+ /* Update priv XDP queue configuration */
+ priv->tx_cfg.num_xdp_queues = priv->xdp_prog ?
+ priv->rx_cfg.num_queues : 0;
return 0;
}
- gve_turndown(priv);
- if (!old_prog && prog) {
- // Allocate XDP TX queues if an XDP program is
- // being installed
- err = gve_add_xdp_queues(priv);
- if (err)
- goto out;
- } else if (old_prog && !prog) {
- // Remove XDP TX queues if an XDP program is
- // being uninstalled
- err = gve_remove_xdp_queues(priv);
- if (err)
- goto out;
- }
+ if (!old_prog && prog)
+ err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues);
+ else if (old_prog && !prog)
+ err = gve_configure_rings_xdp(priv, 0);
+
+ if (err)
+ goto out;
+
WRITE_ONCE(priv->xdp_prog, prog);
if (old_prog)
bpf_prog_put(old_prog);
out:
- gve_turnup(priv);
status = ioread32be(&priv->reg_bar0->device_status);
gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
return err;
@@ -1701,8 +1534,8 @@ static int gve_xsk_pool_enable(struct net_device *dev,
if (err)
return err;
- /* If XDP prog is not installed, return */
- if (!priv->xdp_prog)
+ /* If XDP prog is not installed or interface is down, return. */
+ if (!priv->xdp_prog || !netif_running(dev))
return 0;
rx = &priv->rx[qid];
@@ -1747,21 +1580,16 @@ static int gve_xsk_pool_disable(struct net_device *dev,
if (qid >= priv->rx_cfg.num_queues)
return -EINVAL;
- /* If XDP prog is not installed, unmap DMA and return */
- if (!priv->xdp_prog)
- goto done;
-
- tx_qid = gve_xdp_tx_queue_id(priv, qid);
- if (!netif_running(dev)) {
- priv->rx[qid].xsk_pool = NULL;
- xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
- priv->tx[tx_qid].xsk_pool = NULL;
+ /* If XDP prog is not installed or interface is down, unmap DMA and
+ * return.
+ */
+ if (!priv->xdp_prog || !netif_running(dev))
goto done;
- }
napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
napi_disable(napi_rx); /* make sure current rx poll is done */
+ tx_qid = gve_xdp_tx_queue_id(priv, qid);
napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi;
napi_disable(napi_tx); /* make sure current tx poll is done */
@@ -1787,24 +1615,20 @@ done:
static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
{
struct gve_priv *priv = netdev_priv(dev);
- int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id);
+ struct napi_struct *napi;
+
+ if (!gve_get_napi_enabled(priv))
+ return -ENETDOWN;
if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog)
return -EINVAL;
- if (flags & XDP_WAKEUP_TX) {
- struct gve_tx_ring *tx = &priv->tx[tx_queue_id];
- struct napi_struct *napi =
- &priv->ntfy_blocks[tx->ntfy_id].napi;
-
- if (!napi_if_scheduled_mark_missed(napi)) {
- /* Call local_bh_enable to trigger SoftIRQ processing */
- local_bh_disable();
- napi_schedule(napi);
- local_bh_enable();
- }
-
- tx->xdp_xsk_wakeup++;
+ napi = &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_id)].napi;
+ if (!napi_if_scheduled_mark_missed(napi)) {
+ /* Call local_bh_enable to trigger SoftIRQ processing */
+ local_bh_disable();
+ napi_schedule(napi);
+ local_bh_enable();
}
return 0;
@@ -1813,6 +1637,7 @@ static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
static int verify_xdp_configuration(struct net_device *dev)
{
struct gve_priv *priv = netdev_priv(dev);
+ u16 max_xdp_mtu;
if (dev->features & NETIF_F_LRO) {
netdev_warn(dev, "XDP is not supported when LRO is on.\n");
@@ -1825,7 +1650,11 @@ static int verify_xdp_configuration(struct net_device *dev)
return -EOPNOTSUPP;
}
- if (dev->mtu > GVE_DEFAULT_RX_BUFFER_SIZE - sizeof(struct ethhdr) - GVE_RX_PAD) {
+ max_xdp_mtu = priv->rx_cfg.packet_buffer_size - sizeof(struct ethhdr);
+ if (priv->queue_format == GVE_GQI_QPL_FORMAT)
+ max_xdp_mtu -= GVE_RX_PAD;
+
+ if (dev->mtu > max_xdp_mtu) {
netdev_warn(dev, "XDP is not supported for mtu %d.\n",
dev->mtu);
return -EOPNOTSUPP;
@@ -1863,16 +1692,42 @@ static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp)
}
}
-static int gve_adjust_config(struct gve_priv *priv,
- struct gve_qpls_alloc_cfg *qpls_alloc_cfg,
- struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
- struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
+int gve_init_rss_config(struct gve_priv *priv, u16 num_queues)
+{
+ struct gve_rss_config *rss_config = &priv->rss_config;
+ struct ethtool_rxfh_param rxfh = {0};
+ u16 i;
+
+ if (!priv->cache_rss_config)
+ return 0;
+
+ for (i = 0; i < priv->rss_lut_size; i++)
+ rss_config->hash_lut[i] =
+ ethtool_rxfh_indir_default(i, num_queues);
+
+ netdev_rss_key_fill(rss_config->hash_key, priv->rss_key_size);
+
+ rxfh.hfunc = ETH_RSS_HASH_TOP;
+
+ return gve_adminq_configure_rss(priv, &rxfh);
+}
+
+int gve_flow_rules_reset(struct gve_priv *priv)
+{
+ if (!priv->max_flow_rules)
+ return 0;
+
+ return gve_adminq_reset_flow_rules(priv);
+}
+
+int gve_adjust_config(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
+ struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
{
int err;
/* Allocate resources for the new confiugration */
- err = gve_queues_mem_alloc(priv, qpls_alloc_cfg,
- tx_alloc_cfg, rx_alloc_cfg);
+ err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg);
if (err) {
netif_err(priv, drv, priv->dev,
"Adjust config failed to alloc new queues");
@@ -1884,14 +1739,12 @@ static int gve_adjust_config(struct gve_priv *priv,
if (err) {
netif_err(priv, drv, priv->dev,
"Adjust config failed to close old queues");
- gve_queues_mem_free(priv, qpls_alloc_cfg,
- tx_alloc_cfg, rx_alloc_cfg);
+ gve_queues_mem_free(priv, tx_alloc_cfg, rx_alloc_cfg);
return err;
}
/* Bring the device back up again with the new resources. */
- err = gve_queues_start(priv, qpls_alloc_cfg,
- tx_alloc_cfg, rx_alloc_cfg);
+ err = gve_queues_start(priv, tx_alloc_cfg, rx_alloc_cfg);
if (err) {
netif_err(priv, drv, priv->dev,
"Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n");
@@ -1906,40 +1759,32 @@ static int gve_adjust_config(struct gve_priv *priv,
}
int gve_adjust_queues(struct gve_priv *priv,
- struct gve_queue_config new_rx_config,
- struct gve_queue_config new_tx_config)
+ struct gve_rx_queue_config new_rx_config,
+ struct gve_tx_queue_config new_tx_config,
+ bool reset_rss)
{
struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
- struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0};
- struct gve_qpl_config new_qpl_cfg;
int err;
- gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg,
- &tx_alloc_cfg, &rx_alloc_cfg);
-
- /* qpl_cfg is not read-only, it contains a map that gets updated as
- * rings are allocated, which is why we cannot use the yet unreleased
- * one in priv.
- */
- qpls_alloc_cfg.qpl_cfg = &new_qpl_cfg;
- tx_alloc_cfg.qpl_cfg = &new_qpl_cfg;
- rx_alloc_cfg.qpl_cfg = &new_qpl_cfg;
+ gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
/* Relay the new config from ethtool */
- qpls_alloc_cfg.tx_cfg = &new_tx_config;
tx_alloc_cfg.qcfg = &new_tx_config;
rx_alloc_cfg.qcfg_tx = &new_tx_config;
- qpls_alloc_cfg.rx_cfg = &new_rx_config;
- rx_alloc_cfg.qcfg = &new_rx_config;
- tx_alloc_cfg.num_rings = new_tx_config.num_queues;
+ rx_alloc_cfg.qcfg_rx = &new_rx_config;
+ rx_alloc_cfg.reset_rss = reset_rss;
- if (netif_carrier_ok(priv->dev)) {
- err = gve_adjust_config(priv, &qpls_alloc_cfg,
- &tx_alloc_cfg, &rx_alloc_cfg);
+ if (netif_running(priv->dev)) {
+ err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
return err;
}
/* Set the config for the next up. */
+ if (reset_rss) {
+ err = gve_init_rss_config(priv, new_rx_config.num_queues);
+ if (err)
+ return err;
+ }
priv->tx_cfg = new_tx_config;
priv->rx_cfg = new_rx_config;
@@ -1961,20 +1806,37 @@ static void gve_turndown(struct gve_priv *priv)
int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
- napi_disable(&block->napi);
+ if (!gve_tx_was_added_to_block(priv, idx))
+ continue;
+
+ if (idx < priv->tx_cfg.num_queues)
+ netif_queue_set_napi(priv->dev, idx,
+ NETDEV_QUEUE_TYPE_TX, NULL);
+
+ napi_disable_locked(&block->napi);
}
for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
- napi_disable(&block->napi);
+ if (!gve_rx_was_added_to_block(priv, idx))
+ continue;
+
+ netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX,
+ NULL);
+ napi_disable_locked(&block->napi);
}
/* Stop tx queues */
netif_tx_disable(priv->dev);
+ xdp_features_clear_redirect_target_locked(priv->dev);
+
gve_clear_napi_enabled(priv);
gve_clear_report_stats(priv);
+
+ /* Make sure that all traffic is finished processing. */
+ synchronize_net();
}
static void gve_turnup(struct gve_priv *priv)
@@ -1989,30 +1851,71 @@ static void gve_turnup(struct gve_priv *priv)
int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
- napi_enable(&block->napi);
+ if (!gve_tx_was_added_to_block(priv, idx))
+ continue;
+
+ napi_enable_locked(&block->napi);
+
+ if (idx < priv->tx_cfg.num_queues)
+ netif_queue_set_napi(priv->dev, idx,
+ NETDEV_QUEUE_TYPE_TX,
+ &block->napi);
+
if (gve_is_gqi(priv)) {
iowrite32be(0, gve_irq_doorbell(priv, block));
} else {
gve_set_itr_coalesce_usecs_dqo(priv, block,
priv->tx_coalesce_usecs);
}
+
+ /* Any descs written by the NIC before this barrier will be
+ * handled by the one-off napi schedule below. Whereas any
+ * descs after the barrier will generate interrupts.
+ */
+ mb();
+ napi_schedule(&block->napi);
}
for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
- napi_enable(&block->napi);
+ if (!gve_rx_was_added_to_block(priv, idx))
+ continue;
+
+ napi_enable_locked(&block->napi);
+ netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX,
+ &block->napi);
+
if (gve_is_gqi(priv)) {
iowrite32be(0, gve_irq_doorbell(priv, block));
} else {
gve_set_itr_coalesce_usecs_dqo(priv, block,
priv->rx_coalesce_usecs);
}
+
+ /* Any descs written by the NIC before this barrier will be
+ * handled by the one-off napi schedule below. Whereas any
+ * descs after the barrier will generate interrupts.
+ */
+ mb();
+ napi_schedule(&block->napi);
}
+ if (priv->tx_cfg.num_xdp_queues && gve_supports_xdp_xmit(priv))
+ xdp_features_set_redirect_target_locked(priv->dev, false);
+
gve_set_napi_enabled(priv);
}
+static void gve_turnup_and_check_status(struct gve_priv *priv)
+{
+ u32 status;
+
+ gve_turnup(priv);
+ status = ioread32be(&priv->reg_bar0->device_status);
+ gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
+}
+
static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct gve_notify_block *block;
@@ -2077,7 +1980,6 @@ int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split)
{
struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
- struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0};
bool enable_hdr_split;
int err = 0;
@@ -2097,15 +1999,13 @@ int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split)
if (enable_hdr_split == priv->header_split_enabled)
return 0;
- gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg,
- &tx_alloc_cfg, &rx_alloc_cfg);
+ gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
rx_alloc_cfg.enable_header_split = enable_hdr_split;
rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split);
if (netif_running(priv->dev))
- err = gve_adjust_config(priv, &qpls_alloc_cfg,
- &tx_alloc_cfg, &rx_alloc_cfg);
+ err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
return err;
}
@@ -2115,35 +2015,30 @@ static int gve_set_features(struct net_device *netdev,
const netdev_features_t orig_features = netdev->features;
struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
- struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0};
struct gve_priv *priv = netdev_priv(netdev);
- struct gve_qpl_config new_qpl_cfg;
int err;
- gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg,
- &tx_alloc_cfg, &rx_alloc_cfg);
- /* qpl_cfg is not read-only, it contains a map that gets updated as
- * rings are allocated, which is why we cannot use the yet unreleased
- * one in priv.
- */
- qpls_alloc_cfg.qpl_cfg = &new_qpl_cfg;
- tx_alloc_cfg.qpl_cfg = &new_qpl_cfg;
- rx_alloc_cfg.qpl_cfg = &new_qpl_cfg;
+ gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
netdev->features ^= NETIF_F_LRO;
- if (netif_carrier_ok(netdev)) {
- err = gve_adjust_config(priv, &qpls_alloc_cfg,
- &tx_alloc_cfg, &rx_alloc_cfg);
- if (err) {
- /* Revert the change on error. */
- netdev->features = orig_features;
- return err;
- }
+ if (netif_running(netdev)) {
+ err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
+ if (err)
+ goto revert_features;
}
}
+ if ((netdev->features & NETIF_F_NTUPLE) && !(features & NETIF_F_NTUPLE)) {
+ err = gve_flow_rules_reset(priv);
+ if (err)
+ goto revert_features;
+ }
return 0;
+
+revert_features:
+ netdev->features = orig_features;
+ return err;
}
static const struct net_device_ops gve_netdev_ops = {
@@ -2182,7 +2077,9 @@ static void gve_handle_reset(struct gve_priv *priv)
if (gve_get_do_reset(priv)) {
rtnl_lock();
+ netdev_lock(priv->dev);
gve_reset(priv, false);
+ netdev_unlock(priv->dev);
rtnl_unlock();
}
}
@@ -2278,14 +2175,17 @@ static void gve_service_task(struct work_struct *work)
static void gve_set_netdev_xdp_features(struct gve_priv *priv)
{
+ xdp_features_t xdp_features;
+
if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
- priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC;
- priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT;
- priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
- priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
+ xdp_features = NETDEV_XDP_ACT_BASIC;
+ xdp_features |= NETDEV_XDP_ACT_REDIRECT;
+ xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
} else {
- priv->dev->xdp_features = 0;
+ xdp_features = 0;
}
+
+ xdp_set_features_flag_locked(priv->dev, xdp_features);
}
static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
@@ -2359,6 +2259,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
priv->rx_cfg.num_queues);
}
+ priv->tx_cfg.num_xdp_queues = 0;
dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
@@ -2422,7 +2323,7 @@ err:
int gve_reset(struct gve_priv *priv, bool attempt_teardown)
{
- bool was_up = netif_carrier_ok(priv->dev);
+ bool was_up = netif_running(priv->dev);
int err;
dev_info(&priv->pdev->dev, "Performing reset\n");
@@ -2473,6 +2374,188 @@ static void gve_write_version(u8 __iomem *driver_version_register)
writeb('\n', driver_version_register);
}
+static int gve_rx_queue_stop(struct net_device *dev, void *per_q_mem, int idx)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+ struct gve_rx_ring *gve_per_q_mem;
+ int err;
+
+ if (!priv->rx)
+ return -EAGAIN;
+
+ /* Destroying queue 0 while other queues exist is not supported in DQO */
+ if (!gve_is_gqi(priv) && idx == 0)
+ return -ERANGE;
+
+ /* Single-queue destruction requires quiescence on all queues */
+ gve_turndown(priv);
+
+ /* This failure will trigger a reset - no need to clean up */
+ err = gve_adminq_destroy_single_rx_queue(priv, idx);
+ if (err)
+ return err;
+
+ if (gve_is_qpl(priv)) {
+ /* This failure will trigger a reset - no need to clean up */
+ err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, idx));
+ if (err)
+ return err;
+ }
+
+ gve_rx_stop_ring(priv, idx);
+
+ /* Turn the unstopped queues back up */
+ gve_turnup_and_check_status(priv);
+
+ gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
+ *gve_per_q_mem = priv->rx[idx];
+ memset(&priv->rx[idx], 0, sizeof(priv->rx[idx]));
+ return 0;
+}
+
+static void gve_rx_queue_mem_free(struct net_device *dev, void *per_q_mem)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+ struct gve_rx_alloc_rings_cfg cfg = {0};
+ struct gve_rx_ring *gve_per_q_mem;
+
+ gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
+ gve_rx_get_curr_alloc_cfg(priv, &cfg);
+
+ if (gve_is_gqi(priv))
+ gve_rx_free_ring_gqi(priv, gve_per_q_mem, &cfg);
+ else
+ gve_rx_free_ring_dqo(priv, gve_per_q_mem, &cfg);
+}
+
+static int gve_rx_queue_mem_alloc(struct net_device *dev, void *per_q_mem,
+ int idx)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+ struct gve_rx_alloc_rings_cfg cfg = {0};
+ struct gve_rx_ring *gve_per_q_mem;
+ int err;
+
+ if (!priv->rx)
+ return -EAGAIN;
+
+ gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
+ gve_rx_get_curr_alloc_cfg(priv, &cfg);
+
+ if (gve_is_gqi(priv))
+ err = gve_rx_alloc_ring_gqi(priv, &cfg, gve_per_q_mem, idx);
+ else
+ err = gve_rx_alloc_ring_dqo(priv, &cfg, gve_per_q_mem, idx);
+
+ return err;
+}
+
+static int gve_rx_queue_start(struct net_device *dev, void *per_q_mem, int idx)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+ struct gve_rx_ring *gve_per_q_mem;
+ int err;
+
+ if (!priv->rx)
+ return -EAGAIN;
+
+ gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
+ priv->rx[idx] = *gve_per_q_mem;
+
+ /* Single-queue creation requires quiescence on all queues */
+ gve_turndown(priv);
+
+ gve_rx_start_ring(priv, idx);
+
+ if (gve_is_qpl(priv)) {
+ /* This failure will trigger a reset - no need to clean up */
+ err = gve_register_qpl(priv, gve_rx_get_qpl(priv, idx));
+ if (err)
+ goto abort;
+ }
+
+ /* This failure will trigger a reset - no need to clean up */
+ err = gve_adminq_create_single_rx_queue(priv, idx);
+ if (err)
+ goto abort;
+
+ if (gve_is_gqi(priv))
+ gve_rx_write_doorbell(priv, &priv->rx[idx]);
+ else
+ gve_rx_post_buffers_dqo(&priv->rx[idx]);
+
+ /* Turn the unstopped queues back up */
+ gve_turnup_and_check_status(priv);
+ return 0;
+
+abort:
+ gve_rx_stop_ring(priv, idx);
+
+ /* All failures in this func result in a reset, by clearing the struct
+ * at idx, we prevent a double free when that reset runs. The reset,
+ * which needs the rtnl lock, will not run till this func returns and
+ * its caller gives up the lock.
+ */
+ memset(&priv->rx[idx], 0, sizeof(priv->rx[idx]));
+ return err;
+}
+
+static const struct netdev_queue_mgmt_ops gve_queue_mgmt_ops = {
+ .ndo_queue_mem_size = sizeof(struct gve_rx_ring),
+ .ndo_queue_mem_alloc = gve_rx_queue_mem_alloc,
+ .ndo_queue_mem_free = gve_rx_queue_mem_free,
+ .ndo_queue_start = gve_rx_queue_start,
+ .ndo_queue_stop = gve_rx_queue_stop,
+};
+
+static void gve_get_rx_queue_stats(struct net_device *dev, int idx,
+ struct netdev_queue_stats_rx *rx_stats)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+ struct gve_rx_ring *rx = &priv->rx[idx];
+ unsigned int start;
+
+ do {
+ start = u64_stats_fetch_begin(&rx->statss);
+ rx_stats->packets = rx->rpackets;
+ rx_stats->bytes = rx->rbytes;
+ rx_stats->alloc_fail = rx->rx_skb_alloc_fail +
+ rx->rx_buf_alloc_fail;
+ } while (u64_stats_fetch_retry(&rx->statss, start));
+}
+
+static void gve_get_tx_queue_stats(struct net_device *dev, int idx,
+ struct netdev_queue_stats_tx *tx_stats)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+ struct gve_tx_ring *tx = &priv->tx[idx];
+ unsigned int start;
+
+ do {
+ start = u64_stats_fetch_begin(&tx->statss);
+ tx_stats->packets = tx->pkt_done;
+ tx_stats->bytes = tx->bytes_done;
+ } while (u64_stats_fetch_retry(&tx->statss, start));
+}
+
+static void gve_get_base_stats(struct net_device *dev,
+ struct netdev_queue_stats_rx *rx,
+ struct netdev_queue_stats_tx *tx)
+{
+ rx->packets = 0;
+ rx->bytes = 0;
+ rx->alloc_fail = 0;
+
+ tx->packets = 0;
+ tx->bytes = 0;
+}
+
+static const struct netdev_stat_ops gve_stat_ops = {
+ .get_queue_stats_rx = gve_get_rx_queue_stats,
+ .get_queue_stats_tx = gve_get_tx_queue_stats,
+ .get_base_stats = gve_get_base_stats,
+};
+
static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
int max_tx_queues, max_rx_queues;
@@ -2527,6 +2610,8 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
pci_set_drvdata(pdev, dev);
dev->ethtool_ops = &gve_ethtool_ops;
dev->netdev_ops = &gve_netdev_ops;
+ dev->queue_mgmt_ops = &gve_queue_mgmt_ops;
+ dev->stat_ops = &gve_stat_ops;
/* Set default and supported features.
*
@@ -2555,7 +2640,7 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
priv->service_task_flags = 0x0;
priv->state_flags = 0x0;
priv->ethtool_flags = 0x0;
- priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE;
+ priv->rx_cfg.packet_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;
priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;
gve_set_probe_in_progress(priv);
@@ -2574,6 +2659,9 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
goto abort_with_wq;
+ if (!gve_is_gqi(priv) && !gve_is_qpl(priv))
+ dev->netmem_tx = true;
+
err = register_netdev(dev);
if (err)
goto abort_with_gve_init;
@@ -2628,9 +2716,10 @@ static void gve_shutdown(struct pci_dev *pdev)
{
struct net_device *netdev = pci_get_drvdata(pdev);
struct gve_priv *priv = netdev_priv(netdev);
- bool was_up = netif_carrier_ok(priv->dev);
+ bool was_up = netif_running(priv->dev);
rtnl_lock();
+ netdev_lock(netdev);
if (was_up && gve_close(priv->dev)) {
/* If the dev was up, attempt to close, if close fails, reset */
gve_reset_and_teardown(priv, was_up);
@@ -2638,6 +2727,7 @@ static void gve_shutdown(struct pci_dev *pdev)
/* If the dev wasn't up or close worked, finish tearing down */
gve_teardown_priv_resources(priv);
}
+ netdev_unlock(netdev);
rtnl_unlock();
}
@@ -2646,10 +2736,11 @@ static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
{
struct net_device *netdev = pci_get_drvdata(pdev);
struct gve_priv *priv = netdev_priv(netdev);
- bool was_up = netif_carrier_ok(priv->dev);
+ bool was_up = netif_running(priv->dev);
priv->suspend_cnt++;
rtnl_lock();
+ netdev_lock(netdev);
if (was_up && gve_close(priv->dev)) {
/* If the dev was up, attempt to close, if close fails, reset */
gve_reset_and_teardown(priv, was_up);
@@ -2658,6 +2749,7 @@ static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
gve_teardown_priv_resources(priv);
}
priv->up_before_suspend = was_up;
+ netdev_unlock(netdev);
rtnl_unlock();
return 0;
}
@@ -2670,7 +2762,9 @@ static int gve_resume(struct pci_dev *pdev)
priv->resume_cnt++;
rtnl_lock();
+ netdev_lock(netdev);
err = gve_reset_recovery(priv, priv->up_before_suspend);
+ netdev_unlock(netdev);
rtnl_unlock();
return err;
}
diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
index 20f5a9e7fae9..90e875c1832f 100644
--- a/drivers/net/ethernet/google/gve/gve_rx.c
+++ b/drivers/net/ethernet/google/gve/gve_rx.c
@@ -30,6 +30,9 @@ static void gve_rx_unfill_pages(struct gve_priv *priv,
u32 slots = rx->mask + 1;
int i;
+ if (!rx->data.page_info)
+ return;
+
if (rx->data.raw_addressing) {
for (i = 0; i < slots; i++)
gve_rx_free_buffer(&priv->pdev->dev, &rx->data.page_info[i],
@@ -38,8 +41,6 @@ static void gve_rx_unfill_pages(struct gve_priv *priv,
for (i = 0; i < slots; i++)
page_ref_sub(rx->data.page_info[i].page,
rx->data.page_info[i].pagecnt_bias - 1);
- gve_unassign_qpl(cfg->qpl_cfg, rx->data.qpl->id);
- rx->data.qpl = NULL;
for (i = 0; i < rx->qpl_copy_pool_mask + 1; i++) {
page_ref_sub(rx->qpl_copy_pool[i].page,
@@ -51,6 +52,41 @@ static void gve_rx_unfill_pages(struct gve_priv *priv,
rx->data.page_info = NULL;
}
+static void gve_rx_ctx_clear(struct gve_rx_ctx *ctx)
+{
+ ctx->skb_head = NULL;
+ ctx->skb_tail = NULL;
+ ctx->total_size = 0;
+ ctx->frag_cnt = 0;
+ ctx->drop_pkt = false;
+}
+
+static void gve_rx_init_ring_state_gqi(struct gve_rx_ring *rx)
+{
+ rx->desc.seqno = 1;
+ rx->cnt = 0;
+ gve_rx_ctx_clear(&rx->ctx);
+}
+
+static void gve_rx_reset_ring_gqi(struct gve_priv *priv, int idx)
+{
+ struct gve_rx_ring *rx = &priv->rx[idx];
+ const u32 slots = priv->rx_desc_cnt;
+ size_t size;
+
+ /* Reset desc ring */
+ if (rx->desc.desc_ring) {
+ size = slots * sizeof(rx->desc.desc_ring[0]);
+ memset(rx->desc.desc_ring, 0, size);
+ }
+
+ /* Reset q_resources */
+ if (rx->q_resources)
+ memset(rx->q_resources, 0, sizeof(*rx->q_resources));
+
+ gve_rx_init_ring_state_gqi(rx);
+}
+
void gve_rx_stop_ring_gqi(struct gve_priv *priv, int idx)
{
int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
@@ -60,43 +96,60 @@ void gve_rx_stop_ring_gqi(struct gve_priv *priv, int idx)
gve_remove_napi(priv, ntfy_idx);
gve_rx_remove_from_block(priv, idx);
+ gve_rx_reset_ring_gqi(priv, idx);
}
-static void gve_rx_free_ring_gqi(struct gve_priv *priv, struct gve_rx_ring *rx,
- struct gve_rx_alloc_rings_cfg *cfg)
+void gve_rx_free_ring_gqi(struct gve_priv *priv, struct gve_rx_ring *rx,
+ struct gve_rx_alloc_rings_cfg *cfg)
{
struct device *dev = &priv->pdev->dev;
u32 slots = rx->mask + 1;
int idx = rx->q_num;
size_t bytes;
+ u32 qpl_id;
- bytes = sizeof(struct gve_rx_desc) * cfg->ring_size;
- dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus);
- rx->desc.desc_ring = NULL;
+ if (rx->desc.desc_ring) {
+ bytes = sizeof(struct gve_rx_desc) * cfg->ring_size;
+ dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus);
+ rx->desc.desc_ring = NULL;
+ }
- dma_free_coherent(dev, sizeof(*rx->q_resources),
- rx->q_resources, rx->q_resources_bus);
- rx->q_resources = NULL;
+ if (rx->q_resources) {
+ dma_free_coherent(dev, sizeof(*rx->q_resources),
+ rx->q_resources, rx->q_resources_bus);
+ rx->q_resources = NULL;
+ }
gve_rx_unfill_pages(priv, rx, cfg);
- bytes = sizeof(*rx->data.data_ring) * slots;
- dma_free_coherent(dev, bytes, rx->data.data_ring,
- rx->data.data_bus);
- rx->data.data_ring = NULL;
+ if (rx->data.data_ring) {
+ bytes = sizeof(*rx->data.data_ring) * slots;
+ dma_free_coherent(dev, bytes, rx->data.data_ring,
+ rx->data.data_bus);
+ rx->data.data_ring = NULL;
+ }
kvfree(rx->qpl_copy_pool);
rx->qpl_copy_pool = NULL;
+ if (rx->data.qpl) {
+ qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, idx);
+ gve_free_queue_page_list(priv, rx->data.qpl, qpl_id);
+ rx->data.qpl = NULL;
+ }
+
netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}
-static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
- dma_addr_t addr, struct page *page, __be64 *slot_addr)
+static void gve_setup_rx_buffer(struct gve_rx_ring *rx,
+ struct gve_rx_slot_page_info *page_info,
+ dma_addr_t addr, struct page *page,
+ __be64 *slot_addr)
{
page_info->page = page;
page_info->page_offset = 0;
page_info->page_address = page_address(page);
+ page_info->buf_size = rx->packet_buffer_size;
*slot_addr = cpu_to_be64(addr);
/* The page already has 1 ref */
page_ref_add(page, INT_MAX - 1);
@@ -121,7 +174,7 @@ static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev,
return err;
}
- gve_setup_rx_buffer(page_info, dma, page, &data_slot->addr);
+ gve_setup_rx_buffer(rx, page_info, dma, page, &data_slot->addr);
return 0;
}
@@ -144,20 +197,13 @@ static int gve_rx_prefill_pages(struct gve_rx_ring *rx,
if (!rx->data.page_info)
return -ENOMEM;
- if (!rx->data.raw_addressing) {
- rx->data.qpl = gve_assign_rx_qpl(cfg, rx->q_num);
- if (!rx->data.qpl) {
- kvfree(rx->data.page_info);
- rx->data.page_info = NULL;
- return -ENOMEM;
- }
- }
for (i = 0; i < slots; i++) {
if (!rx->data.raw_addressing) {
struct page *page = rx->data.qpl->pages[i];
dma_addr_t addr = i * PAGE_SIZE;
- gve_setup_rx_buffer(&rx->data.page_info[i], addr, page,
+ gve_setup_rx_buffer(rx, &rx->data.page_info[i], addr,
+ page,
&rx->data.data_ring[i].qpl_offset);
continue;
}
@@ -180,6 +226,7 @@ static int gve_rx_prefill_pages(struct gve_rx_ring *rx,
rx->qpl_copy_pool[j].page = page;
rx->qpl_copy_pool[j].page_offset = 0;
rx->qpl_copy_pool[j].page_address = page_address(page);
+ rx->qpl_copy_pool[j].buf_size = rx->packet_buffer_size;
/* The page already has 1 ref. */
page_ref_add(page, INT_MAX - 1);
@@ -204,9 +251,6 @@ alloc_err_qpl:
page_ref_sub(rx->data.page_info[i].page,
rx->data.page_info[i].pagecnt_bias - 1);
- gve_unassign_qpl(cfg->qpl_cfg, rx->data.qpl->id);
- rx->data.qpl = NULL;
-
return err;
alloc_err_rda:
@@ -217,15 +261,6 @@ alloc_err_rda:
return err;
}
-static void gve_rx_ctx_clear(struct gve_rx_ctx *ctx)
-{
- ctx->skb_head = NULL;
- ctx->skb_tail = NULL;
- ctx->total_size = 0;
- ctx->frag_cnt = 0;
- ctx->drop_pkt = false;
-}
-
void gve_rx_start_ring_gqi(struct gve_priv *priv, int idx)
{
int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
@@ -234,14 +269,16 @@ void gve_rx_start_ring_gqi(struct gve_priv *priv, int idx)
gve_add_napi(priv, ntfy_idx, gve_napi_poll);
}
-static int gve_rx_alloc_ring_gqi(struct gve_priv *priv,
- struct gve_rx_alloc_rings_cfg *cfg,
- struct gve_rx_ring *rx,
- int idx)
+int gve_rx_alloc_ring_gqi(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg,
+ struct gve_rx_ring *rx,
+ int idx)
{
struct device *hdev = &priv->pdev->dev;
- u32 slots = priv->rx_data_slot_cnt;
+ u32 slots = cfg->ring_size;
int filled_pages;
+ int qpl_page_cnt;
+ u32 qpl_id = 0;
size_t bytes;
int err;
@@ -251,6 +288,7 @@ static int gve_rx_alloc_ring_gqi(struct gve_priv *priv,
rx->gve = priv;
rx->q_num = idx;
+ rx->packet_buffer_size = cfg->packet_buffer_size;
rx->mask = slots - 1;
rx->data.raw_addressing = cfg->raw_addressing;
@@ -274,10 +312,22 @@ static int gve_rx_alloc_ring_gqi(struct gve_priv *priv,
goto abort_with_slots;
}
+ if (!rx->data.raw_addressing) {
+ qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num);
+ qpl_page_cnt = cfg->ring_size;
+
+ rx->data.qpl = gve_alloc_queue_page_list(priv, qpl_id,
+ qpl_page_cnt);
+ if (!rx->data.qpl) {
+ err = -ENOMEM;
+ goto abort_with_copy_pool;
+ }
+ }
+
filled_pages = gve_rx_prefill_pages(rx, cfg);
if (filled_pages < 0) {
err = -ENOMEM;
- goto abort_with_copy_pool;
+ goto abort_with_qpl;
}
rx->fill_cnt = filled_pages;
/* Ensure data ring slots (packet buffers) are visible. */
@@ -304,11 +354,9 @@ static int gve_rx_alloc_ring_gqi(struct gve_priv *priv,
err = -ENOMEM;
goto abort_with_q_resources;
}
- rx->cnt = 0;
rx->db_threshold = slots / 2;
- rx->desc.seqno = 1;
+ gve_rx_init_ring_state_gqi(rx);
- rx->packet_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;
gve_rx_ctx_clear(&rx->ctx);
return 0;
@@ -319,6 +367,11 @@ abort_with_q_resources:
rx->q_resources = NULL;
abort_filled:
gve_rx_unfill_pages(priv, rx, cfg);
+abort_with_qpl:
+ if (!rx->data.raw_addressing) {
+ gve_free_queue_page_list(priv, rx->data.qpl, qpl_id);
+ rx->data.qpl = NULL;
+ }
abort_with_copy_pool:
kvfree(rx->qpl_copy_pool);
rx->qpl_copy_pool = NULL;
@@ -337,18 +390,12 @@ int gve_rx_alloc_rings_gqi(struct gve_priv *priv,
int err = 0;
int i, j;
- if (!cfg->raw_addressing && !cfg->qpls) {
- netif_err(priv, drv, priv->dev,
- "Cannot alloc QPL ring before allocing QPLs\n");
- return -EINVAL;
- }
-
- rx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_rx_ring),
+ rx = kvcalloc(cfg->qcfg_rx->max_queues, sizeof(struct gve_rx_ring),
GFP_KERNEL);
if (!rx)
return -ENOMEM;
- for (i = 0; i < cfg->qcfg->num_queues; i++) {
+ for (i = 0; i < cfg->qcfg_rx->num_queues; i++) {
err = gve_rx_alloc_ring_gqi(priv, cfg, &rx[i], i);
if (err) {
netif_err(priv, drv, priv->dev,
@@ -377,7 +424,7 @@ void gve_rx_free_rings_gqi(struct gve_priv *priv,
if (!rx)
return;
- for (i = 0; i < cfg->qcfg->num_queues; i++)
+ for (i = 0; i < cfg->qcfg_rx->num_queues; i++)
gve_rx_free_ring_gqi(priv, &rx[i], cfg);
kvfree(rx);
@@ -548,7 +595,7 @@ static struct sk_buff *gve_rx_copy_to_pool(struct gve_rx_ring *rx,
copy_page_info->pad = page_info->pad;
skb = gve_rx_add_frags(napi, copy_page_info,
- rx->packet_buffer_size, len, ctx);
+ copy_page_info->buf_size, len, ctx);
if (unlikely(!skb))
return NULL;
@@ -588,7 +635,8 @@ gve_rx_qpl(struct device *dev, struct net_device *netdev,
* device.
*/
if (page_info->can_flip) {
- skb = gve_rx_add_frags(napi, page_info, rx->packet_buffer_size, len, ctx);
+ skb = gve_rx_add_frags(napi, page_info, page_info->buf_size,
+ len, ctx);
/* No point in recycling if we didn't get the skb */
if (skb) {
/* Make sure that the page isn't freed. */
@@ -638,7 +686,7 @@ static struct sk_buff *gve_rx_skb(struct gve_priv *priv, struct gve_rx_ring *rx,
skb = gve_rx_raw_addressing(&priv->pdev->dev, netdev,
page_info, len, napi,
data_slot,
- rx->packet_buffer_size, ctx);
+ page_info->buf_size, ctx);
} else {
skb = gve_rx_qpl(&priv->pdev->dev, netdev, rx,
page_info, len, napi, data_slot);
@@ -813,7 +861,7 @@ static void gve_rx(struct gve_rx_ring *rx, netdev_features_t feat,
void *old_data;
int xdp_act;
- xdp_init_buff(&xdp, rx->packet_buffer_size, &rx->xdp_rxq);
+ xdp_init_buff(&xdp, page_info->buf_size, &rx->xdp_rxq);
xdp_prepare_buff(&xdp, page_info->page_address +
page_info->page_offset, GVE_RX_PAD,
len, false);
diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
index 8e8071308aeb..dcb0545baa50 100644
--- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c
+++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
@@ -16,220 +16,120 @@
#include <net/ipv6.h>
#include <net/tcp.h>
-static int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs)
-{
- return page_count(bs->page_info.page) - bs->page_info.pagecnt_bias;
-}
-
-static void gve_free_page_dqo(struct gve_priv *priv,
- struct gve_rx_buf_state_dqo *bs,
- bool free_page)
-{
- page_ref_sub(bs->page_info.page, bs->page_info.pagecnt_bias - 1);
- if (free_page)
- gve_free_page(&priv->pdev->dev, bs->page_info.page, bs->addr,
- DMA_FROM_DEVICE);
- bs->page_info.page = NULL;
-}
-
-static struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx)
-{
- struct gve_rx_buf_state_dqo *buf_state;
- s16 buffer_id;
-
- buffer_id = rx->dqo.free_buf_states;
- if (unlikely(buffer_id == -1))
- return NULL;
-
- buf_state = &rx->dqo.buf_states[buffer_id];
-
- /* Remove buf_state from free list */
- rx->dqo.free_buf_states = buf_state->next;
-
- /* Point buf_state to itself to mark it as allocated */
- buf_state->next = buffer_id;
-
- return buf_state;
-}
-
-static bool gve_buf_state_is_allocated(struct gve_rx_ring *rx,
- struct gve_rx_buf_state_dqo *buf_state)
-{
- s16 buffer_id = buf_state - rx->dqo.buf_states;
-
- return buf_state->next == buffer_id;
-}
-
-static void gve_free_buf_state(struct gve_rx_ring *rx,
- struct gve_rx_buf_state_dqo *buf_state)
+static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx)
{
- s16 buffer_id = buf_state - rx->dqo.buf_states;
+ struct device *hdev = &priv->pdev->dev;
+ int buf_count = rx->dqo.bufq.mask + 1;
- buf_state->next = rx->dqo.free_buf_states;
- rx->dqo.free_buf_states = buffer_id;
+ if (rx->dqo.hdr_bufs.data) {
+ dma_free_coherent(hdev, priv->header_buf_size * buf_count,
+ rx->dqo.hdr_bufs.data, rx->dqo.hdr_bufs.addr);
+ rx->dqo.hdr_bufs.data = NULL;
+ }
}
-static struct gve_rx_buf_state_dqo *
-gve_dequeue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list)
+static void gve_rx_init_ring_state_dqo(struct gve_rx_ring *rx,
+ const u32 buffer_queue_slots,
+ const u32 completion_queue_slots)
{
- struct gve_rx_buf_state_dqo *buf_state;
- s16 buffer_id;
-
- buffer_id = list->head;
- if (unlikely(buffer_id == -1))
- return NULL;
-
- buf_state = &rx->dqo.buf_states[buffer_id];
-
- /* Remove buf_state from list */
- list->head = buf_state->next;
- if (buf_state->next == -1)
- list->tail = -1;
-
- /* Point buf_state to itself to mark it as allocated */
- buf_state->next = buffer_id;
-
- return buf_state;
-}
+ int i;
-static void gve_enqueue_buf_state(struct gve_rx_ring *rx,
- struct gve_index_list *list,
- struct gve_rx_buf_state_dqo *buf_state)
-{
- s16 buffer_id = buf_state - rx->dqo.buf_states;
+ /* Set buffer queue state */
+ rx->dqo.bufq.mask = buffer_queue_slots - 1;
+ rx->dqo.bufq.head = 0;
+ rx->dqo.bufq.tail = 0;
- buf_state->next = -1;
+ /* Set completion queue state */
+ rx->dqo.complq.num_free_slots = completion_queue_slots;
+ rx->dqo.complq.mask = completion_queue_slots - 1;
+ rx->dqo.complq.cur_gen_bit = 0;
+ rx->dqo.complq.head = 0;
- if (list->head == -1) {
- list->head = buffer_id;
- list->tail = buffer_id;
- } else {
- int tail = list->tail;
+ /* Set RX SKB context */
+ rx->ctx.skb_head = NULL;
+ rx->ctx.skb_tail = NULL;
- rx->dqo.buf_states[tail].next = buffer_id;
- list->tail = buffer_id;
+ /* Set up linked list of buffer IDs */
+ if (rx->dqo.buf_states) {
+ for (i = 0; i < rx->dqo.num_buf_states - 1; i++)
+ rx->dqo.buf_states[i].next = i + 1;
+ rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1;
}
+
+ rx->dqo.free_buf_states = 0;
+ rx->dqo.recycled_buf_states.head = -1;
+ rx->dqo.recycled_buf_states.tail = -1;
+ rx->dqo.used_buf_states.head = -1;
+ rx->dqo.used_buf_states.tail = -1;
}
-static struct gve_rx_buf_state_dqo *
-gve_get_recycled_buf_state(struct gve_rx_ring *rx)
+static void gve_rx_reset_ring_dqo(struct gve_priv *priv, int idx)
{
- struct gve_rx_buf_state_dqo *buf_state;
+ struct gve_rx_ring *rx = &priv->rx[idx];
+ size_t size;
int i;
- /* Recycled buf states are immediately usable. */
- buf_state = gve_dequeue_buf_state(rx, &rx->dqo.recycled_buf_states);
- if (likely(buf_state))
- return buf_state;
-
- if (unlikely(rx->dqo.used_buf_states.head == -1))
- return NULL;
-
- /* Used buf states are only usable when ref count reaches 0, which means
- * no SKBs refer to them.
- *
- * Search a limited number before giving up.
- */
- for (i = 0; i < 5; i++) {
- buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
- if (gve_buf_ref_cnt(buf_state) == 0) {
- rx->dqo.used_buf_states_cnt--;
- return buf_state;
- }
+ const u32 buffer_queue_slots = priv->rx_desc_cnt;
+ const u32 completion_queue_slots = priv->rx_desc_cnt;
- gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
+ /* Reset buffer queue */
+ if (rx->dqo.bufq.desc_ring) {
+ size = sizeof(rx->dqo.bufq.desc_ring[0]) *
+ buffer_queue_slots;
+ memset(rx->dqo.bufq.desc_ring, 0, size);
}
- /* For QPL, we cannot allocate any new buffers and must
- * wait for the existing ones to be available.
- */
- if (rx->dqo.qpl)
- return NULL;
-
- /* If there are no free buf states discard an entry from
- * `used_buf_states` so it can be used.
- */
- if (unlikely(rx->dqo.free_buf_states == -1)) {
- buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
- if (gve_buf_ref_cnt(buf_state) == 0)
- return buf_state;
-
- gve_free_page_dqo(rx->gve, buf_state, true);
- gve_free_buf_state(rx, buf_state);
+ /* Reset completion queue */
+ if (rx->dqo.complq.desc_ring) {
+ size = sizeof(rx->dqo.complq.desc_ring[0]) *
+ completion_queue_slots;
+ memset(rx->dqo.complq.desc_ring, 0, size);
}
- return NULL;
-}
-
-static int gve_alloc_page_dqo(struct gve_rx_ring *rx,
- struct gve_rx_buf_state_dqo *buf_state)
-{
- struct gve_priv *priv = rx->gve;
- u32 idx;
+ /* Reset q_resources */
+ if (rx->q_resources)
+ memset(rx->q_resources, 0, sizeof(*rx->q_resources));
- if (!rx->dqo.qpl) {
- int err;
+ /* Reset buf states */
+ if (rx->dqo.buf_states) {
+ for (i = 0; i < rx->dqo.num_buf_states; i++) {
+ struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];
- err = gve_alloc_page(priv, &priv->pdev->dev,
- &buf_state->page_info.page,
- &buf_state->addr,
- DMA_FROM_DEVICE, GFP_ATOMIC);
- if (err)
- return err;
- } else {
- idx = rx->dqo.next_qpl_page_idx;
- if (idx >= priv->rx_pages_per_qpl) {
- net_err_ratelimited("%s: Out of QPL pages\n",
- priv->dev->name);
- return -ENOMEM;
+ if (rx->dqo.page_pool)
+ gve_free_to_page_pool(rx, bs, false);
+ else
+ gve_free_qpl_page_dqo(bs);
}
- buf_state->page_info.page = rx->dqo.qpl->pages[idx];
- buf_state->addr = rx->dqo.qpl->page_buses[idx];
- rx->dqo.next_qpl_page_idx++;
}
- buf_state->page_info.page_offset = 0;
- buf_state->page_info.page_address =
- page_address(buf_state->page_info.page);
- buf_state->last_single_ref_offset = 0;
-
- /* The page already has 1 ref. */
- page_ref_add(buf_state->page_info.page, INT_MAX - 1);
- buf_state->page_info.pagecnt_bias = INT_MAX;
- return 0;
-}
-
-static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx)
-{
- struct device *hdev = &priv->pdev->dev;
- int buf_count = rx->dqo.bufq.mask + 1;
-
- if (rx->dqo.hdr_bufs.data) {
- dma_free_coherent(hdev, priv->header_buf_size * buf_count,
- rx->dqo.hdr_bufs.data, rx->dqo.hdr_bufs.addr);
- rx->dqo.hdr_bufs.data = NULL;
- }
+ gve_rx_init_ring_state_dqo(rx, buffer_queue_slots,
+ completion_queue_slots);
}
void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx)
{
int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
+ struct gve_rx_ring *rx = &priv->rx[idx];
if (!gve_rx_was_added_to_block(priv, idx))
return;
+ if (rx->dqo.page_pool)
+ page_pool_disable_direct_recycling(rx->dqo.page_pool);
gve_remove_napi(priv, ntfy_idx);
gve_rx_remove_from_block(priv, idx);
+ gve_rx_reset_ring_dqo(priv, idx);
}
-static void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
- struct gve_rx_alloc_rings_cfg *cfg)
+void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
+ struct gve_rx_alloc_rings_cfg *cfg)
{
struct device *hdev = &priv->pdev->dev;
size_t completion_queue_slots;
size_t buffer_queue_slots;
int idx = rx->q_num;
size_t size;
+ u32 qpl_id;
int i;
completion_queue_slots = rx->dqo.complq.mask + 1;
@@ -243,12 +143,16 @@ static void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
for (i = 0; i < rx->dqo.num_buf_states; i++) {
struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];
- /* Only free page for RDA. QPL pages are freed in gve_main. */
- if (bs->page_info.page)
- gve_free_page_dqo(priv, bs, !rx->dqo.qpl);
+
+ if (rx->dqo.page_pool)
+ gve_free_to_page_pool(rx, bs, false);
+ else
+ gve_free_qpl_page_dqo(bs);
}
+
if (rx->dqo.qpl) {
- gve_unassign_qpl(cfg->qpl_cfg, rx->dqo.qpl->id);
+ qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num);
+ gve_free_queue_page_list(priv, rx->dqo.qpl, qpl_id);
rx->dqo.qpl = NULL;
}
@@ -270,15 +174,20 @@ static void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
kvfree(rx->dqo.buf_states);
rx->dqo.buf_states = NULL;
+ if (rx->dqo.page_pool) {
+ page_pool_destroy(rx->dqo.page_pool);
+ rx->dqo.page_pool = NULL;
+ }
+
gve_rx_free_hdr_bufs(priv, rx);
netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}
-static int gve_rx_alloc_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx)
+static int gve_rx_alloc_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx,
+ const u32 buf_count)
{
struct device *hdev = &priv->pdev->dev;
- int buf_count = rx->dqo.bufq.mask + 1;
rx->dqo.hdr_bufs.data = dma_alloc_coherent(hdev, priv->header_buf_size * buf_count,
&rx->dqo.hdr_bufs.addr, GFP_KERNEL);
@@ -296,17 +205,18 @@ void gve_rx_start_ring_dqo(struct gve_priv *priv, int idx)
gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo);
}
-static int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
- struct gve_rx_alloc_rings_cfg *cfg,
- struct gve_rx_ring *rx,
- int idx)
+int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg,
+ struct gve_rx_ring *rx,
+ int idx)
{
struct device *hdev = &priv->pdev->dev;
+ struct page_pool *pool;
+ int qpl_page_cnt;
size_t size;
- int i;
+ u32 qpl_id;
- const u32 buffer_queue_slots = cfg->raw_addressing ?
- priv->options_dqo_rda.rx_buff_ring_entries : cfg->ring_size;
+ const u32 buffer_queue_slots = cfg->ring_size;
const u32 completion_queue_slots = cfg->ring_size;
netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n");
@@ -314,15 +224,18 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
memset(rx, 0, sizeof(*rx));
rx->gve = priv;
rx->q_num = idx;
- rx->dqo.bufq.mask = buffer_queue_slots - 1;
- rx->dqo.complq.num_free_slots = completion_queue_slots;
- rx->dqo.complq.mask = completion_queue_slots - 1;
- rx->ctx.skb_head = NULL;
- rx->ctx.skb_tail = NULL;
+ rx->packet_buffer_size = cfg->packet_buffer_size;
+
+ if (cfg->xdp) {
+ rx->packet_buffer_truesize = GVE_XDP_RX_BUFFER_SIZE_DQO;
+ rx->rx_headroom = XDP_PACKET_HEADROOM;
+ } else {
+ rx->packet_buffer_truesize = rx->packet_buffer_size;
+ rx->rx_headroom = 0;
+ }
- rx->dqo.num_buf_states = cfg->raw_addressing ?
- min_t(s16, S16_MAX, buffer_queue_slots * 4) :
- priv->rx_pages_per_qpl;
+ rx->dqo.num_buf_states = cfg->raw_addressing ? buffer_queue_slots :
+ gve_get_rx_pages_per_qpl_dqo(cfg->ring_size);
rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states,
sizeof(rx->dqo.buf_states[0]),
GFP_KERNEL);
@@ -331,19 +244,9 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
/* Allocate header buffers for header-split */
if (cfg->enable_header_split)
- if (gve_rx_alloc_hdr_bufs(priv, rx))
+ if (gve_rx_alloc_hdr_bufs(priv, rx, buffer_queue_slots))
goto err;
- /* Set up linked list of buffer IDs */
- for (i = 0; i < rx->dqo.num_buf_states - 1; i++)
- rx->dqo.buf_states[i].next = i + 1;
-
- rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1;
- rx->dqo.recycled_buf_states.head = -1;
- rx->dqo.recycled_buf_states.tail = -1;
- rx->dqo.used_buf_states.head = -1;
- rx->dqo.used_buf_states.tail = -1;
-
/* Allocate RX completion queue */
size = sizeof(rx->dqo.complq.desc_ring[0]) *
completion_queue_slots;
@@ -359,8 +262,18 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
if (!rx->dqo.bufq.desc_ring)
goto err;
- if (!cfg->raw_addressing) {
- rx->dqo.qpl = gve_assign_rx_qpl(cfg, rx->q_num);
+ if (cfg->raw_addressing) {
+ pool = gve_rx_create_page_pool(priv, rx, cfg->xdp);
+ if (IS_ERR(pool))
+ goto err;
+
+ rx->dqo.page_pool = pool;
+ } else {
+ qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num);
+ qpl_page_cnt = gve_get_rx_pages_per_qpl_dqo(cfg->ring_size);
+
+ rx->dqo.qpl = gve_alloc_queue_page_list(priv, qpl_id,
+ qpl_page_cnt);
if (!rx->dqo.qpl)
goto err;
rx->dqo.next_qpl_page_idx = 0;
@@ -371,6 +284,9 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
if (!rx->q_resources)
goto err;
+ gve_rx_init_ring_state_dqo(rx, buffer_queue_slots,
+ completion_queue_slots);
+
return 0;
err:
@@ -393,18 +309,12 @@ int gve_rx_alloc_rings_dqo(struct gve_priv *priv,
int err;
int i;
- if (!cfg->raw_addressing && !cfg->qpls) {
- netif_err(priv, drv, priv->dev,
- "Cannot alloc QPL ring before allocing QPLs\n");
- return -EINVAL;
- }
-
- rx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_rx_ring),
+ rx = kvcalloc(cfg->qcfg_rx->max_queues, sizeof(struct gve_rx_ring),
GFP_KERNEL);
if (!rx)
return -ENOMEM;
- for (i = 0; i < cfg->qcfg->num_queues; i++) {
+ for (i = 0; i < cfg->qcfg_rx->num_queues; i++) {
err = gve_rx_alloc_ring_dqo(priv, cfg, &rx[i], i);
if (err) {
netif_err(priv, drv, priv->dev,
@@ -433,7 +343,7 @@ void gve_rx_free_rings_dqo(struct gve_priv *priv,
if (!rx)
return;
- for (i = 0; i < cfg->qcfg->num_queues; i++)
+ for (i = 0; i < cfg->qcfg_rx->num_queues; i++)
gve_rx_free_ring_dqo(priv, &rx[i], cfg);
kvfree(rx);
@@ -455,26 +365,14 @@ void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
num_avail_slots = min_t(u32, num_avail_slots, complq->num_free_slots);
while (num_posted < num_avail_slots) {
struct gve_rx_desc_dqo *desc = &bufq->desc_ring[bufq->tail];
- struct gve_rx_buf_state_dqo *buf_state;
-
- buf_state = gve_get_recycled_buf_state(rx);
- if (unlikely(!buf_state)) {
- buf_state = gve_alloc_buf_state(rx);
- if (unlikely(!buf_state))
- break;
-
- if (unlikely(gve_alloc_page_dqo(rx, buf_state))) {
- u64_stats_update_begin(&rx->statss);
- rx->rx_buf_alloc_fail++;
- u64_stats_update_end(&rx->statss);
- gve_free_buf_state(rx, buf_state);
- break;
- }
+
+ if (unlikely(gve_alloc_buffer(rx, desc))) {
+ u64_stats_update_begin(&rx->statss);
+ rx->rx_buf_alloc_fail++;
+ u64_stats_update_end(&rx->statss);
+ break;
}
- desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states);
- desc->buf_addr = cpu_to_le64(buf_state->addr +
- buf_state->page_info.page_offset);
if (rx->dqo.hdr_bufs.data)
desc->header_buf_addr =
cpu_to_le64(rx->dqo.hdr_bufs.addr +
@@ -491,48 +389,6 @@ void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
rx->fill_cnt += num_posted;
}
-static void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx,
- struct gve_rx_buf_state_dqo *buf_state)
-{
- const u16 data_buffer_size = priv->data_buffer_size_dqo;
- int pagecount;
-
- /* Can't reuse if we only fit one buffer per page */
- if (data_buffer_size * 2 > PAGE_SIZE)
- goto mark_used;
-
- pagecount = gve_buf_ref_cnt(buf_state);
-
- /* Record the offset when we have a single remaining reference.
- *
- * When this happens, we know all of the other offsets of the page are
- * usable.
- */
- if (pagecount == 1) {
- buf_state->last_single_ref_offset =
- buf_state->page_info.page_offset;
- }
-
- /* Use the next buffer sized chunk in the page. */
- buf_state->page_info.page_offset += data_buffer_size;
- buf_state->page_info.page_offset &= (PAGE_SIZE - 1);
-
- /* If we wrap around to the same offset without ever dropping to 1
- * reference, then we don't know if this offset was ever freed.
- */
- if (buf_state->page_info.page_offset ==
- buf_state->last_single_ref_offset) {
- goto mark_used;
- }
-
- gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
- return;
-
-mark_used:
- gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
- rx->dqo.used_buf_states_cnt++;
-}
-
static void gve_rx_skb_csum(struct sk_buff *skb,
const struct gve_rx_compl_desc_dqo *desc,
struct gve_ptype ptype)
@@ -581,11 +437,13 @@ static void gve_rx_skb_hash(struct sk_buff *skb,
skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type);
}
-static void gve_rx_free_skb(struct gve_rx_ring *rx)
+static void gve_rx_free_skb(struct napi_struct *napi, struct gve_rx_ring *rx)
{
if (!rx->ctx.skb_head)
return;
+ if (rx->ctx.skb_head == napi->skb)
+ napi->skb = NULL;
dev_kfree_skb_any(rx->ctx.skb_head);
rx->ctx.skb_head = NULL;
rx->ctx.skb_tail = NULL;
@@ -628,6 +486,25 @@ static int gve_rx_copy_ondemand(struct gve_rx_ring *rx,
return 0;
}
+static void gve_skb_add_rx_frag(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state,
+ int num_frags, u16 buf_len)
+{
+ if (rx->dqo.page_pool) {
+ skb_add_rx_frag_netmem(rx->ctx.skb_tail, num_frags,
+ buf_state->page_info.netmem,
+ buf_state->page_info.page_offset +
+ buf_state->page_info.pad, buf_len,
+ buf_state->page_info.buf_size);
+ } else {
+ skb_add_rx_frag(rx->ctx.skb_tail, num_frags,
+ buf_state->page_info.page,
+ buf_state->page_info.page_offset +
+ buf_state->page_info.pad, buf_len,
+ buf_state->page_info.buf_size);
+ }
+}
+
/* Chains multi skbs for single rx packet.
* Returns 0 if buffer is appended, -1 otherwise.
*/
@@ -645,6 +522,9 @@ static int gve_rx_append_frags(struct napi_struct *napi,
if (!skb)
return -1;
+ if (rx->dqo.page_pool)
+ skb_mark_for_recycle(skb);
+
if (rx->ctx.skb_tail == rx->ctx.skb_head)
skb_shinfo(rx->ctx.skb_head)->frag_list = skb;
else
@@ -655,26 +535,41 @@ static int gve_rx_append_frags(struct napi_struct *napi,
if (rx->ctx.skb_tail != rx->ctx.skb_head) {
rx->ctx.skb_head->len += buf_len;
rx->ctx.skb_head->data_len += buf_len;
- rx->ctx.skb_head->truesize += priv->data_buffer_size_dqo;
+ rx->ctx.skb_head->truesize += buf_state->page_info.buf_size;
}
/* Trigger ondemand page allocation if we are running low on buffers */
if (gve_rx_should_trigger_copy_ondemand(rx))
return gve_rx_copy_ondemand(rx, buf_state, buf_len);
- skb_add_rx_frag(rx->ctx.skb_tail, num_frags,
- buf_state->page_info.page,
- buf_state->page_info.page_offset,
- buf_len, priv->data_buffer_size_dqo);
- gve_dec_pagecnt_bias(&buf_state->page_info);
-
- /* Advances buffer page-offset if page is partially used.
- * Marks buffer as used if page is full.
- */
- gve_try_recycle_buf(priv, rx, buf_state);
+ gve_skb_add_rx_frag(rx, buf_state, num_frags, buf_len);
+ gve_reuse_buffer(rx, buf_state);
return 0;
}
+static void gve_xdp_done_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
+ struct xdp_buff *xdp, struct bpf_prog *xprog,
+ int xdp_act,
+ struct gve_rx_buf_state_dqo *buf_state)
+{
+ u64_stats_update_begin(&rx->statss);
+ switch (xdp_act) {
+ case XDP_ABORTED:
+ case XDP_DROP:
+ default:
+ rx->xdp_actions[xdp_act]++;
+ break;
+ case XDP_TX:
+ rx->xdp_tx_errors++;
+ break;
+ case XDP_REDIRECT:
+ rx->xdp_redirect_errors++;
+ break;
+ }
+ u64_stats_update_end(&rx->statss);
+ gve_free_buffer(rx, buf_state);
+}
+
/* Returns 0 if descriptor is completed successfully.
* Returns -EINVAL if descriptor is invalid.
* Returns -ENOMEM if data cannot be copied to skb.
@@ -689,6 +584,7 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
const bool hsplit = compl_desc->split_header;
struct gve_rx_buf_state_dqo *buf_state;
struct gve_priv *priv = rx->gve;
+ struct bpf_prog *xprog;
u16 buf_len;
u16 hdr_len;
@@ -705,8 +601,7 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
}
if (unlikely(compl_desc->rx_error)) {
- gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
- buf_state);
+ gve_free_buffer(rx, buf_state);
return -EINVAL;
}
@@ -716,7 +611,12 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
/* Page might have not been used for awhile and was likely last written
* by a different thread.
*/
- prefetch(buf_state->page_info.page);
+ if (rx->dqo.page_pool) {
+ if (!netmem_is_net_iov(buf_state->page_info.netmem))
+ prefetch(netmem_to_page(buf_state->page_info.netmem));
+ } else {
+ prefetch(buf_state->page_info.page);
+ }
/* Copy the header into the skb in the case of header split */
if (hsplit) {
@@ -730,6 +630,9 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
if (unlikely(!rx->ctx.skb_head))
goto error;
rx->ctx.skb_tail = rx->ctx.skb_head;
+
+ if (rx->dqo.page_pool)
+ skb_mark_for_recycle(rx->ctx.skb_head);
} else {
unsplit = 1;
}
@@ -742,7 +645,8 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
/* Sync the portion of dma buffer for CPU to read. */
dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr,
- buf_state->page_info.page_offset,
+ buf_state->page_info.page_offset +
+ buf_state->page_info.pad,
buf_len, DMA_FROM_DEVICE);
/* Append to current skb if one exists. */
@@ -754,6 +658,34 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
return 0;
}
+ xprog = READ_ONCE(priv->xdp_prog);
+ if (xprog) {
+ struct xdp_buff xdp;
+ void *old_data;
+ int xdp_act;
+
+ xdp_init_buff(&xdp, buf_state->page_info.buf_size,
+ &rx->xdp_rxq);
+ xdp_prepare_buff(&xdp,
+ buf_state->page_info.page_address +
+ buf_state->page_info.page_offset,
+ buf_state->page_info.pad,
+ buf_len, false);
+ old_data = xdp.data;
+ xdp_act = bpf_prog_run_xdp(xprog, &xdp);
+ buf_state->page_info.pad += xdp.data - old_data;
+ buf_len = xdp.data_end - xdp.data;
+ if (xdp_act != XDP_PASS) {
+ gve_xdp_done_dqo(priv, rx, &xdp, xprog, xdp_act,
+ buf_state);
+ return 0;
+ }
+
+ u64_stats_update_begin(&rx->statss);
+ rx->xdp_actions[XDP_PASS]++;
+ u64_stats_update_end(&rx->statss);
+ }
+
if (eop && buf_len <= priv->rx_copybreak) {
rx->ctx.skb_head = gve_rx_copy(priv->dev, napi,
&buf_state->page_info, buf_len);
@@ -766,8 +698,7 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
rx->rx_copybreak_pkt++;
u64_stats_update_end(&rx->statss);
- gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
- buf_state);
+ gve_free_buffer(rx, buf_state);
return 0;
}
@@ -782,16 +713,15 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
return 0;
}
- skb_add_rx_frag(rx->ctx.skb_head, 0, buf_state->page_info.page,
- buf_state->page_info.page_offset, buf_len,
- priv->data_buffer_size_dqo);
- gve_dec_pagecnt_bias(&buf_state->page_info);
+ if (rx->dqo.page_pool)
+ skb_mark_for_recycle(rx->ctx.skb_head);
- gve_try_recycle_buf(priv, rx, buf_state);
+ gve_skb_add_rx_frag(rx, buf_state, 0, buf_len);
+ gve_reuse_buffer(rx, buf_state);
return 0;
error:
- gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
+ gve_free_buffer(rx, buf_state);
return -ENOMEM;
}
@@ -884,7 +814,7 @@ int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
err = gve_rx_dqo(napi, rx, compl_desc, complq->head, rx->q_num);
if (err < 0) {
- gve_rx_free_skb(rx);
+ gve_rx_free_skb(napi, rx);
u64_stats_update_begin(&rx->statss);
if (err == -ENOMEM)
rx->rx_skb_alloc_fail++;
@@ -927,7 +857,7 @@ int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
/* gve_rx_complete_skb() will consume skb if successful */
if (gve_rx_complete_skb(rx, napi, compl_desc, feat) != 0) {
- gve_rx_free_skb(rx);
+ gve_rx_free_skb(napi, rx);
u64_stats_update_begin(&rx->statss);
rx->rx_desc_err_dropped_pkt++;
u64_stats_update_end(&rx->statss);
diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c
index 4b9853adc113..1b40bf0c811a 100644
--- a/drivers/net/ethernet/google/gve/gve_tx.c
+++ b/drivers/net/ethernet/google/gve/gve_tx.c
@@ -158,15 +158,16 @@ static int gve_clean_xdp_done(struct gve_priv *priv, struct gve_tx_ring *tx,
u32 to_do)
{
struct gve_tx_buffer_state *info;
- u32 clean_end = tx->done + to_do;
u64 pkts = 0, bytes = 0;
size_t space_freed = 0;
u32 xsk_complete = 0;
u32 idx;
+ int i;
- for (; tx->done < clean_end; tx->done++) {
+ for (i = 0; i < to_do; i++) {
idx = tx->done & tx->mask;
info = &tx->info[idx];
+ tx->done++;
if (unlikely(!info->xdp.size))
continue;
@@ -205,7 +206,10 @@ void gve_tx_stop_ring_gqi(struct gve_priv *priv, int idx)
return;
gve_remove_napi(priv, ntfy_idx);
- gve_clean_tx_done(priv, tx, priv->tx_desc_cnt, false);
+ if (tx->q_num < priv->tx_cfg.num_queues)
+ gve_clean_tx_done(priv, tx, priv->tx_desc_cnt, false);
+ else
+ gve_clean_xdp_done(priv, tx, priv->tx_desc_cnt);
netdev_tx_reset_queue(tx->netdev_txq);
gve_tx_remove_from_block(priv, idx);
}
@@ -216,6 +220,7 @@ static void gve_tx_free_ring_gqi(struct gve_priv *priv, struct gve_tx_ring *tx,
struct device *hdev = &priv->pdev->dev;
int idx = tx->q_num;
size_t bytes;
+ u32 qpl_id;
u32 slots;
slots = tx->mask + 1;
@@ -223,9 +228,12 @@ static void gve_tx_free_ring_gqi(struct gve_priv *priv, struct gve_tx_ring *tx,
tx->q_resources, tx->q_resources_bus);
tx->q_resources = NULL;
- if (!tx->raw_addressing) {
- gve_tx_fifo_release(priv, &tx->tx_fifo);
- gve_unassign_qpl(cfg->qpl_cfg, tx->tx_fifo.qpl->id);
+ if (tx->tx_fifo.qpl) {
+ if (tx->tx_fifo.base)
+ gve_tx_fifo_release(priv, &tx->tx_fifo);
+
+ qpl_id = gve_tx_qpl_id(priv, tx->q_num);
+ gve_free_queue_page_list(priv, tx->tx_fifo.qpl, qpl_id);
tx->tx_fifo.qpl = NULL;
}
@@ -256,6 +264,8 @@ static int gve_tx_alloc_ring_gqi(struct gve_priv *priv,
int idx)
{
struct device *hdev = &priv->pdev->dev;
+ int qpl_page_cnt;
+ u32 qpl_id = 0;
size_t bytes;
/* Make sure everything is zeroed to start */
@@ -280,9 +290,14 @@ static int gve_tx_alloc_ring_gqi(struct gve_priv *priv,
tx->raw_addressing = cfg->raw_addressing;
tx->dev = hdev;
if (!tx->raw_addressing) {
- tx->tx_fifo.qpl = gve_assign_tx_qpl(cfg, idx);
+ qpl_id = gve_tx_qpl_id(priv, tx->q_num);
+ qpl_page_cnt = priv->tx_pages_per_qpl;
+
+ tx->tx_fifo.qpl = gve_alloc_queue_page_list(priv, qpl_id,
+ qpl_page_cnt);
if (!tx->tx_fifo.qpl)
goto abort_with_desc;
+
/* map Tx FIFO */
if (gve_tx_fifo_init(priv, &tx->tx_fifo))
goto abort_with_qpl;
@@ -302,8 +317,10 @@ abort_with_fifo:
if (!tx->raw_addressing)
gve_tx_fifo_release(priv, &tx->tx_fifo);
abort_with_qpl:
- if (!tx->raw_addressing)
- gve_unassign_qpl(cfg->qpl_cfg, tx->tx_fifo.qpl->id);
+ if (!tx->raw_addressing) {
+ gve_free_queue_page_list(priv, tx->tx_fifo.qpl, qpl_id);
+ tx->tx_fifo.qpl = NULL;
+ }
abort_with_desc:
dma_free_coherent(hdev, bytes, tx->desc, tx->bus);
tx->desc = NULL;
@@ -317,33 +334,23 @@ int gve_tx_alloc_rings_gqi(struct gve_priv *priv,
struct gve_tx_alloc_rings_cfg *cfg)
{
struct gve_tx_ring *tx = cfg->tx;
+ int total_queues;
int err = 0;
int i, j;
- if (!cfg->raw_addressing && !cfg->qpls) {
- netif_err(priv, drv, priv->dev,
- "Cannot alloc QPL ring before allocing QPLs\n");
- return -EINVAL;
- }
-
- if (cfg->start_idx + cfg->num_rings > cfg->qcfg->max_queues) {
+ total_queues = cfg->qcfg->num_queues + cfg->num_xdp_rings;
+ if (total_queues > cfg->qcfg->max_queues) {
netif_err(priv, drv, priv->dev,
"Cannot alloc more than the max num of Tx rings\n");
return -EINVAL;
}
- if (cfg->start_idx == 0) {
- tx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_tx_ring),
- GFP_KERNEL);
- if (!tx)
- return -ENOMEM;
- } else if (!tx) {
- netif_err(priv, drv, priv->dev,
- "Cannot alloc tx rings from a nonzero start idx without tx array\n");
- return -EINVAL;
- }
+ tx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_tx_ring),
+ GFP_KERNEL);
+ if (!tx)
+ return -ENOMEM;
- for (i = cfg->start_idx; i < cfg->start_idx + cfg->num_rings; i++) {
+ for (i = 0; i < total_queues; i++) {
err = gve_tx_alloc_ring_gqi(priv, cfg, &tx[i], i);
if (err) {
netif_err(priv, drv, priv->dev,
@@ -359,8 +366,7 @@ int gve_tx_alloc_rings_gqi(struct gve_priv *priv,
cleanup:
for (j = 0; j < i; j++)
gve_tx_free_ring_gqi(priv, &tx[j], cfg);
- if (cfg->start_idx == 0)
- kvfree(tx);
+ kvfree(tx);
return err;
}
@@ -373,13 +379,11 @@ void gve_tx_free_rings_gqi(struct gve_priv *priv,
if (!tx)
return;
- for (i = cfg->start_idx; i < cfg->start_idx + cfg->num_rings; i++)
+ for (i = 0; i < cfg->qcfg->num_queues + cfg->qcfg->num_xdp_queues; i++)
gve_tx_free_ring_gqi(priv, &tx[i], cfg);
- if (cfg->start_idx == 0) {
- kvfree(tx);
- cfg->tx = NULL;
- }
+ kvfree(tx);
+ cfg->tx = NULL;
}
/* gve_tx_avail - Calculates the number of slots available in the ring
@@ -826,11 +830,14 @@ int gve_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
struct gve_tx_ring *tx;
int i, err = 0, qid;
- if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK) || !priv->xdp_prog)
return -EINVAL;
+ if (!gve_get_napi_enabled(priv))
+ return -ENETDOWN;
+
qid = gve_xdp_tx_queue_id(priv,
- smp_processor_id() % priv->num_xdp_queues);
+ smp_processor_id() % priv->tx_cfg.num_xdp_queues);
tx = &priv->tx[qid];
@@ -945,14 +952,10 @@ static int gve_xsk_tx(struct gve_priv *priv, struct gve_tx_ring *tx,
spin_lock(&tx->xdp_lock);
while (sent < budget) {
- if (!gve_can_tx(tx, GVE_TX_START_THRESH))
+ if (!gve_can_tx(tx, GVE_TX_START_THRESH) ||
+ !xsk_tx_peek_desc(tx->xsk_pool, &desc))
goto out;
- if (!xsk_tx_peek_desc(tx->xsk_pool, &desc)) {
- tx->xdp_xsk_done = tx->xdp_xsk_wakeup;
- goto out;
- }
-
data = xsk_buff_raw_get_data(tx->xsk_pool, desc.addr);
nsegs = gve_tx_fill_xdp(priv, tx, data, desc.len, NULL, true);
tx->req += nsegs;
@@ -967,33 +970,41 @@ out:
return sent;
}
+int gve_xsk_tx_poll(struct gve_notify_block *rx_block, int budget)
+{
+ struct gve_rx_ring *rx = rx_block->rx;
+ struct gve_priv *priv = rx->gve;
+ struct gve_tx_ring *tx;
+ int sent = 0;
+
+ tx = &priv->tx[gve_xdp_tx_queue_id(priv, rx->q_num)];
+ if (tx->xsk_pool) {
+ sent = gve_xsk_tx(priv, tx, budget);
+
+ u64_stats_update_begin(&tx->statss);
+ tx->xdp_xsk_sent += sent;
+ u64_stats_update_end(&tx->statss);
+ if (xsk_uses_need_wakeup(tx->xsk_pool))
+ xsk_set_tx_need_wakeup(tx->xsk_pool);
+ }
+
+ return sent;
+}
+
bool gve_xdp_poll(struct gve_notify_block *block, int budget)
{
struct gve_priv *priv = block->priv;
struct gve_tx_ring *tx = block->tx;
u32 nic_done;
- bool repoll;
u32 to_do;
/* Find out how much work there is to be done */
nic_done = gve_tx_load_event_counter(priv, tx);
to_do = min_t(u32, (nic_done - tx->done), budget);
gve_clean_xdp_done(priv, tx, to_do);
- repoll = nic_done != tx->done;
-
- if (tx->xsk_pool) {
- int sent = gve_xsk_tx(priv, tx, budget);
-
- u64_stats_update_begin(&tx->statss);
- tx->xdp_xsk_sent += sent;
- u64_stats_update_end(&tx->statss);
- repoll |= (sent == budget);
- if (xsk_uses_need_wakeup(tx->xsk_pool))
- xsk_set_tx_need_wakeup(tx->xsk_pool);
- }
/* If we still have work we want to repoll */
- return repoll;
+ return nic_done != tx->done;
}
bool gve_tx_poll(struct gve_notify_block *block, int budget)
diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c
index bc34b6cd3a3e..a27f1574a733 100644
--- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c
+++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c
@@ -209,6 +209,7 @@ static void gve_tx_free_ring_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
struct device *hdev = &priv->pdev->dev;
int idx = tx->q_num;
size_t bytes;
+ u32 qpl_id;
if (tx->q_resources) {
dma_free_coherent(hdev, sizeof(*tx->q_resources),
@@ -237,7 +238,8 @@ static void gve_tx_free_ring_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
tx->dqo.tx_qpl_buf_next = NULL;
if (tx->dqo.qpl) {
- gve_unassign_qpl(cfg->qpl_cfg, tx->dqo.qpl->id);
+ qpl_id = gve_tx_qpl_id(priv, tx->q_num);
+ gve_free_queue_page_list(priv, tx->dqo.qpl, qpl_id);
tx->dqo.qpl = NULL;
}
@@ -285,7 +287,9 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv,
{
struct device *hdev = &priv->pdev->dev;
int num_pending_packets;
+ int qpl_page_cnt;
size_t bytes;
+ u32 qpl_id;
int i;
memset(tx, 0, sizeof(*tx));
@@ -295,9 +299,7 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv,
/* Queue sizes must be a power of 2 */
tx->mask = cfg->ring_size - 1;
- tx->dqo.complq_mask = priv->queue_format == GVE_DQO_RDA_FORMAT ?
- priv->options_dqo_rda.tx_comp_ring_entries - 1 :
- tx->mask;
+ tx->dqo.complq_mask = tx->mask;
/* The max number of pending packets determines the maximum number of
* descriptors which maybe written to the completion queue.
@@ -354,7 +356,11 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv,
goto err;
if (!cfg->raw_addressing) {
- tx->dqo.qpl = gve_assign_tx_qpl(cfg, idx);
+ qpl_id = gve_tx_qpl_id(priv, tx->q_num);
+ qpl_page_cnt = priv->tx_pages_per_qpl;
+
+ tx->dqo.qpl = gve_alloc_queue_page_list(priv, qpl_id,
+ qpl_page_cnt);
if (!tx->dqo.qpl)
goto err;
@@ -373,33 +379,23 @@ int gve_tx_alloc_rings_dqo(struct gve_priv *priv,
struct gve_tx_alloc_rings_cfg *cfg)
{
struct gve_tx_ring *tx = cfg->tx;
+ int total_queues;
int err = 0;
int i, j;
- if (!cfg->raw_addressing && !cfg->qpls) {
- netif_err(priv, drv, priv->dev,
- "Cannot alloc QPL ring before allocing QPLs\n");
- return -EINVAL;
- }
-
- if (cfg->start_idx + cfg->num_rings > cfg->qcfg->max_queues) {
+ total_queues = cfg->qcfg->num_queues + cfg->num_xdp_rings;
+ if (total_queues > cfg->qcfg->max_queues) {
netif_err(priv, drv, priv->dev,
"Cannot alloc more than the max num of Tx rings\n");
return -EINVAL;
}
- if (cfg->start_idx == 0) {
- tx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_tx_ring),
- GFP_KERNEL);
- if (!tx)
- return -ENOMEM;
- } else if (!tx) {
- netif_err(priv, drv, priv->dev,
- "Cannot alloc tx rings from a nonzero start idx without tx array\n");
- return -EINVAL;
- }
+ tx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_tx_ring),
+ GFP_KERNEL);
+ if (!tx)
+ return -ENOMEM;
- for (i = cfg->start_idx; i < cfg->start_idx + cfg->num_rings; i++) {
+ for (i = 0; i < total_queues; i++) {
err = gve_tx_alloc_ring_dqo(priv, cfg, &tx[i], i);
if (err) {
netif_err(priv, drv, priv->dev,
@@ -415,8 +411,7 @@ int gve_tx_alloc_rings_dqo(struct gve_priv *priv,
err:
for (j = 0; j < i; j++)
gve_tx_free_ring_dqo(priv, &tx[j], cfg);
- if (cfg->start_idx == 0)
- kvfree(tx);
+ kvfree(tx);
return err;
}
@@ -429,13 +424,11 @@ void gve_tx_free_rings_dqo(struct gve_priv *priv,
if (!tx)
return;
- for (i = cfg->start_idx; i < cfg->start_idx + cfg->num_rings; i++)
+ for (i = 0; i < cfg->qcfg->num_queues + cfg->qcfg->num_xdp_queues; i++)
gve_tx_free_ring_dqo(priv, &tx[i], cfg);
- if (cfg->start_idx == 0) {
- kvfree(tx);
- cfg->tx = NULL;
- }
+ kvfree(tx);
+ cfg->tx = NULL;
}
/* Returns the number of slots available in the ring */
@@ -555,28 +548,18 @@ static int gve_prep_tso(struct sk_buff *skb)
if (unlikely(skb_shinfo(skb)->gso_size < GVE_TX_MIN_TSO_MSS_DQO))
return -1;
+ if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
+ return -EINVAL;
+
/* Needed because we will modify header. */
err = skb_cow_head(skb, 0);
if (err < 0)
return err;
tcp = tcp_hdr(skb);
-
- /* Remove payload length from checksum. */
paylen = skb->len - skb_transport_offset(skb);
-
- switch (skb_shinfo(skb)->gso_type) {
- case SKB_GSO_TCPV4:
- case SKB_GSO_TCPV6:
- csum_replace_by_diff(&tcp->check,
- (__force __wsum)htonl(paylen));
-
- /* Compute length of segmentation header. */
- header_len = skb_tcp_all_headers(skb);
- break;
- default:
- return -EINVAL;
- }
+ csum_replace_by_diff(&tcp->check, (__force __wsum)htonl(paylen));
+ header_len = skb_tcp_all_headers(skb);
if (unlikely(header_len > GVE_TX_MAX_HDR_SIZE_DQO))
return -EINVAL;
@@ -677,7 +660,8 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx,
goto err;
dma_unmap_len_set(pkt, len[pkt->num_bufs], len);
- dma_unmap_addr_set(pkt, dma[pkt->num_bufs], addr);
+ netmem_dma_unmap_addr_set(skb_frag_netmem(frag), pkt,
+ dma[pkt->num_bufs], addr);
++pkt->num_bufs;
gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb, len, addr,
@@ -876,22 +860,42 @@ static bool gve_can_send_tso(const struct sk_buff *skb)
const int header_len = skb_tcp_all_headers(skb);
const int gso_size = shinfo->gso_size;
int cur_seg_num_bufs;
+ int prev_frag_size;
int cur_seg_size;
int i;
cur_seg_size = skb_headlen(skb) - header_len;
+ prev_frag_size = skb_headlen(skb);
cur_seg_num_bufs = cur_seg_size > 0;
for (i = 0; i < shinfo->nr_frags; i++) {
if (cur_seg_size >= gso_size) {
cur_seg_size %= gso_size;
cur_seg_num_bufs = cur_seg_size > 0;
+
+ if (prev_frag_size > GVE_TX_MAX_BUF_SIZE_DQO) {
+ int prev_frag_remain = prev_frag_size %
+ GVE_TX_MAX_BUF_SIZE_DQO;
+
+ /* If the last descriptor of the previous frag
+ * is less than cur_seg_size, the segment will
+ * span two descriptors in the previous frag.
+ * Since max gso size (9728) is less than
+ * GVE_TX_MAX_BUF_SIZE_DQO, it is impossible
+ * for the segment to span more than two
+ * descriptors.
+ */
+ if (prev_frag_remain &&
+ cur_seg_size > prev_frag_remain)
+ cur_seg_num_bufs++;
+ }
}
if (unlikely(++cur_seg_num_bufs > max_bufs_per_seg))
return false;
- cur_seg_size += skb_frag_size(&shinfo->frags[i]);
+ prev_frag_size = skb_frag_size(&shinfo->frags[i]);
+ cur_seg_size += prev_frag_size;
}
return true;
@@ -1035,8 +1039,9 @@ static void gve_unmap_packet(struct device *dev,
dma_unmap_single(dev, dma_unmap_addr(pkt, dma[0]),
dma_unmap_len(pkt, len[0]), DMA_TO_DEVICE);
for (i = 1; i < pkt->num_bufs; i++) {
- dma_unmap_page(dev, dma_unmap_addr(pkt, dma[i]),
- dma_unmap_len(pkt, len[i]), DMA_TO_DEVICE);
+ netmem_dma_unmap_page_attrs(dev, dma_unmap_addr(pkt, dma[i]),
+ dma_unmap_len(pkt, len[i]),
+ DMA_TO_DEVICE, 0);
}
pkt->num_bufs = 0;
}
@@ -1136,8 +1141,7 @@ static void gve_handle_miss_completion(struct gve_priv *priv,
/* jiffies can wraparound but time comparisons can handle overflows. */
pending_packet->timeout_jiffies =
jiffies +
- msecs_to_jiffies(GVE_REINJECT_COMPL_TIMEOUT *
- MSEC_PER_SEC);
+ secs_to_jiffies(GVE_REINJECT_COMPL_TIMEOUT);
add_to_list(tx, &tx->dqo_compl.miss_completions, pending_packet);
*bytes += pending_packet->skb->len;
@@ -1181,8 +1185,7 @@ static void remove_miss_completions(struct gve_priv *priv,
pending_packet->state = GVE_PACKET_STATE_TIMED_OUT_COMPL;
pending_packet->timeout_jiffies =
jiffies +
- msecs_to_jiffies(GVE_DEALLOCATE_COMPL_TIMEOUT *
- MSEC_PER_SEC);
+ secs_to_jiffies(GVE_DEALLOCATE_COMPL_TIMEOUT);
/* Maintain pending packet in another list so the packet can be
* unallocated at a later time.
*/
diff --git a/drivers/net/ethernet/google/gve/gve_utils.c b/drivers/net/ethernet/google/gve/gve_utils.c
index 2349750075a5..ace9b8698021 100644
--- a/drivers/net/ethernet/google/gve/gve_utils.c
+++ b/drivers/net/ethernet/google/gve/gve_utils.c
@@ -110,12 +110,13 @@ void gve_add_napi(struct gve_priv *priv, int ntfy_idx,
{
struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
- netif_napi_add(priv->dev, &block->napi, gve_poll);
+ netif_napi_add_locked(priv->dev, &block->napi, gve_poll);
+ netif_napi_set_irq_locked(&block->napi, block->irq);
}
void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
{
struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
- netif_napi_del(&block->napi);
+ netif_napi_del_locked(&block->napi);
}