summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/google
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/google')
-rw-r--r--drivers/net/ethernet/google/Kconfig1
-rw-r--r--drivers/net/ethernet/google/gve/Makefile3
-rw-r--r--drivers/net/ethernet/google/gve/gve.h361
-rw-r--r--drivers/net/ethernet/google/gve/gve_adminq.c715
-rw-r--r--drivers/net/ethernet/google/gve/gve_adminq.h225
-rw-r--r--drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c316
-rw-r--r--drivers/net/ethernet/google/gve/gve_dqo.h24
-rw-r--r--drivers/net/ethernet/google/gve/gve_ethtool.c424
-rw-r--r--drivers/net/ethernet/google/gve/gve_flow_rule.c298
-rw-r--r--drivers/net/ethernet/google/gve/gve_main.c1506
-rw-r--r--drivers/net/ethernet/google/gve/gve_rx.c255
-rw-r--r--drivers/net/ethernet/google/gve/gve_rx_dqo.c603
-rw-r--r--drivers/net/ethernet/google/gve/gve_tx.c193
-rw-r--r--drivers/net/ethernet/google/gve/gve_tx_dqo.c165
-rw-r--r--drivers/net/ethernet/google/gve/gve_utils.c49
-rw-r--r--drivers/net/ethernet/google/gve/gve_utils.h8
16 files changed, 3776 insertions, 1370 deletions
diff --git a/drivers/net/ethernet/google/Kconfig b/drivers/net/ethernet/google/Kconfig
index 8641a00f8e63..564862a57124 100644
--- a/drivers/net/ethernet/google/Kconfig
+++ b/drivers/net/ethernet/google/Kconfig
@@ -18,6 +18,7 @@ if NET_VENDOR_GOOGLE
config GVE
tristate "Google Virtual NIC (gVNIC) support"
depends on (PCI_MSI && (X86 || CPU_LITTLE_ENDIAN))
+ select PAGE_POOL
help
This driver supports Google Virtual NIC (gVNIC)"
diff --git a/drivers/net/ethernet/google/gve/Makefile b/drivers/net/ethernet/google/gve/Makefile
index b9a6be76531b..4520f1c07a63 100644
--- a/drivers/net/ethernet/google/gve/Makefile
+++ b/drivers/net/ethernet/google/gve/Makefile
@@ -1,4 +1,5 @@
# Makefile for the Google virtual Ethernet (gve) driver
obj-$(CONFIG_GVE) += gve.o
-gve-objs := gve_main.o gve_tx.o gve_tx_dqo.o gve_rx.o gve_rx_dqo.o gve_ethtool.o gve_adminq.o gve_utils.o
+gve-objs := gve_main.o gve_tx.o gve_tx_dqo.o gve_rx.o gve_rx_dqo.o gve_ethtool.o gve_adminq.o gve_utils.o gve_flow_rule.o \
+ gve_buffer_mgmt_dqo.o
diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index b80349154604..2fab38c8ee78 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: (GPL-2.0 OR MIT)
* Google virtual Ethernet (gve) driver
*
- * Copyright (C) 2015-2021 Google, Inc.
+ * Copyright (C) 2015-2024 Google LLC
*/
#ifndef _GVE_H_
@@ -9,9 +9,11 @@
#include <linux/dma-mapping.h>
#include <linux/dmapool.h>
+#include <linux/ethtool_netlink.h>
#include <linux/netdevice.h>
#include <linux/pci.h>
#include <linux/u64_stats_sync.h>
+#include <net/page_pool/helpers.h>
#include <net/xdp.h>
#include "gve_desc.h"
@@ -49,16 +51,35 @@
/* PTYPEs are always 10 bits. */
#define GVE_NUM_PTYPES 1024
+/* Default minimum ring size */
+#define GVE_DEFAULT_MIN_TX_RING_SIZE 256
+#define GVE_DEFAULT_MIN_RX_RING_SIZE 512
+
#define GVE_DEFAULT_RX_BUFFER_SIZE 2048
+#define GVE_MAX_RX_BUFFER_SIZE 4096
+
+#define GVE_XDP_RX_BUFFER_SIZE_DQO 4096
+
#define GVE_DEFAULT_RX_BUFFER_OFFSET 2048
+#define GVE_PAGE_POOL_SIZE_MULTIPLIER 4
+
+#define GVE_FLOW_RULES_CACHE_SIZE \
+ (GVE_ADMINQ_BUFFER_SIZE / sizeof(struct gve_adminq_queried_flow_rule))
+#define GVE_FLOW_RULE_IDS_CACHE_SIZE \
+ (GVE_ADMINQ_BUFFER_SIZE / sizeof(((struct gve_adminq_queried_flow_rule *)0)->location))
+
+#define GVE_RSS_KEY_SIZE 40
+#define GVE_RSS_INDIR_SIZE 128
+
#define GVE_XDP_ACTIONS 5
#define GVE_GQ_TX_MIN_PKT_DESC_BYTES 182
+#define GVE_DEFAULT_HEADER_BUFFER_SIZE 128
+
#define DQO_QPL_DEFAULT_TX_PAGES 512
-#define DQO_QPL_DEFAULT_RX_PAGES 2048
/* Maximum TSO size supported on DQO */
#define GVE_DQO_TX_MAX 0x3FFFF
@@ -86,9 +107,16 @@ struct gve_rx_desc_queue {
/* The page info for a single slot in the RX data queue */
struct gve_rx_slot_page_info {
- struct page *page;
+ /* netmem is used for DQO RDA mode
+ * page is used in all other modes
+ */
+ union {
+ struct page *page;
+ netmem_ref netmem;
+ };
void *page_address;
u32 page_offset; /* offset to write to in page */
+ unsigned int buf_size;
int pagecnt_bias; /* expected pagecnt if only the driver has a ref */
u16 pad; /* adjustment for rx padding */
u8 can_flip; /* tracks if the networking stack is using the page */
@@ -150,6 +178,11 @@ struct gve_rx_compl_queue_dqo {
u32 mask; /* Mask for indices to the size of the ring */
};
+struct gve_header_buf {
+ u8 *data;
+ dma_addr_t addr;
+};
+
/* Stores state for tracking buffers posted to HW */
struct gve_rx_buf_state_dqo {
/* The page posted to HW. */
@@ -196,6 +229,11 @@ struct gve_rx_cnts {
/* Contains datapath state used to represent an RX queue. */
struct gve_rx_ring {
struct gve_priv *gve;
+
+ u16 packet_buffer_size; /* Size of buffer posted to NIC */
+ u16 packet_buffer_truesize; /* Total size of RX buffer */
+ u16 rx_headroom;
+
union {
/* GQI fields */
struct {
@@ -204,7 +242,6 @@ struct gve_rx_ring {
/* threshold for posting new buffs and descs */
u32 db_threshold;
- u16 packet_buffer_size;
u32 qpl_copy_pool_mask;
u32 qpl_copy_pool_head;
@@ -252,19 +289,28 @@ struct gve_rx_ring {
/* track number of used buffers */
u16 used_buf_states_cnt;
+
+ /* Address info of the buffers for header-split */
+ struct gve_header_buf hdr_bufs;
+
+ struct page_pool *page_pool;
} dqo;
};
u64 rbytes; /* free-running bytes received */
+ u64 rx_hsplit_bytes; /* free-running header bytes received */
u64 rpackets; /* free-running packets received */
u32 cnt; /* free-running total number of completed packets */
u32 fill_cnt; /* free-running total number of descs and buffs posted */
u32 mask; /* masks the cnt and fill_cnt to the size of the ring */
+ u64 rx_hsplit_pkt; /* free-running packets with headers split */
u64 rx_copybreak_pkt; /* free-running count of copybreak packets */
u64 rx_copied_pkt; /* free-running total number of copied packets */
u64 rx_skb_alloc_fail; /* free-running count of skb alloc fails */
u64 rx_buf_alloc_fail; /* free-running count of buffer alloc fails */
u64 rx_desc_err_dropped_pkt; /* free-running count of packets dropped by descriptor error */
+ /* free-running count of unsplit packets due to header buffer overflow or hdr_len is 0 */
+ u64 rx_hsplit_unsplit_pkt;
u64 rx_cont_packet_cnt; /* free-running multi-fragment packets received */
u64 rx_frag_flip_cnt; /* free-running count of rx segments where page_flip was used */
u64 rx_frag_copy_cnt; /* free-running count of rx segments copied */
@@ -573,8 +619,6 @@ struct gve_tx_ring {
dma_addr_t complq_bus_dqo; /* dma address of the dqo.compl_ring */
struct u64_stats_sync statss; /* sync stats for 32bit archs */
struct xsk_buff_pool *xsk_pool;
- u32 xdp_xsk_wakeup;
- u32 xdp_xsk_done;
u64 xdp_xsk_sent;
u64 xdp_xmit;
u64 xdp_xmit_errors;
@@ -590,12 +634,21 @@ struct gve_notify_block {
struct gve_priv *priv;
struct gve_tx_ring *tx; /* tx rings on this block */
struct gve_rx_ring *rx; /* rx rings on this block */
+ u32 irq;
+};
+
+/* Tracks allowed and current rx queue settings */
+struct gve_rx_queue_config {
+ u16 max_queues;
+ u16 num_queues;
+ u16 packet_buffer_size;
};
-/* Tracks allowed and current queue settings */
-struct gve_queue_config {
+/* Tracks allowed and current tx queue settings */
+struct gve_tx_queue_config {
u16 max_queues;
- u16 num_queues; /* current */
+ u16 num_queues; /* number of TX queues, excluding XDP queues */
+ u16 num_xdp_queues;
};
/* Tracks the available and used qpl IDs */
@@ -604,11 +657,6 @@ struct gve_qpl_config {
unsigned long *qpl_id_map; /* bitmap of used qpl ids */
};
-struct gve_options_dqo_rda {
- u16 tx_comp_ring_entries; /* number of tx_comp descriptors */
- u16 rx_buff_ring_entries; /* number of rx_buff descriptors */
-};
-
struct gve_irq_db {
__be32 index;
} ____cacheline_aligned;
@@ -622,6 +670,36 @@ struct gve_ptype_lut {
struct gve_ptype ptypes[GVE_NUM_PTYPES];
};
+/* Parameters for allocating resources for tx queues */
+struct gve_tx_alloc_rings_cfg {
+ struct gve_tx_queue_config *qcfg;
+
+ u16 num_xdp_rings;
+
+ u16 ring_size;
+ bool raw_addressing;
+
+ /* Allocated resources are returned here */
+ struct gve_tx_ring *tx;
+};
+
+/* Parameters for allocating resources for rx queues */
+struct gve_rx_alloc_rings_cfg {
+ /* tx config is also needed to determine QPL ids */
+ struct gve_rx_queue_config *qcfg_rx;
+ struct gve_tx_queue_config *qcfg_tx;
+
+ u16 ring_size;
+ u16 packet_buffer_size;
+ bool raw_addressing;
+ bool enable_header_split;
+ bool reset_rss;
+ bool xdp;
+
+ /* Allocated resources are returned here */
+ struct gve_rx_ring *rx;
+};
+
/* GVE_QUEUE_FORMAT_UNSPECIFIED must be zero since 0 is the default value
* when the entire configure_device_resources command is zeroed out and the
* queue_format is not specified.
@@ -634,11 +712,48 @@ enum gve_queue_format {
GVE_DQO_QPL_FORMAT = 0x4,
};
+struct gve_flow_spec {
+ __be32 src_ip[4];
+ __be32 dst_ip[4];
+ union {
+ struct {
+ __be16 src_port;
+ __be16 dst_port;
+ };
+ __be32 spi;
+ };
+ union {
+ u8 tos;
+ u8 tclass;
+ };
+};
+
+struct gve_flow_rule {
+ u32 location;
+ u16 flow_type;
+ u16 action;
+ struct gve_flow_spec key;
+ struct gve_flow_spec mask;
+};
+
+struct gve_flow_rules_cache {
+ bool rules_cache_synced; /* False if the driver's rules_cache is outdated */
+ struct gve_adminq_queried_flow_rule *rules_cache;
+ __be32 *rule_ids_cache;
+ /* The total number of queried rules that stored in the caches */
+ u32 rules_cache_num;
+ u32 rule_ids_cache_num;
+};
+
+struct gve_rss_config {
+ u8 *hash_key;
+ u32 *hash_lut;
+};
+
struct gve_priv {
struct net_device *dev;
struct gve_tx_ring *tx; /* array of tx_cfg.num_queues */
struct gve_rx_ring *rx; /* array of rx_cfg.num_queues */
- struct gve_queue_page_list *qpls; /* array of num qpls */
struct gve_notify_block *ntfy_blocks; /* array of num_ntfy_blks */
struct gve_irq_db *irq_db_indices; /* array of num_ntfy_blks */
dma_addr_t irq_db_indices_bus;
@@ -651,19 +766,21 @@ struct gve_priv {
u16 num_event_counters;
u16 tx_desc_cnt; /* num desc per ring */
u16 rx_desc_cnt; /* num desc per ring */
+ u16 max_tx_desc_cnt;
+ u16 max_rx_desc_cnt;
+ u16 min_tx_desc_cnt;
+ u16 min_rx_desc_cnt;
+ bool modify_ring_size_enabled;
+ bool default_min_ring_size;
u16 tx_pages_per_qpl; /* Suggested number of pages per qpl for TX queues by NIC */
- u16 rx_pages_per_qpl; /* Suggested number of pages per qpl for RX queues by NIC */
- u16 rx_data_slot_cnt; /* rx buffer length */
u64 max_registered_pages;
u64 num_registered_pages; /* num pages registered with NIC */
struct bpf_prog *xdp_prog; /* XDP BPF program */
u32 rx_copybreak; /* copy packets smaller than this */
u16 default_num_queues; /* default num queues to set up */
- u16 num_xdp_queues;
- struct gve_queue_config tx_cfg;
- struct gve_queue_config rx_cfg;
- struct gve_qpl_config qpl_cfg; /* map used QPL ids */
+ struct gve_tx_queue_config tx_cfg;
+ struct gve_rx_queue_config rx_cfg;
u32 num_ntfy_blks; /* spilt between TX and RX so must be even */
struct gve_registers __iomem *reg_bar0; /* see gve_register.h */
@@ -678,6 +795,7 @@ struct gve_priv {
union gve_adminq_command *adminq;
dma_addr_t adminq_bus_addr;
struct dma_pool *adminq_pool;
+ struct mutex adminq_lock; /* Protects adminq command execution */
u32 adminq_mask; /* masks prod_cnt to adminq size */
u32 adminq_prod_cnt; /* free-running count of AQ cmds executed */
u32 adminq_cmd_fail; /* free-running count of AQ cmds failed */
@@ -697,6 +815,10 @@ struct gve_priv {
u32 adminq_report_link_speed_cnt;
u32 adminq_get_ptype_map_cnt;
u32 adminq_verify_driver_compatibility_cnt;
+ u32 adminq_query_flow_rules_cnt;
+ u32 adminq_cfg_flow_rule_cnt;
+ u32 adminq_cfg_rss_cnt;
+ u32 adminq_query_rss_cnt;
/* Global stats */
u32 interface_up_cnt; /* count of times interface turned up since last reset */
@@ -725,17 +847,29 @@ struct gve_priv {
u64 link_speed;
bool up_before_suspend; /* True if dev was up before suspend */
- struct gve_options_dqo_rda options_dqo_rda;
struct gve_ptype_lut *ptype_lut_dqo;
/* Must be a power of two. */
- int data_buffer_size_dqo;
+ u16 max_rx_buffer_size; /* device limit */
enum gve_queue_format queue_format;
/* Interrupt coalescing settings */
u32 tx_coalesce_usecs;
u32 rx_coalesce_usecs;
+
+ u16 header_buf_size; /* device configured, header-split supported if non-zero */
+ bool header_split_enabled; /* True if the header split is enabled by the user */
+
+ u32 max_flow_rules;
+ u32 num_flow_rules;
+
+ struct gve_flow_rules_cache flow_rules_cache;
+
+ u16 rss_key_size;
+ u16 rss_lut_size;
+ bool cache_rss_config;
+ struct gve_rss_config rss_config;
};
enum gve_service_task_flags_bit {
@@ -917,34 +1051,22 @@ static inline bool gve_is_qpl(struct gve_priv *priv)
priv->queue_format == GVE_DQO_QPL_FORMAT;
}
-/* Returns the number of tx queue page lists
- */
-static inline u32 gve_num_tx_qpls(struct gve_priv *priv)
-{
- if (!gve_is_qpl(priv))
- return 0;
-
- return priv->tx_cfg.num_queues + priv->num_xdp_queues;
-}
-
-/* Returns the number of XDP tx queue page lists
- */
-static inline u32 gve_num_xdp_qpls(struct gve_priv *priv)
+/* Returns the number of tx queue page lists */
+static inline u32 gve_num_tx_qpls(const struct gve_tx_queue_config *tx_cfg,
+ bool is_qpl)
{
- if (priv->queue_format != GVE_GQI_QPL_FORMAT)
+ if (!is_qpl)
return 0;
-
- return priv->num_xdp_queues;
+ return tx_cfg->num_queues + tx_cfg->num_xdp_queues;
}
-/* Returns the number of rx queue page lists
- */
-static inline u32 gve_num_rx_qpls(struct gve_priv *priv)
+/* Returns the number of rx queue page lists */
+static inline u32 gve_num_rx_qpls(const struct gve_rx_queue_config *rx_cfg,
+ bool is_qpl)
{
- if (!gve_is_qpl(priv))
+ if (!is_qpl)
return 0;
-
- return priv->rx_cfg.num_queues;
+ return rx_cfg->num_queues;
}
static inline u32 gve_tx_qpl_id(struct gve_priv *priv, int tx_qid)
@@ -957,59 +1079,35 @@ static inline u32 gve_rx_qpl_id(struct gve_priv *priv, int rx_qid)
return priv->tx_cfg.max_queues + rx_qid;
}
-static inline u32 gve_tx_start_qpl_id(struct gve_priv *priv)
-{
- return gve_tx_qpl_id(priv, 0);
-}
-
-static inline u32 gve_rx_start_qpl_id(struct gve_priv *priv)
+static inline u32 gve_get_rx_qpl_id(const struct gve_tx_queue_config *tx_cfg,
+ int rx_qid)
{
- return gve_rx_qpl_id(priv, 0);
+ return tx_cfg->max_queues + rx_qid;
}
-/* Returns a pointer to the next available tx qpl in the list of qpls
- */
-static inline
-struct gve_queue_page_list *gve_assign_tx_qpl(struct gve_priv *priv, int tx_qid)
+static inline u32 gve_tx_start_qpl_id(struct gve_priv *priv)
{
- int id = gve_tx_qpl_id(priv, tx_qid);
-
- /* QPL already in use */
- if (test_bit(id, priv->qpl_cfg.qpl_id_map))
- return NULL;
-
- set_bit(id, priv->qpl_cfg.qpl_id_map);
- return &priv->qpls[id];
+ return gve_tx_qpl_id(priv, 0);
}
-/* Returns a pointer to the next available rx qpl in the list of qpls
- */
-static inline
-struct gve_queue_page_list *gve_assign_rx_qpl(struct gve_priv *priv, int rx_qid)
+static inline u32 gve_rx_start_qpl_id(const struct gve_tx_queue_config *tx_cfg)
{
- int id = gve_rx_qpl_id(priv, rx_qid);
-
- /* QPL already in use */
- if (test_bit(id, priv->qpl_cfg.qpl_id_map))
- return NULL;
-
- set_bit(id, priv->qpl_cfg.qpl_id_map);
- return &priv->qpls[id];
+ return gve_get_rx_qpl_id(tx_cfg, 0);
}
-/* Unassigns the qpl with the given id
- */
-static inline void gve_unassign_qpl(struct gve_priv *priv, int id)
+static inline u32 gve_get_rx_pages_per_qpl_dqo(u32 rx_desc_cnt)
{
- clear_bit(id, priv->qpl_cfg.qpl_id_map);
+ /* For DQO, page count should be more than ring size for
+ * out-of-order completions. Set it to two times of ring size.
+ */
+ return 2 * rx_desc_cnt;
}
-/* Returns the correct dma direction for tx and rx qpls
- */
+/* Returns the correct dma direction for tx and rx qpls */
static inline enum dma_data_direction gve_qpl_dma_dir(struct gve_priv *priv,
int id)
{
- if (id < gve_rx_start_qpl_id(priv))
+ if (id < gve_rx_start_qpl_id(&priv->tx_cfg))
return DMA_TO_DEVICE;
else
return DMA_FROM_DEVICE;
@@ -1023,7 +1121,7 @@ static inline bool gve_is_gqi(struct gve_priv *priv)
static inline u32 gve_num_tx_queues(struct gve_priv *priv)
{
- return priv->tx_cfg.num_queues + priv->num_xdp_queues;
+ return priv->tx_cfg.num_queues + priv->tx_cfg.num_xdp_queues;
}
static inline u32 gve_xdp_tx_queue_id(struct gve_priv *priv, u32 queue_id)
@@ -1036,12 +1134,31 @@ static inline u32 gve_xdp_tx_start_queue_id(struct gve_priv *priv)
return gve_xdp_tx_queue_id(priv, 0);
}
+static inline bool gve_supports_xdp_xmit(struct gve_priv *priv)
+{
+ switch (priv->queue_format) {
+ case GVE_GQI_QPL_FORMAT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/* gqi napi handler defined in gve_main.c */
+int gve_napi_poll(struct napi_struct *napi, int budget);
+
/* buffers */
int gve_alloc_page(struct gve_priv *priv, struct device *dev,
struct page **page, dma_addr_t *dma,
enum dma_data_direction, gfp_t gfp_flags);
void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
enum dma_data_direction);
+/* qpls */
+struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv,
+ u32 id, int pages);
+void gve_free_queue_page_list(struct gve_priv *priv,
+ struct gve_queue_page_list *qpl,
+ u32 id);
/* tx handling */
netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev);
int gve_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
@@ -1051,8 +1168,13 @@ int gve_xdp_xmit_one(struct gve_priv *priv, struct gve_tx_ring *tx,
void gve_xdp_tx_flush(struct gve_priv *priv, u32 xdp_qid);
bool gve_tx_poll(struct gve_notify_block *block, int budget);
bool gve_xdp_poll(struct gve_notify_block *block, int budget);
-int gve_tx_alloc_rings(struct gve_priv *priv, int start_id, int num_rings);
-void gve_tx_free_rings_gqi(struct gve_priv *priv, int start_id, int num_rings);
+int gve_xsk_tx_poll(struct gve_notify_block *block, int budget);
+int gve_tx_alloc_rings_gqi(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *cfg);
+void gve_tx_free_rings_gqi(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *cfg);
+void gve_tx_start_ring_gqi(struct gve_priv *priv, int idx);
+void gve_tx_stop_ring_gqi(struct gve_priv *priv, int idx);
u32 gve_tx_load_event_counter(struct gve_priv *priv,
struct gve_tx_ring *tx);
bool gve_tx_clean_pending(struct gve_priv *priv, struct gve_tx_ring *tx);
@@ -1060,14 +1182,73 @@ bool gve_tx_clean_pending(struct gve_priv *priv, struct gve_tx_ring *tx);
void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx);
int gve_rx_poll(struct gve_notify_block *block, int budget);
bool gve_rx_work_pending(struct gve_rx_ring *rx);
-int gve_rx_alloc_rings(struct gve_priv *priv);
-void gve_rx_free_rings_gqi(struct gve_priv *priv);
+int gve_rx_alloc_ring_gqi(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg,
+ struct gve_rx_ring *rx,
+ int idx);
+void gve_rx_free_ring_gqi(struct gve_priv *priv, struct gve_rx_ring *rx,
+ struct gve_rx_alloc_rings_cfg *cfg);
+int gve_rx_alloc_rings_gqi(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg);
+void gve_rx_free_rings_gqi(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg);
+void gve_rx_start_ring_gqi(struct gve_priv *priv, int idx);
+void gve_rx_stop_ring_gqi(struct gve_priv *priv, int idx);
+u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hplit);
+bool gve_header_split_supported(const struct gve_priv *priv);
+int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split);
+/* rx buffer handling */
+int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs);
+void gve_free_page_dqo(struct gve_priv *priv, struct gve_rx_buf_state_dqo *bs,
+ bool free_page);
+struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx);
+bool gve_buf_state_is_allocated(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state);
+void gve_free_buf_state(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state);
+struct gve_rx_buf_state_dqo *gve_dequeue_buf_state(struct gve_rx_ring *rx,
+ struct gve_index_list *list);
+void gve_enqueue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list,
+ struct gve_rx_buf_state_dqo *buf_state);
+struct gve_rx_buf_state_dqo *gve_get_recycled_buf_state(struct gve_rx_ring *rx);
+void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state);
+void gve_free_to_page_pool(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state,
+ bool allow_direct);
+int gve_alloc_qpl_page_dqo(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state);
+void gve_free_qpl_page_dqo(struct gve_rx_buf_state_dqo *buf_state);
+void gve_reuse_buffer(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state);
+void gve_free_buffer(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state);
+int gve_alloc_buffer(struct gve_rx_ring *rx, struct gve_rx_desc_dqo *desc);
+struct page_pool *gve_rx_create_page_pool(struct gve_priv *priv,
+ struct gve_rx_ring *rx,
+ bool xdp);
+
/* Reset */
void gve_schedule_reset(struct gve_priv *priv);
int gve_reset(struct gve_priv *priv, bool attempt_teardown);
+void gve_get_curr_alloc_cfgs(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
+ struct gve_rx_alloc_rings_cfg *rx_alloc_cfg);
+int gve_adjust_config(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
+ struct gve_rx_alloc_rings_cfg *rx_alloc_cfg);
int gve_adjust_queues(struct gve_priv *priv,
- struct gve_queue_config new_rx_config,
- struct gve_queue_config new_tx_config);
+ struct gve_rx_queue_config new_rx_config,
+ struct gve_tx_queue_config new_tx_config,
+ bool reset_rss);
+/* flow steering rule */
+int gve_get_flow_rule_entry(struct gve_priv *priv, struct ethtool_rxnfc *cmd);
+int gve_get_flow_rule_ids(struct gve_priv *priv, struct ethtool_rxnfc *cmd, u32 *rule_locs);
+int gve_add_flow_rule(struct gve_priv *priv, struct ethtool_rxnfc *cmd);
+int gve_del_flow_rule(struct gve_priv *priv, struct ethtool_rxnfc *cmd);
+int gve_flow_rules_reset(struct gve_priv *priv);
+/* RSS config */
+int gve_init_rss_config(struct gve_priv *priv, u16 num_queues);
/* report stats handling */
void gve_handle_report_stats(struct gve_priv *priv);
/* exported by ethtool.c */
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
index 12fbd723ecc6..3e8fc33cc11f 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.c
+++ b/drivers/net/ethernet/google/gve/gve_adminq.c
@@ -32,6 +32,8 @@ struct gve_device_option *gve_get_next_option(struct gve_device_descriptor *desc
return option_end > descriptor_end ? NULL : (struct gve_device_option *)option_end;
}
+#define GVE_DEVICE_OPTION_NO_MIN_RING_SIZE 8
+
static
void gve_parse_device_option(struct gve_priv *priv,
struct gve_device_descriptor *device_descriptor,
@@ -40,7 +42,11 @@ void gve_parse_device_option(struct gve_priv *priv,
struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
struct gve_device_option_dqo_rda **dev_op_dqo_rda,
struct gve_device_option_jumbo_frames **dev_op_jumbo_frames,
- struct gve_device_option_dqo_qpl **dev_op_dqo_qpl)
+ struct gve_device_option_dqo_qpl **dev_op_dqo_qpl,
+ struct gve_device_option_buffer_sizes **dev_op_buffer_sizes,
+ struct gve_device_option_flow_steering **dev_op_flow_steering,
+ struct gve_device_option_rss_config **dev_op_rss_config,
+ struct gve_device_option_modify_ring **dev_op_modify_ring)
{
u32 req_feat_mask = be32_to_cpu(option->required_features_mask);
u16 option_length = be16_to_cpu(option->option_length);
@@ -147,6 +153,78 @@ void gve_parse_device_option(struct gve_priv *priv,
}
*dev_op_jumbo_frames = (void *)(option + 1);
break;
+ case GVE_DEV_OPT_ID_BUFFER_SIZES:
+ if (option_length < sizeof(**dev_op_buffer_sizes) ||
+ req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_BUFFER_SIZES) {
+ dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT,
+ "Buffer Sizes",
+ (int)sizeof(**dev_op_buffer_sizes),
+ GVE_DEV_OPT_REQ_FEAT_MASK_BUFFER_SIZES,
+ option_length, req_feat_mask);
+ break;
+ }
+
+ if (option_length > sizeof(**dev_op_buffer_sizes))
+ dev_warn(&priv->pdev->dev,
+ GVE_DEVICE_OPTION_TOO_BIG_FMT,
+ "Buffer Sizes");
+ *dev_op_buffer_sizes = (void *)(option + 1);
+ break;
+ case GVE_DEV_OPT_ID_MODIFY_RING:
+ if (option_length < GVE_DEVICE_OPTION_NO_MIN_RING_SIZE ||
+ req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING) {
+ dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT,
+ "Modify Ring", (int)sizeof(**dev_op_modify_ring),
+ GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING,
+ option_length, req_feat_mask);
+ break;
+ }
+
+ if (option_length > sizeof(**dev_op_modify_ring)) {
+ dev_warn(&priv->pdev->dev,
+ GVE_DEVICE_OPTION_TOO_BIG_FMT, "Modify Ring");
+ }
+
+ *dev_op_modify_ring = (void *)(option + 1);
+
+ /* device has not provided min ring size */
+ if (option_length == GVE_DEVICE_OPTION_NO_MIN_RING_SIZE)
+ priv->default_min_ring_size = true;
+ break;
+ case GVE_DEV_OPT_ID_FLOW_STEERING:
+ if (option_length < sizeof(**dev_op_flow_steering) ||
+ req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_FLOW_STEERING) {
+ dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT,
+ "Flow Steering",
+ (int)sizeof(**dev_op_flow_steering),
+ GVE_DEV_OPT_REQ_FEAT_MASK_FLOW_STEERING,
+ option_length, req_feat_mask);
+ break;
+ }
+
+ if (option_length > sizeof(**dev_op_flow_steering))
+ dev_warn(&priv->pdev->dev,
+ GVE_DEVICE_OPTION_TOO_BIG_FMT,
+ "Flow Steering");
+ *dev_op_flow_steering = (void *)(option + 1);
+ break;
+ case GVE_DEV_OPT_ID_RSS_CONFIG:
+ if (option_length < sizeof(**dev_op_rss_config) ||
+ req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_RSS_CONFIG) {
+ dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT,
+ "RSS config",
+ (int)sizeof(**dev_op_rss_config),
+ GVE_DEV_OPT_REQ_FEAT_MASK_RSS_CONFIG,
+ option_length, req_feat_mask);
+ break;
+ }
+
+ if (option_length > sizeof(**dev_op_rss_config))
+ dev_warn(&priv->pdev->dev,
+ GVE_DEVICE_OPTION_TOO_BIG_FMT,
+ "RSS config");
+ *dev_op_rss_config = (void *)(option + 1);
+ break;
default:
/* If we don't recognize the option just continue
* without doing anything.
@@ -164,7 +242,11 @@ gve_process_device_options(struct gve_priv *priv,
struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
struct gve_device_option_dqo_rda **dev_op_dqo_rda,
struct gve_device_option_jumbo_frames **dev_op_jumbo_frames,
- struct gve_device_option_dqo_qpl **dev_op_dqo_qpl)
+ struct gve_device_option_dqo_qpl **dev_op_dqo_qpl,
+ struct gve_device_option_buffer_sizes **dev_op_buffer_sizes,
+ struct gve_device_option_flow_steering **dev_op_flow_steering,
+ struct gve_device_option_rss_config **dev_op_rss_config,
+ struct gve_device_option_modify_ring **dev_op_modify_ring)
{
const int num_options = be16_to_cpu(descriptor->num_device_options);
struct gve_device_option *dev_opt;
@@ -185,7 +267,9 @@ gve_process_device_options(struct gve_priv *priv,
gve_parse_device_option(priv, descriptor, dev_opt,
dev_op_gqi_rda, dev_op_gqi_qpl,
dev_op_dqo_rda, dev_op_jumbo_frames,
- dev_op_dqo_qpl);
+ dev_op_dqo_qpl, dev_op_buffer_sizes,
+ dev_op_flow_steering, dev_op_rss_config,
+ dev_op_modify_ring);
dev_opt = next_opt;
}
@@ -223,6 +307,10 @@ int gve_adminq_alloc(struct device *dev, struct gve_priv *priv)
priv->adminq_report_stats_cnt = 0;
priv->adminq_report_link_speed_cnt = 0;
priv->adminq_get_ptype_map_cnt = 0;
+ priv->adminq_query_flow_rules_cnt = 0;
+ priv->adminq_cfg_flow_rule_cnt = 0;
+ priv->adminq_cfg_rss_cnt = 0;
+ priv->adminq_query_rss_cnt = 0;
/* Setup Admin queue with the device */
if (priv->pdev->revision < 0x1) {
@@ -239,6 +327,7 @@ int gve_adminq_alloc(struct device *dev, struct gve_priv *priv)
&priv->reg_bar0->adminq_base_address_lo);
iowrite32be(GVE_DRIVER_STATUS_RUN_MASK, &priv->reg_bar0->driver_status);
}
+ mutex_init(&priv->adminq_lock);
gve_set_admin_queue_ok(priv);
return 0;
}
@@ -415,6 +504,8 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv,
memcpy(cmd, cmd_orig, sizeof(*cmd_orig));
opcode = be32_to_cpu(READ_ONCE(cmd->opcode));
+ if (opcode == GVE_ADMINQ_EXTENDED_COMMAND)
+ opcode = be32_to_cpu(cmd->extended_command.inner_opcode);
switch (opcode) {
case GVE_ADMINQ_DESCRIBE_DEVICE:
@@ -459,6 +550,18 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv,
case GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY:
priv->adminq_verify_driver_compatibility_cnt++;
break;
+ case GVE_ADMINQ_QUERY_FLOW_RULES:
+ priv->adminq_query_flow_rules_cnt++;
+ break;
+ case GVE_ADMINQ_CONFIGURE_FLOW_RULE:
+ priv->adminq_cfg_flow_rule_cnt++;
+ break;
+ case GVE_ADMINQ_CONFIGURE_RSS:
+ priv->adminq_cfg_rss_cnt++;
+ break;
+ case GVE_ADMINQ_QUERY_RSS:
+ priv->adminq_query_rss_cnt++;
+ break;
default:
dev_err(&priv->pdev->dev, "unknown AQ command opcode %d\n", opcode);
}
@@ -466,28 +569,58 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv,
return 0;
}
-/* This function is not threadsafe - the caller is responsible for any
- * necessary locks.
- * The caller is also responsible for making sure there are no commands
- * waiting to be executed.
- */
static int gve_adminq_execute_cmd(struct gve_priv *priv,
union gve_adminq_command *cmd_orig)
{
u32 tail, head;
int err;
+ mutex_lock(&priv->adminq_lock);
tail = ioread32be(&priv->reg_bar0->adminq_event_counter);
head = priv->adminq_prod_cnt;
- if (tail != head)
- // This is not a valid path
- return -EINVAL;
+ if (tail != head) {
+ err = -EINVAL;
+ goto out;
+ }
err = gve_adminq_issue_cmd(priv, cmd_orig);
if (err)
- return err;
+ goto out;
- return gve_adminq_kick_and_wait(priv);
+ err = gve_adminq_kick_and_wait(priv);
+
+out:
+ mutex_unlock(&priv->adminq_lock);
+ return err;
+}
+
+static int gve_adminq_execute_extended_cmd(struct gve_priv *priv, u32 opcode,
+ size_t cmd_size, void *cmd_orig)
+{
+ union gve_adminq_command cmd;
+ dma_addr_t inner_cmd_bus;
+ void *inner_cmd;
+ int err;
+
+ inner_cmd = dma_alloc_coherent(&priv->pdev->dev, cmd_size,
+ &inner_cmd_bus, GFP_KERNEL);
+ if (!inner_cmd)
+ return -ENOMEM;
+
+ memcpy(inner_cmd, cmd_orig, cmd_size);
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_EXTENDED_COMMAND);
+ cmd.extended_command = (struct gve_adminq_extended_command) {
+ .inner_opcode = cpu_to_be32(opcode),
+ .inner_length = cpu_to_be32(cmd_size),
+ .inner_command_addr = cpu_to_be64(inner_cmd_bus),
+ };
+
+ err = gve_adminq_execute_cmd(priv, &cmd);
+
+ dma_free_coherent(&priv->pdev->dev, cmd_size, inner_cmd, inner_cmd_bus);
+ return err;
}
/* The device specifies that the management vector can either be the first irq
@@ -546,6 +679,7 @@ static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index)
cpu_to_be64(tx->q_resources_bus),
.tx_ring_addr = cpu_to_be64(tx->bus),
.ntfy_id = cpu_to_be32(tx->ntfy_id),
+ .tx_ring_size = cpu_to_be16(priv->tx_desc_cnt),
};
if (gve_is_gqi(priv)) {
@@ -554,24 +688,17 @@ static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index)
cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
} else {
- u16 comp_ring_size;
u32 qpl_id = 0;
- if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
+ if (priv->queue_format == GVE_DQO_RDA_FORMAT)
qpl_id = GVE_RAW_ADDRESSING_QPL_ID;
- comp_ring_size =
- priv->options_dqo_rda.tx_comp_ring_entries;
- } else {
+ else
qpl_id = tx->dqo.qpl->id;
- comp_ring_size = priv->tx_desc_cnt;
- }
cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
- cmd.create_tx_queue.tx_ring_size =
- cpu_to_be16(priv->tx_desc_cnt);
cmd.create_tx_queue.tx_comp_ring_addr =
cpu_to_be64(tx->complq_bus_dqo);
cmd.create_tx_queue.tx_comp_ring_size =
- cpu_to_be16(comp_ring_size);
+ cpu_to_be16(priv->tx_desc_cnt);
}
return gve_adminq_issue_cmd(priv, &cmd);
@@ -591,60 +718,71 @@ int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_que
return gve_adminq_kick_and_wait(priv);
}
-static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index)
+static void gve_adminq_get_create_rx_queue_cmd(struct gve_priv *priv,
+ union gve_adminq_command *cmd,
+ u32 queue_index)
{
struct gve_rx_ring *rx = &priv->rx[queue_index];
- union gve_adminq_command cmd;
- memset(&cmd, 0, sizeof(cmd));
- cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_RX_QUEUE);
- cmd.create_rx_queue = (struct gve_adminq_create_rx_queue) {
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->opcode = cpu_to_be32(GVE_ADMINQ_CREATE_RX_QUEUE);
+ cmd->create_rx_queue = (struct gve_adminq_create_rx_queue) {
.queue_id = cpu_to_be32(queue_index),
.ntfy_id = cpu_to_be32(rx->ntfy_id),
.queue_resources_addr = cpu_to_be64(rx->q_resources_bus),
+ .rx_ring_size = cpu_to_be16(priv->rx_desc_cnt),
+ .packet_buffer_size = cpu_to_be16(rx->packet_buffer_size),
};
if (gve_is_gqi(priv)) {
u32 qpl_id = priv->queue_format == GVE_GQI_RDA_FORMAT ?
GVE_RAW_ADDRESSING_QPL_ID : rx->data.qpl->id;
- cmd.create_rx_queue.rx_desc_ring_addr =
- cpu_to_be64(rx->desc.bus),
- cmd.create_rx_queue.rx_data_ring_addr =
- cpu_to_be64(rx->data.data_bus),
- cmd.create_rx_queue.index = cpu_to_be32(queue_index);
- cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
- cmd.create_rx_queue.packet_buffer_size = cpu_to_be16(rx->packet_buffer_size);
+ cmd->create_rx_queue.rx_desc_ring_addr =
+ cpu_to_be64(rx->desc.bus);
+ cmd->create_rx_queue.rx_data_ring_addr =
+ cpu_to_be64(rx->data.data_bus);
+ cmd->create_rx_queue.index = cpu_to_be32(queue_index);
+ cmd->create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
} else {
- u16 rx_buff_ring_entries;
u32 qpl_id = 0;
- if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
+ if (priv->queue_format == GVE_DQO_RDA_FORMAT)
qpl_id = GVE_RAW_ADDRESSING_QPL_ID;
- rx_buff_ring_entries =
- priv->options_dqo_rda.rx_buff_ring_entries;
- } else {
+ else
qpl_id = rx->dqo.qpl->id;
- rx_buff_ring_entries = priv->rx_desc_cnt;
- }
- cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
- cmd.create_rx_queue.rx_ring_size =
- cpu_to_be16(priv->rx_desc_cnt);
- cmd.create_rx_queue.rx_desc_ring_addr =
+ cmd->create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
+ cmd->create_rx_queue.rx_desc_ring_addr =
cpu_to_be64(rx->dqo.complq.bus);
- cmd.create_rx_queue.rx_data_ring_addr =
+ cmd->create_rx_queue.rx_data_ring_addr =
cpu_to_be64(rx->dqo.bufq.bus);
- cmd.create_rx_queue.packet_buffer_size =
- cpu_to_be16(priv->data_buffer_size_dqo);
- cmd.create_rx_queue.rx_buff_ring_size =
- cpu_to_be16(rx_buff_ring_entries);
- cmd.create_rx_queue.enable_rsc =
+ cmd->create_rx_queue.rx_buff_ring_size =
+ cpu_to_be16(priv->rx_desc_cnt);
+ cmd->create_rx_queue.enable_rsc =
!!(priv->dev->features & NETIF_F_LRO);
+ if (priv->header_split_enabled)
+ cmd->create_rx_queue.header_buffer_size =
+ cpu_to_be16(priv->header_buf_size);
}
+}
+static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index)
+{
+ union gve_adminq_command cmd;
+
+ gve_adminq_get_create_rx_queue_cmd(priv, &cmd, queue_index);
return gve_adminq_issue_cmd(priv, &cmd);
}
+/* Unlike gve_adminq_create_rx_queue, this actually rings the doorbell */
+int gve_adminq_create_single_rx_queue(struct gve_priv *priv, u32 queue_index)
+{
+ union gve_adminq_command cmd;
+
+ gve_adminq_get_create_rx_queue_cmd(priv, &cmd, queue_index);
+ return gve_adminq_execute_cmd(priv, &cmd);
+}
+
int gve_adminq_create_rx_queues(struct gve_priv *priv, u32 num_queues)
{
int err;
@@ -691,22 +829,31 @@ int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_qu
return gve_adminq_kick_and_wait(priv);
}
+static void gve_adminq_make_destroy_rx_queue_cmd(union gve_adminq_command *cmd,
+ u32 queue_index)
+{
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_RX_QUEUE);
+ cmd->destroy_rx_queue = (struct gve_adminq_destroy_rx_queue) {
+ .queue_id = cpu_to_be32(queue_index),
+ };
+}
+
static int gve_adminq_destroy_rx_queue(struct gve_priv *priv, u32 queue_index)
{
union gve_adminq_command cmd;
- int err;
- memset(&cmd, 0, sizeof(cmd));
- cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_RX_QUEUE);
- cmd.destroy_rx_queue = (struct gve_adminq_destroy_rx_queue) {
- .queue_id = cpu_to_be32(queue_index),
- };
+ gve_adminq_make_destroy_rx_queue_cmd(&cmd, queue_index);
+ return gve_adminq_issue_cmd(priv, &cmd);
+}
- err = gve_adminq_issue_cmd(priv, &cmd);
- if (err)
- return err;
+/* Unlike gve_adminq_destroy_rx_queue, this actually rings the doorbell */
+int gve_adminq_destroy_single_rx_queue(struct gve_priv *priv, u32 queue_index)
+{
+ union gve_adminq_command cmd;
- return 0;
+ gve_adminq_make_destroy_rx_queue_cmd(&cmd, queue_index);
+ return gve_adminq_execute_cmd(priv, &cmd);
}
int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 num_queues)
@@ -723,31 +870,26 @@ int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 num_queues)
return gve_adminq_kick_and_wait(priv);
}
-static int gve_set_desc_cnt(struct gve_priv *priv,
- struct gve_device_descriptor *descriptor)
+static void gve_set_default_desc_cnt(struct gve_priv *priv,
+ const struct gve_device_descriptor *descriptor)
{
priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries);
priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries);
- return 0;
+
+ /* set default ranges */
+ priv->max_tx_desc_cnt = priv->tx_desc_cnt;
+ priv->max_rx_desc_cnt = priv->rx_desc_cnt;
+ priv->min_tx_desc_cnt = priv->tx_desc_cnt;
+ priv->min_rx_desc_cnt = priv->rx_desc_cnt;
}
-static int
-gve_set_desc_cnt_dqo(struct gve_priv *priv,
- const struct gve_device_descriptor *descriptor,
- const struct gve_device_option_dqo_rda *dev_op_dqo_rda)
+static void gve_set_default_rss_sizes(struct gve_priv *priv)
{
- priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries);
- priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries);
-
- if (priv->queue_format == GVE_DQO_QPL_FORMAT)
- return 0;
-
- priv->options_dqo_rda.tx_comp_ring_entries =
- be16_to_cpu(dev_op_dqo_rda->tx_comp_ring_entries);
- priv->options_dqo_rda.rx_buff_ring_entries =
- be16_to_cpu(dev_op_dqo_rda->rx_buff_ring_entries);
-
- return 0;
+ if (!gve_is_gqi(priv)) {
+ priv->rss_key_size = GVE_RSS_KEY_SIZE;
+ priv->rss_lut_size = GVE_RSS_INDIR_SIZE;
+ priv->cache_rss_config = true;
+ }
}
static void gve_enable_supported_features(struct gve_priv *priv,
@@ -755,7 +897,15 @@ static void gve_enable_supported_features(struct gve_priv *priv,
const struct gve_device_option_jumbo_frames
*dev_op_jumbo_frames,
const struct gve_device_option_dqo_qpl
- *dev_op_dqo_qpl)
+ *dev_op_dqo_qpl,
+ const struct gve_device_option_buffer_sizes
+ *dev_op_buffer_sizes,
+ const struct gve_device_option_flow_steering
+ *dev_op_flow_steering,
+ const struct gve_device_option_rss_config
+ *dev_op_rss_config,
+ const struct gve_device_option_modify_ring
+ *dev_op_modify_ring)
{
/* Before control reaches this point, the page-size-capped max MTU from
* the gve_device_descriptor field has already been stored in
@@ -772,18 +922,73 @@ static void gve_enable_supported_features(struct gve_priv *priv,
if (dev_op_dqo_qpl) {
priv->tx_pages_per_qpl =
be16_to_cpu(dev_op_dqo_qpl->tx_pages_per_qpl);
- priv->rx_pages_per_qpl =
- be16_to_cpu(dev_op_dqo_qpl->rx_pages_per_qpl);
if (priv->tx_pages_per_qpl == 0)
priv->tx_pages_per_qpl = DQO_QPL_DEFAULT_TX_PAGES;
- if (priv->rx_pages_per_qpl == 0)
- priv->rx_pages_per_qpl = DQO_QPL_DEFAULT_RX_PAGES;
+ }
+
+ if (dev_op_buffer_sizes &&
+ (supported_features_mask & GVE_SUP_BUFFER_SIZES_MASK)) {
+ priv->max_rx_buffer_size =
+ be16_to_cpu(dev_op_buffer_sizes->packet_buffer_size);
+ priv->header_buf_size =
+ be16_to_cpu(dev_op_buffer_sizes->header_buffer_size);
+ dev_info(&priv->pdev->dev,
+ "BUFFER SIZES device option enabled with max_rx_buffer_size of %u, header_buf_size of %u.\n",
+ priv->max_rx_buffer_size, priv->header_buf_size);
+ }
+
+ /* Read and store ring size ranges given by device */
+ if (dev_op_modify_ring &&
+ (supported_features_mask & GVE_SUP_MODIFY_RING_MASK)) {
+ priv->modify_ring_size_enabled = true;
+
+ /* max ring size for DQO QPL should not be overwritten because of device limit */
+ if (priv->queue_format != GVE_DQO_QPL_FORMAT) {
+ priv->max_rx_desc_cnt = be16_to_cpu(dev_op_modify_ring->max_rx_ring_size);
+ priv->max_tx_desc_cnt = be16_to_cpu(dev_op_modify_ring->max_tx_ring_size);
+ }
+ if (priv->default_min_ring_size) {
+ /* If device hasn't provided minimums, use default minimums */
+ priv->min_tx_desc_cnt = GVE_DEFAULT_MIN_TX_RING_SIZE;
+ priv->min_rx_desc_cnt = GVE_DEFAULT_MIN_RX_RING_SIZE;
+ } else {
+ priv->min_rx_desc_cnt = be16_to_cpu(dev_op_modify_ring->min_rx_ring_size);
+ priv->min_tx_desc_cnt = be16_to_cpu(dev_op_modify_ring->min_tx_ring_size);
+ }
+ }
+
+ if (dev_op_flow_steering &&
+ (supported_features_mask & GVE_SUP_FLOW_STEERING_MASK)) {
+ if (dev_op_flow_steering->max_flow_rules) {
+ priv->max_flow_rules =
+ be32_to_cpu(dev_op_flow_steering->max_flow_rules);
+ priv->dev->hw_features |= NETIF_F_NTUPLE;
+ dev_info(&priv->pdev->dev,
+ "FLOW STEERING device option enabled with max rule limit of %u.\n",
+ priv->max_flow_rules);
+ }
+ }
+
+ if (dev_op_rss_config &&
+ (supported_features_mask & GVE_SUP_RSS_CONFIG_MASK)) {
+ priv->rss_key_size =
+ be16_to_cpu(dev_op_rss_config->hash_key_size);
+ priv->rss_lut_size =
+ be16_to_cpu(dev_op_rss_config->hash_lut_size);
+ priv->cache_rss_config = false;
+ dev_dbg(&priv->pdev->dev,
+ "RSS device option enabled with key size of %u, lut size of %u.\n",
+ priv->rss_key_size, priv->rss_lut_size);
}
}
int gve_adminq_describe_device(struct gve_priv *priv)
{
+ struct gve_device_option_flow_steering *dev_op_flow_steering = NULL;
+ struct gve_device_option_buffer_sizes *dev_op_buffer_sizes = NULL;
struct gve_device_option_jumbo_frames *dev_op_jumbo_frames = NULL;
+ struct gve_device_option_modify_ring *dev_op_modify_ring = NULL;
+ struct gve_device_option_rss_config *dev_op_rss_config = NULL;
struct gve_device_option_gqi_rda *dev_op_gqi_rda = NULL;
struct gve_device_option_gqi_qpl *dev_op_gqi_qpl = NULL;
struct gve_device_option_dqo_rda *dev_op_dqo_rda = NULL;
@@ -815,8 +1020,11 @@ int gve_adminq_describe_device(struct gve_priv *priv)
err = gve_process_device_options(priv, descriptor, &dev_op_gqi_rda,
&dev_op_gqi_qpl, &dev_op_dqo_rda,
- &dev_op_jumbo_frames,
- &dev_op_dqo_qpl);
+ &dev_op_jumbo_frames, &dev_op_dqo_qpl,
+ &dev_op_buffer_sizes,
+ &dev_op_flow_steering,
+ &dev_op_rss_config,
+ &dev_op_modify_ring);
if (err)
goto free_device_descriptor;
@@ -851,15 +1059,15 @@ int gve_adminq_describe_device(struct gve_priv *priv)
dev_info(&priv->pdev->dev,
"Driver is running with GQI QPL queue format.\n");
}
- if (gve_is_gqi(priv)) {
- err = gve_set_desc_cnt(priv, descriptor);
- } else {
- /* DQO supports LRO. */
+
+ /* set default descriptor counts */
+ gve_set_default_desc_cnt(priv, descriptor);
+
+ gve_set_default_rss_sizes(priv);
+
+ /* DQO supports LRO. */
+ if (!gve_is_gqi(priv))
priv->dev->hw_features |= NETIF_F_LRO;
- err = gve_set_desc_cnt_dqo(priv, descriptor, dev_op_dqo_rda);
- }
- if (err)
- goto free_device_descriptor;
priv->max_registered_pages =
be64_to_cpu(descriptor->max_registered_pages);
@@ -875,17 +1083,12 @@ int gve_adminq_describe_device(struct gve_priv *priv)
mac = descriptor->mac;
dev_info(&priv->pdev->dev, "MAC addr: %pM\n", mac);
priv->tx_pages_per_qpl = be16_to_cpu(descriptor->tx_pages_per_qpl);
- priv->rx_data_slot_cnt = be16_to_cpu(descriptor->rx_pages_per_qpl);
-
- if (gve_is_gqi(priv) && priv->rx_data_slot_cnt < priv->rx_desc_cnt) {
- dev_err(&priv->pdev->dev, "rx_data_slot_cnt cannot be smaller than rx_desc_cnt, setting rx_desc_cnt down to %d.\n",
- priv->rx_data_slot_cnt);
- priv->rx_desc_cnt = priv->rx_data_slot_cnt;
- }
priv->default_num_queues = be16_to_cpu(descriptor->default_num_queues);
gve_enable_supported_features(priv, supported_features_mask,
- dev_op_jumbo_frames, dev_op_dqo_qpl);
+ dev_op_jumbo_frames, dev_op_dqo_qpl,
+ dev_op_buffer_sizes, dev_op_flow_steering,
+ dev_op_rss_config, dev_op_modify_ring);
free_device_descriptor:
dma_pool_free(priv->adminq_pool, descriptor, descriptor_bus);
@@ -938,20 +1141,6 @@ int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id)
return gve_adminq_execute_cmd(priv, &cmd);
}
-int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu)
-{
- union gve_adminq_command cmd;
-
- memset(&cmd, 0, sizeof(cmd));
- cmd.opcode = cpu_to_be32(GVE_ADMINQ_SET_DRIVER_PARAMETER);
- cmd.set_driver_param = (struct gve_adminq_set_driver_parameter) {
- .parameter_type = cpu_to_be32(GVE_SET_PARAM_MTU),
- .parameter_value = cpu_to_be64(mtu),
- };
-
- return gve_adminq_execute_cmd(priv, &cmd);
-}
-
int gve_adminq_report_stats(struct gve_priv *priv, u64 stats_report_len,
dma_addr_t stats_report_addr, u64 interval)
{
@@ -1048,3 +1237,293 @@ err:
ptype_map_bus);
return err;
}
+
+static int
+gve_adminq_configure_flow_rule(struct gve_priv *priv,
+ struct gve_adminq_configure_flow_rule *flow_rule_cmd)
+{
+ int err = gve_adminq_execute_extended_cmd(priv,
+ GVE_ADMINQ_CONFIGURE_FLOW_RULE,
+ sizeof(struct gve_adminq_configure_flow_rule),
+ flow_rule_cmd);
+
+ if (err == -ETIME) {
+ dev_err(&priv->pdev->dev, "Timeout to configure the flow rule, trigger reset");
+ gve_reset(priv, true);
+ } else if (!err) {
+ priv->flow_rules_cache.rules_cache_synced = false;
+ }
+
+ return err;
+}
+
+int gve_adminq_add_flow_rule(struct gve_priv *priv, struct gve_adminq_flow_rule *rule, u32 loc)
+{
+ struct gve_adminq_configure_flow_rule flow_rule_cmd = {
+ .opcode = cpu_to_be16(GVE_FLOW_RULE_CFG_ADD),
+ .location = cpu_to_be32(loc),
+ .rule = *rule,
+ };
+
+ return gve_adminq_configure_flow_rule(priv, &flow_rule_cmd);
+}
+
+int gve_adminq_del_flow_rule(struct gve_priv *priv, u32 loc)
+{
+ struct gve_adminq_configure_flow_rule flow_rule_cmd = {
+ .opcode = cpu_to_be16(GVE_FLOW_RULE_CFG_DEL),
+ .location = cpu_to_be32(loc),
+ };
+
+ return gve_adminq_configure_flow_rule(priv, &flow_rule_cmd);
+}
+
+int gve_adminq_reset_flow_rules(struct gve_priv *priv)
+{
+ struct gve_adminq_configure_flow_rule flow_rule_cmd = {
+ .opcode = cpu_to_be16(GVE_FLOW_RULE_CFG_RESET),
+ };
+
+ return gve_adminq_configure_flow_rule(priv, &flow_rule_cmd);
+}
+
+int gve_adminq_configure_rss(struct gve_priv *priv, struct ethtool_rxfh_param *rxfh)
+{
+ const u32 *hash_lut_to_config = NULL;
+ const u8 *hash_key_to_config = NULL;
+ dma_addr_t lut_bus = 0, key_bus = 0;
+ union gve_adminq_command cmd;
+ __be32 *lut = NULL;
+ u8 hash_alg = 0;
+ u8 *key = NULL;
+ int err = 0;
+ u16 i;
+
+ switch (rxfh->hfunc) {
+ case ETH_RSS_HASH_NO_CHANGE:
+ fallthrough;
+ case ETH_RSS_HASH_TOP:
+ hash_alg = ETH_RSS_HASH_TOP;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (rxfh->indir) {
+ if (rxfh->indir_size != priv->rss_lut_size)
+ return -EINVAL;
+
+ hash_lut_to_config = rxfh->indir;
+ } else if (priv->cache_rss_config) {
+ hash_lut_to_config = priv->rss_config.hash_lut;
+ }
+
+ if (hash_lut_to_config) {
+ lut = dma_alloc_coherent(&priv->pdev->dev,
+ priv->rss_lut_size * sizeof(*lut),
+ &lut_bus, GFP_KERNEL);
+ if (!lut)
+ return -ENOMEM;
+
+ for (i = 0; i < priv->rss_lut_size; i++)
+ lut[i] = cpu_to_be32(hash_lut_to_config[i]);
+ }
+
+ if (rxfh->key) {
+ if (rxfh->key_size != priv->rss_key_size) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ hash_key_to_config = rxfh->key;
+ } else if (priv->cache_rss_config) {
+ hash_key_to_config = priv->rss_config.hash_key;
+ }
+
+ if (hash_key_to_config) {
+ key = dma_alloc_coherent(&priv->pdev->dev,
+ priv->rss_key_size,
+ &key_bus, GFP_KERNEL);
+ if (!key) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ memcpy(key, hash_key_to_config, priv->rss_key_size);
+ }
+
+ /* Zero-valued fields in the cmd.configure_rss instruct the device to
+ * not update those fields.
+ */
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_CONFIGURE_RSS);
+ cmd.configure_rss = (struct gve_adminq_configure_rss) {
+ .hash_types = cpu_to_be16(BIT(GVE_RSS_HASH_TCPV4) |
+ BIT(GVE_RSS_HASH_UDPV4) |
+ BIT(GVE_RSS_HASH_TCPV6) |
+ BIT(GVE_RSS_HASH_UDPV6)),
+ .hash_alg = hash_alg,
+ .hash_key_size =
+ cpu_to_be16((key_bus) ? priv->rss_key_size : 0),
+ .hash_lut_size =
+ cpu_to_be16((lut_bus) ? priv->rss_lut_size : 0),
+ .hash_key_addr = cpu_to_be64(key_bus),
+ .hash_lut_addr = cpu_to_be64(lut_bus),
+ };
+
+ err = gve_adminq_execute_cmd(priv, &cmd);
+
+out:
+ if (lut)
+ dma_free_coherent(&priv->pdev->dev,
+ priv->rss_lut_size * sizeof(*lut),
+ lut, lut_bus);
+ if (key)
+ dma_free_coherent(&priv->pdev->dev,
+ priv->rss_key_size, key, key_bus);
+ return err;
+}
+
+/* In the dma memory that the driver allocated for the device to query the flow rules, the device
+ * will first write it with a struct of gve_query_flow_rules_descriptor. Next to it, the device
+ * will write an array of rules or rule ids with the count that specified in the descriptor.
+ * For GVE_FLOW_RULE_QUERY_STATS, the device will only write the descriptor.
+ */
+static int gve_adminq_process_flow_rules_query(struct gve_priv *priv, u16 query_opcode,
+ struct gve_query_flow_rules_descriptor *descriptor)
+{
+ struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;
+ u32 num_queried_rules, total_memory_len, rule_info_len;
+ void *rule_info;
+
+ total_memory_len = be32_to_cpu(descriptor->total_length);
+ num_queried_rules = be32_to_cpu(descriptor->num_queried_rules);
+ rule_info = (void *)(descriptor + 1);
+
+ switch (query_opcode) {
+ case GVE_FLOW_RULE_QUERY_RULES:
+ rule_info_len = num_queried_rules * sizeof(*flow_rules_cache->rules_cache);
+ if (sizeof(*descriptor) + rule_info_len != total_memory_len) {
+ dev_err(&priv->dev->dev, "flow rules query is out of memory.\n");
+ return -ENOMEM;
+ }
+
+ memcpy(flow_rules_cache->rules_cache, rule_info, rule_info_len);
+ flow_rules_cache->rules_cache_num = num_queried_rules;
+ break;
+ case GVE_FLOW_RULE_QUERY_IDS:
+ rule_info_len = num_queried_rules * sizeof(*flow_rules_cache->rule_ids_cache);
+ if (sizeof(*descriptor) + rule_info_len != total_memory_len) {
+ dev_err(&priv->dev->dev, "flow rule ids query is out of memory.\n");
+ return -ENOMEM;
+ }
+
+ memcpy(flow_rules_cache->rule_ids_cache, rule_info, rule_info_len);
+ flow_rules_cache->rule_ids_cache_num = num_queried_rules;
+ break;
+ case GVE_FLOW_RULE_QUERY_STATS:
+ priv->num_flow_rules = be32_to_cpu(descriptor->num_flow_rules);
+ priv->max_flow_rules = be32_to_cpu(descriptor->max_flow_rules);
+ return 0;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int gve_adminq_query_flow_rules(struct gve_priv *priv, u16 query_opcode, u32 starting_loc)
+{
+ struct gve_query_flow_rules_descriptor *descriptor;
+ union gve_adminq_command cmd;
+ dma_addr_t descriptor_bus;
+ int err = 0;
+
+ memset(&cmd, 0, sizeof(cmd));
+ descriptor = dma_pool_alloc(priv->adminq_pool, GFP_KERNEL, &descriptor_bus);
+ if (!descriptor)
+ return -ENOMEM;
+
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_QUERY_FLOW_RULES);
+ cmd.query_flow_rules = (struct gve_adminq_query_flow_rules) {
+ .opcode = cpu_to_be16(query_opcode),
+ .starting_rule_id = cpu_to_be32(starting_loc),
+ .available_length = cpu_to_be64(GVE_ADMINQ_BUFFER_SIZE),
+ .rule_descriptor_addr = cpu_to_be64(descriptor_bus),
+ };
+ err = gve_adminq_execute_cmd(priv, &cmd);
+ if (err)
+ goto out;
+
+ err = gve_adminq_process_flow_rules_query(priv, query_opcode, descriptor);
+
+out:
+ dma_pool_free(priv->adminq_pool, descriptor, descriptor_bus);
+ return err;
+}
+
+static int gve_adminq_process_rss_query(struct gve_priv *priv,
+ struct gve_query_rss_descriptor *descriptor,
+ struct ethtool_rxfh_param *rxfh)
+{
+ u32 total_memory_length;
+ u16 hash_lut_length;
+ void *rss_info_addr;
+ __be32 *lut;
+ u16 i;
+
+ total_memory_length = be32_to_cpu(descriptor->total_length);
+ hash_lut_length = priv->rss_lut_size * sizeof(*rxfh->indir);
+
+ if (sizeof(*descriptor) + priv->rss_key_size + hash_lut_length != total_memory_length) {
+ dev_err(&priv->dev->dev,
+ "rss query desc from device has invalid length parameter.\n");
+ return -EINVAL;
+ }
+
+ rxfh->hfunc = descriptor->hash_alg;
+
+ rss_info_addr = (void *)(descriptor + 1);
+ if (rxfh->key) {
+ rxfh->key_size = priv->rss_key_size;
+ memcpy(rxfh->key, rss_info_addr, priv->rss_key_size);
+ }
+
+ rss_info_addr += priv->rss_key_size;
+ lut = (__be32 *)rss_info_addr;
+ if (rxfh->indir) {
+ rxfh->indir_size = priv->rss_lut_size;
+ for (i = 0; i < priv->rss_lut_size; i++)
+ rxfh->indir[i] = be32_to_cpu(lut[i]);
+ }
+
+ return 0;
+}
+
+int gve_adminq_query_rss_config(struct gve_priv *priv, struct ethtool_rxfh_param *rxfh)
+{
+ struct gve_query_rss_descriptor *descriptor;
+ union gve_adminq_command cmd;
+ dma_addr_t descriptor_bus;
+ int err = 0;
+
+ descriptor = dma_pool_alloc(priv->adminq_pool, GFP_KERNEL, &descriptor_bus);
+ if (!descriptor)
+ return -ENOMEM;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_QUERY_RSS);
+ cmd.query_rss = (struct gve_adminq_query_rss) {
+ .available_length = cpu_to_be64(GVE_ADMINQ_BUFFER_SIZE),
+ .rss_descriptor_addr = cpu_to_be64(descriptor_bus),
+ };
+ err = gve_adminq_execute_cmd(priv, &cmd);
+ if (err)
+ goto out;
+
+ err = gve_adminq_process_rss_query(priv, descriptor, rxfh);
+
+out:
+ dma_pool_free(priv->adminq_pool, descriptor, descriptor_bus);
+ return err;
+}
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h
index 5865ccdccbd0..228217458275 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.h
+++ b/drivers/net/ethernet/google/gve/gve_adminq.h
@@ -20,11 +20,26 @@ enum gve_adminq_opcodes {
GVE_ADMINQ_DESTROY_TX_QUEUE = 0x7,
GVE_ADMINQ_DESTROY_RX_QUEUE = 0x8,
GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES = 0x9,
+ GVE_ADMINQ_CONFIGURE_RSS = 0xA,
GVE_ADMINQ_SET_DRIVER_PARAMETER = 0xB,
GVE_ADMINQ_REPORT_STATS = 0xC,
GVE_ADMINQ_REPORT_LINK_SPEED = 0xD,
GVE_ADMINQ_GET_PTYPE_MAP = 0xE,
GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY = 0xF,
+ GVE_ADMINQ_QUERY_FLOW_RULES = 0x10,
+ GVE_ADMINQ_QUERY_RSS = 0x12,
+
+ /* For commands that are larger than 56 bytes */
+ GVE_ADMINQ_EXTENDED_COMMAND = 0xFF,
+};
+
+/* The normal adminq command is restricted to be 56 bytes at maximum. For the
+ * longer adminq command, it is wrapped by GVE_ADMINQ_EXTENDED_COMMAND with
+ * inner opcode of gve_adminq_extended_cmd_opcodes specified. The inner command
+ * is written in the dma memory allocated by GVE_ADMINQ_EXTENDED_COMMAND.
+ */
+enum gve_adminq_extended_cmd_opcodes {
+ GVE_ADMINQ_CONFIGURE_FLOW_RULE = 0x101,
};
/* Admin queue status codes */
@@ -103,8 +118,7 @@ static_assert(sizeof(struct gve_device_option_gqi_qpl) == 4);
struct gve_device_option_dqo_rda {
__be32 supported_features_mask;
- __be16 tx_comp_ring_entries;
- __be16 rx_buff_ring_entries;
+ __be32 reserved;
};
static_assert(sizeof(struct gve_device_option_dqo_rda) == 8);
@@ -125,6 +139,41 @@ struct gve_device_option_jumbo_frames {
static_assert(sizeof(struct gve_device_option_jumbo_frames) == 8);
+struct gve_device_option_buffer_sizes {
+ /* GVE_SUP_BUFFER_SIZES_MASK bit should be set */
+ __be32 supported_features_mask;
+ __be16 packet_buffer_size;
+ __be16 header_buffer_size;
+};
+
+static_assert(sizeof(struct gve_device_option_buffer_sizes) == 8);
+
+struct gve_device_option_modify_ring {
+ __be32 supported_featured_mask;
+ __be16 max_rx_ring_size;
+ __be16 max_tx_ring_size;
+ __be16 min_rx_ring_size;
+ __be16 min_tx_ring_size;
+};
+
+static_assert(sizeof(struct gve_device_option_modify_ring) == 12);
+
+struct gve_device_option_flow_steering {
+ __be32 supported_features_mask;
+ __be32 reserved;
+ __be32 max_flow_rules;
+};
+
+static_assert(sizeof(struct gve_device_option_flow_steering) == 12);
+
+struct gve_device_option_rss_config {
+ __be32 supported_features_mask;
+ __be16 hash_key_size;
+ __be16 hash_lut_size;
+};
+
+static_assert(sizeof(struct gve_device_option_rss_config) == 8);
+
/* Terminology:
*
* RDA - Raw DMA Addressing - Buffers associated with SKBs are directly DMA
@@ -134,25 +183,37 @@ static_assert(sizeof(struct gve_device_option_jumbo_frames) == 8);
* the device for read/write and data is copied from/to SKBs.
*/
enum gve_dev_opt_id {
- GVE_DEV_OPT_ID_GQI_RAW_ADDRESSING = 0x1,
- GVE_DEV_OPT_ID_GQI_RDA = 0x2,
- GVE_DEV_OPT_ID_GQI_QPL = 0x3,
- GVE_DEV_OPT_ID_DQO_RDA = 0x4,
- GVE_DEV_OPT_ID_DQO_QPL = 0x7,
- GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8,
+ GVE_DEV_OPT_ID_GQI_RAW_ADDRESSING = 0x1,
+ GVE_DEV_OPT_ID_GQI_RDA = 0x2,
+ GVE_DEV_OPT_ID_GQI_QPL = 0x3,
+ GVE_DEV_OPT_ID_DQO_RDA = 0x4,
+ GVE_DEV_OPT_ID_MODIFY_RING = 0x6,
+ GVE_DEV_OPT_ID_DQO_QPL = 0x7,
+ GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8,
+ GVE_DEV_OPT_ID_BUFFER_SIZES = 0xa,
+ GVE_DEV_OPT_ID_FLOW_STEERING = 0xb,
+ GVE_DEV_OPT_ID_RSS_CONFIG = 0xe,
};
enum gve_dev_opt_req_feat_mask {
- GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RAW_ADDRESSING = 0x0,
- GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RDA = 0x0,
- GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL = 0x0,
- GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA = 0x0,
- GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES = 0x0,
- GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL = 0x0,
+ GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RAW_ADDRESSING = 0x0,
+ GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RDA = 0x0,
+ GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL = 0x0,
+ GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA = 0x0,
+ GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES = 0x0,
+ GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL = 0x0,
+ GVE_DEV_OPT_REQ_FEAT_MASK_BUFFER_SIZES = 0x0,
+ GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING = 0x0,
+ GVE_DEV_OPT_REQ_FEAT_MASK_FLOW_STEERING = 0x0,
+ GVE_DEV_OPT_REQ_FEAT_MASK_RSS_CONFIG = 0x0,
};
enum gve_sup_feature_mask {
- GVE_SUP_JUMBO_FRAMES_MASK = 1 << 2,
+ GVE_SUP_MODIFY_RING_MASK = 1 << 0,
+ GVE_SUP_JUMBO_FRAMES_MASK = 1 << 2,
+ GVE_SUP_BUFFER_SIZES_MASK = 1 << 4,
+ GVE_SUP_FLOW_STEERING_MASK = 1 << 5,
+ GVE_SUP_RSS_CONFIG_MASK = 1 << 7,
};
#define GVE_DEV_OPT_LEN_GQI_RAW_ADDRESSING 0x0
@@ -165,6 +226,8 @@ enum gve_driver_capbility {
gve_driver_capability_dqo_qpl = 2, /* reserved for future use */
gve_driver_capability_dqo_rda = 3,
gve_driver_capability_alt_miss_compl = 4,
+ gve_driver_capability_flexible_buffer_size = 5,
+ gve_driver_capability_flexible_rss_size = 6,
};
#define GVE_CAP1(a) BIT((int)a)
@@ -176,12 +239,22 @@ enum gve_driver_capbility {
(GVE_CAP1(gve_driver_capability_gqi_qpl) | \
GVE_CAP1(gve_driver_capability_gqi_rda) | \
GVE_CAP1(gve_driver_capability_dqo_rda) | \
- GVE_CAP1(gve_driver_capability_alt_miss_compl))
+ GVE_CAP1(gve_driver_capability_alt_miss_compl) | \
+ GVE_CAP1(gve_driver_capability_flexible_buffer_size) | \
+ GVE_CAP1(gve_driver_capability_flexible_rss_size))
#define GVE_DRIVER_CAPABILITY_FLAGS2 0x0
#define GVE_DRIVER_CAPABILITY_FLAGS3 0x0
#define GVE_DRIVER_CAPABILITY_FLAGS4 0x0
+struct gve_adminq_extended_command {
+ __be32 inner_opcode;
+ __be32 inner_length;
+ __be64 inner_command_addr;
+};
+
+static_assert(sizeof(struct gve_adminq_extended_command) == 16);
+
struct gve_driver_info {
u8 os_type; /* 0x01 = Linux */
u8 driver_major;
@@ -260,7 +333,9 @@ struct gve_adminq_create_rx_queue {
__be16 packet_buffer_size;
__be16 rx_buff_ring_size;
u8 enable_rsc;
- u8 padding[5];
+ u8 padding1;
+ __be16 header_buffer_size;
+ u8 padding2[2];
};
static_assert(sizeof(struct gve_adminq_create_rx_queue) == 56);
@@ -384,6 +459,109 @@ struct gve_adminq_get_ptype_map {
__be64 ptype_map_addr;
};
+/* Flow-steering related definitions */
+enum gve_adminq_flow_rule_cfg_opcode {
+ GVE_FLOW_RULE_CFG_ADD = 0,
+ GVE_FLOW_RULE_CFG_DEL = 1,
+ GVE_FLOW_RULE_CFG_RESET = 2,
+};
+
+enum gve_adminq_flow_rule_query_opcode {
+ GVE_FLOW_RULE_QUERY_RULES = 0,
+ GVE_FLOW_RULE_QUERY_IDS = 1,
+ GVE_FLOW_RULE_QUERY_STATS = 2,
+};
+
+enum gve_adminq_flow_type {
+ GVE_FLOW_TYPE_TCPV4,
+ GVE_FLOW_TYPE_UDPV4,
+ GVE_FLOW_TYPE_SCTPV4,
+ GVE_FLOW_TYPE_AHV4,
+ GVE_FLOW_TYPE_ESPV4,
+ GVE_FLOW_TYPE_TCPV6,
+ GVE_FLOW_TYPE_UDPV6,
+ GVE_FLOW_TYPE_SCTPV6,
+ GVE_FLOW_TYPE_AHV6,
+ GVE_FLOW_TYPE_ESPV6,
+};
+
+/* Flow-steering command */
+struct gve_adminq_flow_rule {
+ __be16 flow_type;
+ __be16 action; /* RX queue id */
+ struct gve_flow_spec key;
+ struct gve_flow_spec mask;
+};
+
+struct gve_adminq_configure_flow_rule {
+ __be16 opcode;
+ u8 padding[2];
+ struct gve_adminq_flow_rule rule;
+ __be32 location;
+};
+
+static_assert(sizeof(struct gve_adminq_configure_flow_rule) == 92);
+
+struct gve_query_flow_rules_descriptor {
+ __be32 num_flow_rules;
+ __be32 max_flow_rules;
+ __be32 num_queried_rules;
+ __be32 total_length;
+};
+
+struct gve_adminq_queried_flow_rule {
+ __be32 location;
+ struct gve_adminq_flow_rule flow_rule;
+};
+
+struct gve_adminq_query_flow_rules {
+ __be16 opcode;
+ u8 padding[2];
+ __be32 starting_rule_id;
+ __be64 available_length; /* The dma memory length that the driver allocated */
+ __be64 rule_descriptor_addr; /* The dma memory address */
+};
+
+static_assert(sizeof(struct gve_adminq_query_flow_rules) == 24);
+
+enum gve_rss_hash_type {
+ GVE_RSS_HASH_IPV4,
+ GVE_RSS_HASH_TCPV4,
+ GVE_RSS_HASH_IPV6,
+ GVE_RSS_HASH_IPV6_EX,
+ GVE_RSS_HASH_TCPV6,
+ GVE_RSS_HASH_TCPV6_EX,
+ GVE_RSS_HASH_UDPV4,
+ GVE_RSS_HASH_UDPV6,
+ GVE_RSS_HASH_UDPV6_EX,
+};
+
+struct gve_adminq_configure_rss {
+ __be16 hash_types;
+ u8 hash_alg;
+ u8 reserved;
+ __be16 hash_key_size;
+ __be16 hash_lut_size;
+ __be64 hash_key_addr;
+ __be64 hash_lut_addr;
+};
+
+static_assert(sizeof(struct gve_adminq_configure_rss) == 24);
+
+struct gve_query_rss_descriptor {
+ __be32 total_length;
+ __be16 hash_types;
+ u8 hash_alg;
+ u8 reserved;
+};
+
+struct gve_adminq_query_rss {
+ __be64 available_length;
+ __be64 rss_descriptor_addr;
+};
+
+static_assert(sizeof(struct gve_adminq_query_rss) == 16);
+
union gve_adminq_command {
struct {
__be32 opcode;
@@ -404,6 +582,10 @@ union gve_adminq_command {
struct gve_adminq_get_ptype_map get_ptype_map;
struct gve_adminq_verify_driver_compatibility
verify_driver_compatibility;
+ struct gve_adminq_query_flow_rules query_flow_rules;
+ struct gve_adminq_configure_rss configure_rss;
+ struct gve_adminq_query_rss query_rss;
+ struct gve_adminq_extended_command extended_command;
};
};
u8 reserved[64];
@@ -423,18 +605,25 @@ int gve_adminq_configure_device_resources(struct gve_priv *priv,
int gve_adminq_deconfigure_device_resources(struct gve_priv *priv);
int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_queues);
int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_queues);
+int gve_adminq_create_single_rx_queue(struct gve_priv *priv, u32 queue_index);
int gve_adminq_create_rx_queues(struct gve_priv *priv, u32 num_queues);
+int gve_adminq_destroy_single_rx_queue(struct gve_priv *priv, u32 queue_index);
int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 queue_id);
int gve_adminq_register_page_list(struct gve_priv *priv,
struct gve_queue_page_list *qpl);
int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id);
-int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu);
int gve_adminq_report_stats(struct gve_priv *priv, u64 stats_report_len,
dma_addr_t stats_report_addr, u64 interval);
int gve_adminq_verify_driver_compatibility(struct gve_priv *priv,
u64 driver_info_len,
dma_addr_t driver_info_addr);
int gve_adminq_report_link_speed(struct gve_priv *priv);
+int gve_adminq_add_flow_rule(struct gve_priv *priv, struct gve_adminq_flow_rule *rule, u32 loc);
+int gve_adminq_del_flow_rule(struct gve_priv *priv, u32 loc);
+int gve_adminq_reset_flow_rules(struct gve_priv *priv);
+int gve_adminq_query_flow_rules(struct gve_priv *priv, u16 query_opcode, u32 starting_loc);
+int gve_adminq_configure_rss(struct gve_priv *priv, struct ethtool_rxfh_param *rxfh);
+int gve_adminq_query_rss_config(struct gve_priv *priv, struct ethtool_rxfh_param *rxfh);
struct gve_ptype_lut;
int gve_adminq_get_ptype_map_dqo(struct gve_priv *priv,
diff --git a/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c
new file mode 100644
index 000000000000..a71883e1d920
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2024 Google, Inc.
+ */
+
+#include "gve.h"
+#include "gve_utils.h"
+
+int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs)
+{
+ return page_count(bs->page_info.page) - bs->page_info.pagecnt_bias;
+}
+
+struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx)
+{
+ struct gve_rx_buf_state_dqo *buf_state;
+ s16 buffer_id;
+
+ buffer_id = rx->dqo.free_buf_states;
+ if (unlikely(buffer_id == -1))
+ return NULL;
+
+ buf_state = &rx->dqo.buf_states[buffer_id];
+
+ /* Remove buf_state from free list */
+ rx->dqo.free_buf_states = buf_state->next;
+
+ /* Point buf_state to itself to mark it as allocated */
+ buf_state->next = buffer_id;
+
+ return buf_state;
+}
+
+bool gve_buf_state_is_allocated(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state)
+{
+ s16 buffer_id = buf_state - rx->dqo.buf_states;
+
+ return buf_state->next == buffer_id;
+}
+
+void gve_free_buf_state(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state)
+{
+ s16 buffer_id = buf_state - rx->dqo.buf_states;
+
+ buf_state->next = rx->dqo.free_buf_states;
+ rx->dqo.free_buf_states = buffer_id;
+}
+
+struct gve_rx_buf_state_dqo *gve_dequeue_buf_state(struct gve_rx_ring *rx,
+ struct gve_index_list *list)
+{
+ struct gve_rx_buf_state_dqo *buf_state;
+ s16 buffer_id;
+
+ buffer_id = list->head;
+ if (unlikely(buffer_id == -1))
+ return NULL;
+
+ buf_state = &rx->dqo.buf_states[buffer_id];
+
+ /* Remove buf_state from list */
+ list->head = buf_state->next;
+ if (buf_state->next == -1)
+ list->tail = -1;
+
+ /* Point buf_state to itself to mark it as allocated */
+ buf_state->next = buffer_id;
+
+ return buf_state;
+}
+
+void gve_enqueue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list,
+ struct gve_rx_buf_state_dqo *buf_state)
+{
+ s16 buffer_id = buf_state - rx->dqo.buf_states;
+
+ buf_state->next = -1;
+
+ if (list->head == -1) {
+ list->head = buffer_id;
+ list->tail = buffer_id;
+ } else {
+ int tail = list->tail;
+
+ rx->dqo.buf_states[tail].next = buffer_id;
+ list->tail = buffer_id;
+ }
+}
+
+struct gve_rx_buf_state_dqo *gve_get_recycled_buf_state(struct gve_rx_ring *rx)
+{
+ struct gve_rx_buf_state_dqo *buf_state;
+ int i;
+
+ /* Recycled buf states are immediately usable. */
+ buf_state = gve_dequeue_buf_state(rx, &rx->dqo.recycled_buf_states);
+ if (likely(buf_state))
+ return buf_state;
+
+ if (unlikely(rx->dqo.used_buf_states.head == -1))
+ return NULL;
+
+ /* Used buf states are only usable when ref count reaches 0, which means
+ * no SKBs refer to them.
+ *
+ * Search a limited number before giving up.
+ */
+ for (i = 0; i < 5; i++) {
+ buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
+ if (gve_buf_ref_cnt(buf_state) == 0) {
+ rx->dqo.used_buf_states_cnt--;
+ return buf_state;
+ }
+
+ gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
+ }
+
+ return NULL;
+}
+
+int gve_alloc_qpl_page_dqo(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state)
+{
+ struct gve_priv *priv = rx->gve;
+ u32 idx;
+
+ idx = rx->dqo.next_qpl_page_idx;
+ if (idx >= gve_get_rx_pages_per_qpl_dqo(priv->rx_desc_cnt)) {
+ net_err_ratelimited("%s: Out of QPL pages\n",
+ priv->dev->name);
+ return -ENOMEM;
+ }
+ buf_state->page_info.page = rx->dqo.qpl->pages[idx];
+ buf_state->addr = rx->dqo.qpl->page_buses[idx];
+ rx->dqo.next_qpl_page_idx++;
+ buf_state->page_info.page_offset = 0;
+ buf_state->page_info.page_address =
+ page_address(buf_state->page_info.page);
+ buf_state->page_info.buf_size = rx->packet_buffer_truesize;
+ buf_state->page_info.pad = rx->rx_headroom;
+ buf_state->last_single_ref_offset = 0;
+
+ /* The page already has 1 ref. */
+ page_ref_add(buf_state->page_info.page, INT_MAX - 1);
+ buf_state->page_info.pagecnt_bias = INT_MAX;
+
+ return 0;
+}
+
+void gve_free_qpl_page_dqo(struct gve_rx_buf_state_dqo *buf_state)
+{
+ if (!buf_state->page_info.page)
+ return;
+
+ page_ref_sub(buf_state->page_info.page,
+ buf_state->page_info.pagecnt_bias - 1);
+ buf_state->page_info.page = NULL;
+}
+
+void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state)
+{
+ const u16 data_buffer_size = rx->packet_buffer_truesize;
+ int pagecount;
+
+ /* Can't reuse if we only fit one buffer per page */
+ if (data_buffer_size * 2 > PAGE_SIZE)
+ goto mark_used;
+
+ pagecount = gve_buf_ref_cnt(buf_state);
+
+ /* Record the offset when we have a single remaining reference.
+ *
+ * When this happens, we know all of the other offsets of the page are
+ * usable.
+ */
+ if (pagecount == 1) {
+ buf_state->last_single_ref_offset =
+ buf_state->page_info.page_offset;
+ }
+
+ /* Use the next buffer sized chunk in the page. */
+ buf_state->page_info.page_offset += data_buffer_size;
+ buf_state->page_info.page_offset &= (PAGE_SIZE - 1);
+
+ /* If we wrap around to the same offset without ever dropping to 1
+ * reference, then we don't know if this offset was ever freed.
+ */
+ if (buf_state->page_info.page_offset ==
+ buf_state->last_single_ref_offset) {
+ goto mark_used;
+ }
+
+ gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
+ return;
+
+mark_used:
+ gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
+ rx->dqo.used_buf_states_cnt++;
+}
+
+void gve_free_to_page_pool(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state,
+ bool allow_direct)
+{
+ netmem_ref netmem = buf_state->page_info.netmem;
+
+ if (!netmem)
+ return;
+
+ page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, allow_direct);
+ buf_state->page_info.netmem = 0;
+}
+
+static int gve_alloc_from_page_pool(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state)
+{
+ netmem_ref netmem;
+
+ buf_state->page_info.buf_size = rx->packet_buffer_truesize;
+ netmem = page_pool_alloc_netmem(rx->dqo.page_pool,
+ &buf_state->page_info.page_offset,
+ &buf_state->page_info.buf_size,
+ GFP_ATOMIC);
+
+ if (!netmem)
+ return -ENOMEM;
+
+ buf_state->page_info.netmem = netmem;
+ buf_state->page_info.page_address = netmem_address(netmem);
+ buf_state->addr = page_pool_get_dma_addr_netmem(netmem);
+ buf_state->page_info.pad = rx->dqo.page_pool->p.offset;
+
+ return 0;
+}
+
+struct page_pool *gve_rx_create_page_pool(struct gve_priv *priv,
+ struct gve_rx_ring *rx,
+ bool xdp)
+{
+ u32 ntfy_id = gve_rx_idx_to_ntfy(priv, rx->q_num);
+ struct page_pool_params pp = {
+ .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
+ .order = 0,
+ .pool_size = GVE_PAGE_POOL_SIZE_MULTIPLIER * priv->rx_desc_cnt,
+ .dev = &priv->pdev->dev,
+ .netdev = priv->dev,
+ .napi = &priv->ntfy_blocks[ntfy_id].napi,
+ .max_len = PAGE_SIZE,
+ .dma_dir = xdp ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE,
+ .offset = xdp ? XDP_PACKET_HEADROOM : 0,
+ };
+
+ return page_pool_create(&pp);
+}
+
+void gve_free_buffer(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state)
+{
+ if (rx->dqo.page_pool) {
+ gve_free_to_page_pool(rx, buf_state, true);
+ gve_free_buf_state(rx, buf_state);
+ } else {
+ gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
+ buf_state);
+ }
+}
+
+void gve_reuse_buffer(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state)
+{
+ if (rx->dqo.page_pool) {
+ buf_state->page_info.netmem = 0;
+ gve_free_buf_state(rx, buf_state);
+ } else {
+ gve_dec_pagecnt_bias(&buf_state->page_info);
+ gve_try_recycle_buf(rx->gve, rx, buf_state);
+ }
+}
+
+int gve_alloc_buffer(struct gve_rx_ring *rx, struct gve_rx_desc_dqo *desc)
+{
+ struct gve_rx_buf_state_dqo *buf_state;
+
+ if (rx->dqo.page_pool) {
+ buf_state = gve_alloc_buf_state(rx);
+ if (WARN_ON_ONCE(!buf_state))
+ return -ENOMEM;
+
+ if (gve_alloc_from_page_pool(rx, buf_state))
+ goto free_buf_state;
+ } else {
+ buf_state = gve_get_recycled_buf_state(rx);
+ if (unlikely(!buf_state)) {
+ buf_state = gve_alloc_buf_state(rx);
+ if (unlikely(!buf_state))
+ return -ENOMEM;
+
+ if (unlikely(gve_alloc_qpl_page_dqo(rx, buf_state)))
+ goto free_buf_state;
+ }
+ }
+ desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states);
+ desc->buf_addr = cpu_to_le64(buf_state->addr +
+ buf_state->page_info.page_offset +
+ buf_state->page_info.pad);
+
+ return 0;
+
+free_buf_state:
+ gve_free_buf_state(rx, buf_state);
+ return -ENOMEM;
+}
diff --git a/drivers/net/ethernet/google/gve/gve_dqo.h b/drivers/net/ethernet/google/gve/gve_dqo.h
index c36b93f0de15..e83773fb891f 100644
--- a/drivers/net/ethernet/google/gve/gve_dqo.h
+++ b/drivers/net/ethernet/google/gve/gve_dqo.h
@@ -38,10 +38,24 @@ netdev_features_t gve_features_check_dqo(struct sk_buff *skb,
netdev_features_t features);
bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean);
int gve_rx_poll_dqo(struct gve_notify_block *block, int budget);
-int gve_tx_alloc_rings_dqo(struct gve_priv *priv);
-void gve_tx_free_rings_dqo(struct gve_priv *priv);
-int gve_rx_alloc_rings_dqo(struct gve_priv *priv);
-void gve_rx_free_rings_dqo(struct gve_priv *priv);
+int gve_tx_alloc_rings_dqo(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *cfg);
+void gve_tx_free_rings_dqo(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *cfg);
+void gve_tx_start_ring_dqo(struct gve_priv *priv, int idx);
+void gve_tx_stop_ring_dqo(struct gve_priv *priv, int idx);
+int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg,
+ struct gve_rx_ring *rx,
+ int idx);
+void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
+ struct gve_rx_alloc_rings_cfg *cfg);
+int gve_rx_alloc_rings_dqo(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg);
+void gve_rx_free_rings_dqo(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg);
+void gve_rx_start_ring_dqo(struct gve_priv *priv, int idx);
+void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx);
int gve_clean_tx_done_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
struct napi_struct *napi);
void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx);
@@ -93,4 +107,6 @@ gve_set_itr_coalesce_usecs_dqo(struct gve_priv *priv,
gve_write_irq_doorbell_dqo(priv, block,
gve_setup_itr_interval_dqo(usecs));
}
+
+int gve_napi_poll_dqo(struct napi_struct *napi, int budget);
#endif /* _GVE_DQO_H_ */
diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c
index e5397aa1e48f..3c1da0cf3f61 100644
--- a/drivers/net/ethernet/google/gve/gve_ethtool.c
+++ b/drivers/net/ethernet/google/gve/gve_ethtool.c
@@ -1,14 +1,14 @@
// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
*
- * Copyright (C) 2015-2021 Google, Inc.
+ * Copyright (C) 2015-2024 Google LLC
*/
-#include <linux/ethtool.h>
#include <linux/rtnetlink.h>
#include "gve.h"
#include "gve_adminq.h"
#include "gve_dqo.h"
+#include "gve_utils.h"
static void gve_get_drvinfo(struct net_device *netdev,
struct ethtool_drvinfo *info)
@@ -40,17 +40,18 @@ static u32 gve_get_msglevel(struct net_device *netdev)
* as declared in enum xdp_action inside file uapi/linux/bpf.h .
*/
static const char gve_gstrings_main_stats[][ETH_GSTRING_LEN] = {
- "rx_packets", "tx_packets", "rx_bytes", "tx_bytes",
- "rx_dropped", "tx_dropped", "tx_timeouts",
+ "rx_packets", "rx_hsplit_pkt", "tx_packets", "rx_bytes",
+ "tx_bytes", "rx_dropped", "tx_dropped", "tx_timeouts",
"rx_skb_alloc_fail", "rx_buf_alloc_fail", "rx_desc_err_dropped_pkt",
+ "rx_hsplit_unsplit_pkt",
"interface_up_cnt", "interface_down_cnt", "reset_cnt",
"page_alloc_fail", "dma_mapping_error", "stats_report_trigger_cnt",
};
static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = {
- "rx_posted_desc[%u]", "rx_completed_desc[%u]", "rx_consumed_desc[%u]", "rx_bytes[%u]",
- "rx_cont_packet_cnt[%u]", "rx_frag_flip_cnt[%u]", "rx_frag_copy_cnt[%u]",
- "rx_frag_alloc_cnt[%u]",
+ "rx_posted_desc[%u]", "rx_completed_desc[%u]", "rx_consumed_desc[%u]",
+ "rx_bytes[%u]", "rx_hsplit_bytes[%u]", "rx_cont_packet_cnt[%u]",
+ "rx_frag_flip_cnt[%u]", "rx_frag_copy_cnt[%u]", "rx_frag_alloc_cnt[%u]",
"rx_dropped_pkt[%u]", "rx_copybreak_pkt[%u]", "rx_copied_pkt[%u]",
"rx_queue_drop_cnt[%u]", "rx_no_buffers_posted[%u]",
"rx_drops_packet_over_mru[%u]", "rx_drops_invalid_checksum[%u]",
@@ -62,18 +63,20 @@ static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = {
static const char gve_gstrings_tx_stats[][ETH_GSTRING_LEN] = {
"tx_posted_desc[%u]", "tx_completed_desc[%u]", "tx_consumed_desc[%u]", "tx_bytes[%u]",
"tx_wake[%u]", "tx_stop[%u]", "tx_event_counter[%u]",
- "tx_dma_mapping_error[%u]", "tx_xsk_wakeup[%u]",
- "tx_xsk_done[%u]", "tx_xsk_sent[%u]", "tx_xdp_xmit[%u]", "tx_xdp_xmit_errors[%u]"
+ "tx_dma_mapping_error[%u]",
+ "tx_xsk_sent[%u]", "tx_xdp_xmit[%u]", "tx_xdp_xmit_errors[%u]"
};
-static const char gve_gstrings_adminq_stats[][ETH_GSTRING_LEN] = {
+static const char gve_gstrings_adminq_stats[][ETH_GSTRING_LEN] __nonstring_array = {
"adminq_prod_cnt", "adminq_cmd_fail", "adminq_timeouts",
"adminq_describe_device_cnt", "adminq_cfg_device_resources_cnt",
"adminq_register_page_list_cnt", "adminq_unregister_page_list_cnt",
"adminq_create_tx_queue_cnt", "adminq_create_rx_queue_cnt",
"adminq_destroy_tx_queue_cnt", "adminq_destroy_rx_queue_cnt",
"adminq_dcfg_device_resources_cnt", "adminq_set_driver_parameter_cnt",
- "adminq_report_stats_cnt", "adminq_report_link_speed_cnt"
+ "adminq_report_stats_cnt", "adminq_report_link_speed_cnt", "adminq_get_ptype_map_cnt",
+ "adminq_query_flow_rules", "adminq_cfg_flow_rule", "adminq_cfg_rss_cnt",
+ "adminq_query_rss_cnt",
};
static const char gve_gstrings_priv_flags[][ETH_GSTRING_LEN] = {
@@ -89,42 +92,34 @@ static const char gve_gstrings_priv_flags[][ETH_GSTRING_LEN] = {
static void gve_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
{
struct gve_priv *priv = netdev_priv(netdev);
- char *s = (char *)data;
+ u8 *s = (char *)data;
int num_tx_queues;
int i, j;
num_tx_queues = gve_num_tx_queues(priv);
switch (stringset) {
case ETH_SS_STATS:
- memcpy(s, *gve_gstrings_main_stats,
- sizeof(gve_gstrings_main_stats));
- s += sizeof(gve_gstrings_main_stats);
-
- for (i = 0; i < priv->rx_cfg.num_queues; i++) {
- for (j = 0; j < NUM_GVE_RX_CNTS; j++) {
- snprintf(s, ETH_GSTRING_LEN,
- gve_gstrings_rx_stats[j], i);
- s += ETH_GSTRING_LEN;
- }
- }
+ for (i = 0; i < ARRAY_SIZE(gve_gstrings_main_stats); i++)
+ ethtool_puts(&s, gve_gstrings_main_stats[i]);
- for (i = 0; i < num_tx_queues; i++) {
- for (j = 0; j < NUM_GVE_TX_CNTS; j++) {
- snprintf(s, ETH_GSTRING_LEN,
- gve_gstrings_tx_stats[j], i);
- s += ETH_GSTRING_LEN;
- }
- }
+ for (i = 0; i < priv->rx_cfg.num_queues; i++)
+ for (j = 0; j < NUM_GVE_RX_CNTS; j++)
+ ethtool_sprintf(&s, gve_gstrings_rx_stats[j],
+ i);
+
+ for (i = 0; i < num_tx_queues; i++)
+ for (j = 0; j < NUM_GVE_TX_CNTS; j++)
+ ethtool_sprintf(&s, gve_gstrings_tx_stats[j],
+ i);
+
+ for (i = 0; i < ARRAY_SIZE(gve_gstrings_adminq_stats); i++)
+ ethtool_cpy(&s, gve_gstrings_adminq_stats[i]);
- memcpy(s, *gve_gstrings_adminq_stats,
- sizeof(gve_gstrings_adminq_stats));
- s += sizeof(gve_gstrings_adminq_stats);
break;
case ETH_SS_PRIV_FLAGS:
- memcpy(s, *gve_gstrings_priv_flags,
- sizeof(gve_gstrings_priv_flags));
- s += sizeof(gve_gstrings_priv_flags);
+ for (i = 0; i < ARRAY_SIZE(gve_gstrings_priv_flags); i++)
+ ethtool_puts(&s, gve_gstrings_priv_flags[i]);
break;
default:
@@ -154,15 +149,19 @@ static void
gve_get_ethtool_stats(struct net_device *netdev,
struct ethtool_stats *stats, u64 *data)
{
- u64 tmp_rx_pkts, tmp_rx_bytes, tmp_rx_skb_alloc_fail,
- tmp_rx_buf_alloc_fail, tmp_rx_desc_err_dropped_pkt,
+ u64 tmp_rx_pkts, tmp_rx_hsplit_pkt, tmp_rx_bytes, tmp_rx_hsplit_bytes,
+ tmp_rx_skb_alloc_fail, tmp_rx_buf_alloc_fail,
+ tmp_rx_desc_err_dropped_pkt, tmp_rx_hsplit_unsplit_pkt,
tmp_tx_pkts, tmp_tx_bytes;
- u64 rx_buf_alloc_fail, rx_desc_err_dropped_pkt, rx_pkts,
- rx_skb_alloc_fail, rx_bytes, tx_pkts, tx_bytes, tx_dropped;
+ u64 rx_buf_alloc_fail, rx_desc_err_dropped_pkt, rx_hsplit_unsplit_pkt,
+ rx_pkts, rx_hsplit_pkt, rx_skb_alloc_fail, rx_bytes, tx_pkts, tx_bytes,
+ tx_dropped;
int stats_idx, base_stats_idx, max_stats_idx;
struct stats *report_stats;
int *rx_qid_to_stats_idx;
int *tx_qid_to_stats_idx;
+ int num_stopped_rxqs = 0;
+ int num_stopped_txqs = 0;
struct gve_priv *priv;
bool skip_nic_stats;
unsigned int start;
@@ -179,14 +178,27 @@ gve_get_ethtool_stats(struct net_device *netdev,
sizeof(int), GFP_KERNEL);
if (!rx_qid_to_stats_idx)
return;
+ for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
+ rx_qid_to_stats_idx[ring] = -1;
+ if (!gve_rx_was_added_to_block(priv, ring))
+ num_stopped_rxqs++;
+ }
tx_qid_to_stats_idx = kmalloc_array(num_tx_queues,
sizeof(int), GFP_KERNEL);
if (!tx_qid_to_stats_idx) {
kfree(rx_qid_to_stats_idx);
return;
}
- for (rx_pkts = 0, rx_bytes = 0, rx_skb_alloc_fail = 0,
- rx_buf_alloc_fail = 0, rx_desc_err_dropped_pkt = 0, ring = 0;
+ for (ring = 0; ring < num_tx_queues; ring++) {
+ tx_qid_to_stats_idx[ring] = -1;
+ if (!gve_tx_was_added_to_block(priv, ring))
+ num_stopped_txqs++;
+ }
+
+ for (rx_pkts = 0, rx_bytes = 0, rx_hsplit_pkt = 0,
+ rx_skb_alloc_fail = 0, rx_buf_alloc_fail = 0,
+ rx_desc_err_dropped_pkt = 0, rx_hsplit_unsplit_pkt = 0,
+ ring = 0;
ring < priv->rx_cfg.num_queues; ring++) {
if (priv->rx) {
do {
@@ -195,18 +207,23 @@ gve_get_ethtool_stats(struct net_device *netdev,
start =
u64_stats_fetch_begin(&priv->rx[ring].statss);
tmp_rx_pkts = rx->rpackets;
+ tmp_rx_hsplit_pkt = rx->rx_hsplit_pkt;
tmp_rx_bytes = rx->rbytes;
tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail;
tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail;
tmp_rx_desc_err_dropped_pkt =
rx->rx_desc_err_dropped_pkt;
+ tmp_rx_hsplit_unsplit_pkt =
+ rx->rx_hsplit_unsplit_pkt;
} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
start));
rx_pkts += tmp_rx_pkts;
+ rx_hsplit_pkt += tmp_rx_hsplit_pkt;
rx_bytes += tmp_rx_bytes;
rx_skb_alloc_fail += tmp_rx_skb_alloc_fail;
rx_buf_alloc_fail += tmp_rx_buf_alloc_fail;
rx_desc_err_dropped_pkt += tmp_rx_desc_err_dropped_pkt;
+ rx_hsplit_unsplit_pkt += tmp_rx_hsplit_unsplit_pkt;
}
}
for (tx_pkts = 0, tx_bytes = 0, tx_dropped = 0, ring = 0;
@@ -227,6 +244,7 @@ gve_get_ethtool_stats(struct net_device *netdev,
i = 0;
data[i++] = rx_pkts;
+ data[i++] = rx_hsplit_pkt;
data[i++] = tx_pkts;
data[i++] = rx_bytes;
data[i++] = tx_bytes;
@@ -238,6 +256,7 @@ gve_get_ethtool_stats(struct net_device *netdev,
data[i++] = rx_skb_alloc_fail;
data[i++] = rx_buf_alloc_fail;
data[i++] = rx_desc_err_dropped_pkt;
+ data[i++] = rx_hsplit_unsplit_pkt;
data[i++] = priv->interface_up_cnt;
data[i++] = priv->interface_down_cnt;
data[i++] = priv->reset_cnt;
@@ -249,7 +268,13 @@ gve_get_ethtool_stats(struct net_device *netdev,
/* For rx cross-reporting stats, start from nic rx stats in report */
base_stats_idx = GVE_TX_STATS_REPORT_NUM * num_tx_queues +
GVE_RX_STATS_REPORT_NUM * priv->rx_cfg.num_queues;
- max_stats_idx = NIC_RX_STATS_REPORT_NUM * priv->rx_cfg.num_queues +
+ /* The boundary between driver stats and NIC stats shifts if there are
+ * stopped queues.
+ */
+ base_stats_idx += NIC_RX_STATS_REPORT_NUM * num_stopped_rxqs +
+ NIC_TX_STATS_REPORT_NUM * num_stopped_txqs;
+ max_stats_idx = NIC_RX_STATS_REPORT_NUM *
+ (priv->rx_cfg.num_queues - num_stopped_rxqs) +
base_stats_idx;
/* Preprocess the stats report for rx, map queue id to start index */
skip_nic_stats = false;
@@ -263,6 +288,10 @@ gve_get_ethtool_stats(struct net_device *netdev,
skip_nic_stats = true;
break;
}
+ if (queue_id < 0 || queue_id >= priv->rx_cfg.num_queues) {
+ net_err_ratelimited("Invalid rxq id in NIC stats\n");
+ continue;
+ }
rx_qid_to_stats_idx[queue_id] = stats_idx;
}
/* walk RX rings */
@@ -277,6 +306,7 @@ gve_get_ethtool_stats(struct net_device *netdev,
start =
u64_stats_fetch_begin(&priv->rx[ring].statss);
tmp_rx_bytes = rx->rbytes;
+ tmp_rx_hsplit_bytes = rx->rx_hsplit_bytes;
tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail;
tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail;
tmp_rx_desc_err_dropped_pkt =
@@ -284,6 +314,7 @@ gve_get_ethtool_stats(struct net_device *netdev,
} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
start));
data[i++] = tmp_rx_bytes;
+ data[i++] = tmp_rx_hsplit_bytes;
data[i++] = rx->rx_cont_packet_cnt;
data[i++] = rx->rx_frag_flip_cnt;
data[i++] = rx->rx_frag_copy_cnt;
@@ -295,11 +326,11 @@ gve_get_ethtool_stats(struct net_device *netdev,
data[i++] = rx->rx_copybreak_pkt;
data[i++] = rx->rx_copied_pkt;
/* stats from NIC */
- if (skip_nic_stats) {
+ stats_idx = rx_qid_to_stats_idx[ring];
+ if (skip_nic_stats || stats_idx < 0) {
/* skip NIC rx stats */
i += NIC_RX_STATS_REPORT_NUM;
} else {
- stats_idx = rx_qid_to_stats_idx[ring];
for (j = 0; j < NIC_RX_STATS_REPORT_NUM; j++) {
u64 value =
be64_to_cpu(report_stats[stats_idx + j].value);
@@ -325,7 +356,8 @@ gve_get_ethtool_stats(struct net_device *netdev,
/* For tx cross-reporting stats, start from nic tx stats in report */
base_stats_idx = max_stats_idx;
- max_stats_idx = NIC_TX_STATS_REPORT_NUM * num_tx_queues +
+ max_stats_idx = NIC_TX_STATS_REPORT_NUM *
+ (num_tx_queues - num_stopped_txqs) +
max_stats_idx;
/* Preprocess the stats report for tx, map queue id to start index */
skip_nic_stats = false;
@@ -339,6 +371,10 @@ gve_get_ethtool_stats(struct net_device *netdev,
skip_nic_stats = true;
break;
}
+ if (queue_id < 0 || queue_id >= num_tx_queues) {
+ net_err_ratelimited("Invalid txq id in NIC stats\n");
+ continue;
+ }
tx_qid_to_stats_idx[queue_id] = stats_idx;
}
/* walk TX rings */
@@ -356,7 +392,9 @@ gve_get_ethtool_stats(struct net_device *netdev,
*/
data[i++] = 0;
data[i++] = 0;
- data[i++] = tx->dqo_tx.tail - tx->dqo_tx.head;
+ data[i++] =
+ (tx->dqo_tx.tail - tx->dqo_tx.head) &
+ tx->mask;
}
do {
start =
@@ -370,20 +408,18 @@ gve_get_ethtool_stats(struct net_device *netdev,
data[i++] = gve_tx_load_event_counter(priv, tx);
data[i++] = tx->dma_mapping_error;
/* stats from NIC */
- if (skip_nic_stats) {
+ stats_idx = tx_qid_to_stats_idx[ring];
+ if (skip_nic_stats || stats_idx < 0) {
/* skip NIC tx stats */
i += NIC_TX_STATS_REPORT_NUM;
} else {
- stats_idx = tx_qid_to_stats_idx[ring];
for (j = 0; j < NIC_TX_STATS_REPORT_NUM; j++) {
u64 value =
be64_to_cpu(report_stats[stats_idx + j].value);
data[i++] = value;
}
}
- /* XDP xsk counters */
- data[i++] = tx->xdp_xsk_wakeup;
- data[i++] = tx->xdp_xsk_done;
+ /* XDP counters */
do {
start = u64_stats_fetch_begin(&priv->tx[ring].statss);
data[i] = tx->xdp_xsk_sent;
@@ -415,6 +451,11 @@ gve_get_ethtool_stats(struct net_device *netdev,
data[i++] = priv->adminq_set_driver_parameter_cnt;
data[i++] = priv->adminq_report_stats_cnt;
data[i++] = priv->adminq_report_link_speed_cnt;
+ data[i++] = priv->adminq_get_ptype_map_cnt;
+ data[i++] = priv->adminq_query_flow_rules_cnt;
+ data[i++] = priv->adminq_cfg_flow_rule_cnt;
+ data[i++] = priv->adminq_cfg_rss_cnt;
+ data[i++] = priv->adminq_query_rss_cnt;
}
static void gve_get_channels(struct net_device *netdev,
@@ -436,11 +477,12 @@ static int gve_set_channels(struct net_device *netdev,
struct ethtool_channels *cmd)
{
struct gve_priv *priv = netdev_priv(netdev);
- struct gve_queue_config new_tx_cfg = priv->tx_cfg;
- struct gve_queue_config new_rx_cfg = priv->rx_cfg;
+ struct gve_tx_queue_config new_tx_cfg = priv->tx_cfg;
+ struct gve_rx_queue_config new_rx_cfg = priv->rx_cfg;
struct ethtool_channels old_settings;
int new_tx = cmd->tx_count;
int new_rx = cmd->rx_count;
+ bool reset_rss = false;
gve_get_channels(netdev, &old_settings);
@@ -451,22 +493,27 @@ static int gve_set_channels(struct net_device *netdev,
if (!new_rx || !new_tx)
return -EINVAL;
- if (priv->num_xdp_queues &&
- (new_tx != new_rx || (2 * new_tx > priv->tx_cfg.max_queues))) {
- dev_err(&priv->pdev->dev, "XDP load failed: The number of configured RX queues should be equal to the number of configured TX queues and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues");
- return -EINVAL;
- }
+ if (priv->xdp_prog) {
+ if (new_tx != new_rx ||
+ (2 * new_tx > priv->tx_cfg.max_queues)) {
+ dev_err(&priv->pdev->dev, "The number of configured RX queues should be equal to the number of configured TX queues and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues when XDP program is installed");
+ return -EINVAL;
+ }
- if (!netif_carrier_ok(netdev)) {
- priv->tx_cfg.num_queues = new_tx;
- priv->rx_cfg.num_queues = new_rx;
- return 0;
+ /* One XDP TX queue per RX queue. */
+ new_tx_cfg.num_xdp_queues = new_rx;
+ } else {
+ new_tx_cfg.num_xdp_queues = 0;
}
+ if (new_rx != priv->rx_cfg.num_queues &&
+ priv->cache_rss_config && !netif_is_rxfh_configured(netdev))
+ reset_rss = true;
+
new_tx_cfg.num_queues = new_tx;
new_rx_cfg.num_queues = new_rx;
- return gve_adjust_queues(priv, new_rx_cfg, new_tx_cfg);
+ return gve_adjust_queues(priv, new_rx_cfg, new_tx_cfg, reset_rss);
}
static void gve_get_ringparam(struct net_device *netdev,
@@ -476,10 +523,94 @@ static void gve_get_ringparam(struct net_device *netdev,
{
struct gve_priv *priv = netdev_priv(netdev);
- cmd->rx_max_pending = priv->rx_desc_cnt;
- cmd->tx_max_pending = priv->tx_desc_cnt;
+ cmd->rx_max_pending = priv->max_rx_desc_cnt;
+ cmd->tx_max_pending = priv->max_tx_desc_cnt;
cmd->rx_pending = priv->rx_desc_cnt;
cmd->tx_pending = priv->tx_desc_cnt;
+
+ if (!gve_header_split_supported(priv))
+ kernel_cmd->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_UNKNOWN;
+ else if (priv->header_split_enabled)
+ kernel_cmd->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_ENABLED;
+ else
+ kernel_cmd->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_DISABLED;
+}
+
+static int gve_adjust_ring_sizes(struct gve_priv *priv,
+ u16 new_tx_desc_cnt,
+ u16 new_rx_desc_cnt)
+{
+ struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
+ struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
+ int err;
+
+ /* get current queue configuration */
+ gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
+
+ /* copy over the new ring_size from ethtool */
+ tx_alloc_cfg.ring_size = new_tx_desc_cnt;
+ rx_alloc_cfg.ring_size = new_rx_desc_cnt;
+
+ if (netif_running(priv->dev)) {
+ err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
+ if (err)
+ return err;
+ }
+
+ /* Set new ring_size for the next up */
+ priv->tx_desc_cnt = new_tx_desc_cnt;
+ priv->rx_desc_cnt = new_rx_desc_cnt;
+
+ return 0;
+}
+
+static int gve_validate_req_ring_size(struct gve_priv *priv, u16 new_tx_desc_cnt,
+ u16 new_rx_desc_cnt)
+{
+ /* check for valid range */
+ if (new_tx_desc_cnt < priv->min_tx_desc_cnt ||
+ new_tx_desc_cnt > priv->max_tx_desc_cnt ||
+ new_rx_desc_cnt < priv->min_rx_desc_cnt ||
+ new_rx_desc_cnt > priv->max_rx_desc_cnt) {
+ dev_err(&priv->pdev->dev, "Requested descriptor count out of range\n");
+ return -EINVAL;
+ }
+
+ if (!is_power_of_2(new_tx_desc_cnt) || !is_power_of_2(new_rx_desc_cnt)) {
+ dev_err(&priv->pdev->dev, "Requested descriptor count has to be a power of 2\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int gve_set_ringparam(struct net_device *netdev,
+ struct ethtool_ringparam *cmd,
+ struct kernel_ethtool_ringparam *kernel_cmd,
+ struct netlink_ext_ack *extack)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+ u16 new_tx_cnt, new_rx_cnt;
+ int err;
+
+ err = gve_set_hsplit_config(priv, kernel_cmd->tcp_data_split);
+ if (err)
+ return err;
+
+ if (cmd->tx_pending == priv->tx_desc_cnt && cmd->rx_pending == priv->rx_desc_cnt)
+ return 0;
+
+ if (!priv->modify_ring_size_enabled) {
+ dev_err(&priv->pdev->dev, "Modify ring size is not supported.\n");
+ return -EOPNOTSUPP;
+ }
+
+ new_tx_cnt = cmd->tx_pending;
+ new_rx_cnt = cmd->rx_pending;
+
+ if (gve_validate_req_ring_size(priv, new_tx_cnt, new_rx_cnt))
+ return -EINVAL;
+
+ return gve_adjust_ring_sizes(priv, new_tx_cnt, new_rx_cnt);
}
static int gve_user_reset(struct net_device *netdev, u32 *flags)
@@ -518,8 +649,7 @@ static int gve_set_tunable(struct net_device *netdev,
switch (etuna->id) {
case ETHTOOL_RX_COPYBREAK:
{
- u32 max_copybreak = gve_is_gqi(priv) ?
- GVE_DEFAULT_RX_BUFFER_SIZE : priv->data_buffer_size_dqo;
+ u32 max_copybreak = priv->rx_cfg.packet_buffer_size;
len = *(u32 *)value;
if (len > max_copybreak)
@@ -575,7 +705,7 @@ static int gve_set_priv_flags(struct net_device *netdev, u32 flags)
memset(priv->stats_report->stats, 0, (tx_stats_num + rx_stats_num) *
sizeof(struct stats));
- del_timer_sync(&priv->stats_report_timer);
+ timer_delete_sync(&priv->stats_report_timer);
}
return 0;
}
@@ -653,8 +783,154 @@ static int gve_set_coalesce(struct net_device *netdev,
return 0;
}
+static int gve_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+ int err = 0;
+
+ if (!(netdev->features & NETIF_F_NTUPLE))
+ return -EOPNOTSUPP;
+
+ switch (cmd->cmd) {
+ case ETHTOOL_SRXCLSRLINS:
+ err = gve_add_flow_rule(priv, cmd);
+ break;
+ case ETHTOOL_SRXCLSRLDEL:
+ err = gve_del_flow_rule(priv, cmd);
+ break;
+ case ETHTOOL_SRXFH:
+ err = -EOPNOTSUPP;
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ return err;
+}
+
+static int gve_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, u32 *rule_locs)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+ int err = 0;
+
+ switch (cmd->cmd) {
+ case ETHTOOL_GRXRINGS:
+ cmd->data = priv->rx_cfg.num_queues;
+ break;
+ case ETHTOOL_GRXCLSRLCNT:
+ if (!priv->max_flow_rules)
+ return -EOPNOTSUPP;
+
+ err = gve_adminq_query_flow_rules(priv, GVE_FLOW_RULE_QUERY_STATS, 0);
+ if (err)
+ return err;
+
+ cmd->rule_cnt = priv->num_flow_rules;
+ cmd->data = priv->max_flow_rules;
+ break;
+ case ETHTOOL_GRXCLSRULE:
+ err = gve_get_flow_rule_entry(priv, cmd);
+ break;
+ case ETHTOOL_GRXCLSRLALL:
+ err = gve_get_flow_rule_ids(priv, cmd, (u32 *)rule_locs);
+ break;
+ case ETHTOOL_GRXFH:
+ err = -EOPNOTSUPP;
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ return err;
+}
+
+static u32 gve_get_rxfh_key_size(struct net_device *netdev)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+
+ return priv->rss_key_size;
+}
+
+static u32 gve_get_rxfh_indir_size(struct net_device *netdev)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+
+ return priv->rss_lut_size;
+}
+
+static void gve_get_rss_config_cache(struct gve_priv *priv,
+ struct ethtool_rxfh_param *rxfh)
+{
+ struct gve_rss_config *rss_config = &priv->rss_config;
+
+ rxfh->hfunc = ETH_RSS_HASH_TOP;
+
+ if (rxfh->key) {
+ rxfh->key_size = priv->rss_key_size;
+ memcpy(rxfh->key, rss_config->hash_key, priv->rss_key_size);
+ }
+
+ if (rxfh->indir) {
+ rxfh->indir_size = priv->rss_lut_size;
+ memcpy(rxfh->indir, rss_config->hash_lut,
+ priv->rss_lut_size * sizeof(*rxfh->indir));
+ }
+}
+
+static int gve_get_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+
+ if (!priv->rss_key_size || !priv->rss_lut_size)
+ return -EOPNOTSUPP;
+
+ if (priv->cache_rss_config) {
+ gve_get_rss_config_cache(priv, rxfh);
+ return 0;
+ }
+
+ return gve_adminq_query_rss_config(priv, rxfh);
+}
+
+static void gve_set_rss_config_cache(struct gve_priv *priv,
+ struct ethtool_rxfh_param *rxfh)
+{
+ struct gve_rss_config *rss_config = &priv->rss_config;
+
+ if (rxfh->key)
+ memcpy(rss_config->hash_key, rxfh->key, priv->rss_key_size);
+
+ if (rxfh->indir)
+ memcpy(rss_config->hash_lut, rxfh->indir,
+ priv->rss_lut_size * sizeof(*rxfh->indir));
+}
+
+static int gve_set_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh,
+ struct netlink_ext_ack *extack)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+ int err;
+
+ if (!priv->rss_key_size || !priv->rss_lut_size)
+ return -EOPNOTSUPP;
+
+ err = gve_adminq_configure_rss(priv, rxfh);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Fail to configure RSS config");
+ return err;
+ }
+
+ if (priv->cache_rss_config)
+ gve_set_rss_config_cache(priv, rxfh);
+
+ return 0;
+}
+
const struct ethtool_ops gve_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_USECS,
+ .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT,
.get_drvinfo = gve_get_drvinfo,
.get_strings = gve_get_strings,
.get_sset_count = gve_get_sset_count,
@@ -663,14 +939,22 @@ const struct ethtool_ops gve_ethtool_ops = {
.get_msglevel = gve_get_msglevel,
.set_channels = gve_set_channels,
.get_channels = gve_get_channels,
+ .set_rxnfc = gve_set_rxnfc,
+ .get_rxnfc = gve_get_rxnfc,
+ .get_rxfh_indir_size = gve_get_rxfh_indir_size,
+ .get_rxfh_key_size = gve_get_rxfh_key_size,
+ .get_rxfh = gve_get_rxfh,
+ .set_rxfh = gve_set_rxfh,
.get_link = ethtool_op_get_link,
.get_coalesce = gve_get_coalesce,
.set_coalesce = gve_set_coalesce,
.get_ringparam = gve_get_ringparam,
+ .set_ringparam = gve_set_ringparam,
.reset = gve_user_reset,
.get_tunable = gve_get_tunable,
.set_tunable = gve_set_tunable,
.get_priv_flags = gve_get_priv_flags,
.set_priv_flags = gve_set_priv_flags,
- .get_link_ksettings = gve_get_link_ksettings
+ .get_link_ksettings = gve_get_link_ksettings,
+ .get_ts_info = ethtool_op_get_ts_info,
};
diff --git a/drivers/net/ethernet/google/gve/gve_flow_rule.c b/drivers/net/ethernet/google/gve/gve_flow_rule.c
new file mode 100644
index 000000000000..0bb8cd1876a3
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_flow_rule.c
@@ -0,0 +1,298 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2024 Google LLC
+ */
+
+#include "gve.h"
+#include "gve_adminq.h"
+
+static
+int gve_fill_ethtool_flow_spec(struct ethtool_rx_flow_spec *fsp,
+ struct gve_adminq_queried_flow_rule *rule)
+{
+ struct gve_adminq_flow_rule *flow_rule = &rule->flow_rule;
+ static const u16 flow_type_lut[] = {
+ [GVE_FLOW_TYPE_TCPV4] = TCP_V4_FLOW,
+ [GVE_FLOW_TYPE_UDPV4] = UDP_V4_FLOW,
+ [GVE_FLOW_TYPE_SCTPV4] = SCTP_V4_FLOW,
+ [GVE_FLOW_TYPE_AHV4] = AH_V4_FLOW,
+ [GVE_FLOW_TYPE_ESPV4] = ESP_V4_FLOW,
+ [GVE_FLOW_TYPE_TCPV6] = TCP_V6_FLOW,
+ [GVE_FLOW_TYPE_UDPV6] = UDP_V6_FLOW,
+ [GVE_FLOW_TYPE_SCTPV6] = SCTP_V6_FLOW,
+ [GVE_FLOW_TYPE_AHV6] = AH_V6_FLOW,
+ [GVE_FLOW_TYPE_ESPV6] = ESP_V6_FLOW,
+ };
+
+ if (be16_to_cpu(flow_rule->flow_type) >= ARRAY_SIZE(flow_type_lut))
+ return -EINVAL;
+
+ fsp->flow_type = flow_type_lut[be16_to_cpu(flow_rule->flow_type)];
+
+ memset(&fsp->h_u, 0, sizeof(fsp->h_u));
+ memset(&fsp->h_ext, 0, sizeof(fsp->h_ext));
+ memset(&fsp->m_u, 0, sizeof(fsp->m_u));
+ memset(&fsp->m_ext, 0, sizeof(fsp->m_ext));
+
+ switch (fsp->flow_type) {
+ case TCP_V4_FLOW:
+ case UDP_V4_FLOW:
+ case SCTP_V4_FLOW:
+ fsp->h_u.tcp_ip4_spec.ip4src = flow_rule->key.src_ip[0];
+ fsp->h_u.tcp_ip4_spec.ip4dst = flow_rule->key.dst_ip[0];
+ fsp->h_u.tcp_ip4_spec.psrc = flow_rule->key.src_port;
+ fsp->h_u.tcp_ip4_spec.pdst = flow_rule->key.dst_port;
+ fsp->h_u.tcp_ip4_spec.tos = flow_rule->key.tos;
+ fsp->m_u.tcp_ip4_spec.ip4src = flow_rule->mask.src_ip[0];
+ fsp->m_u.tcp_ip4_spec.ip4dst = flow_rule->mask.dst_ip[0];
+ fsp->m_u.tcp_ip4_spec.psrc = flow_rule->mask.src_port;
+ fsp->m_u.tcp_ip4_spec.pdst = flow_rule->mask.dst_port;
+ fsp->m_u.tcp_ip4_spec.tos = flow_rule->mask.tos;
+ break;
+ case AH_V4_FLOW:
+ case ESP_V4_FLOW:
+ fsp->h_u.ah_ip4_spec.ip4src = flow_rule->key.src_ip[0];
+ fsp->h_u.ah_ip4_spec.ip4dst = flow_rule->key.dst_ip[0];
+ fsp->h_u.ah_ip4_spec.spi = flow_rule->key.spi;
+ fsp->h_u.ah_ip4_spec.tos = flow_rule->key.tos;
+ fsp->m_u.ah_ip4_spec.ip4src = flow_rule->mask.src_ip[0];
+ fsp->m_u.ah_ip4_spec.ip4dst = flow_rule->mask.dst_ip[0];
+ fsp->m_u.ah_ip4_spec.spi = flow_rule->mask.spi;
+ fsp->m_u.ah_ip4_spec.tos = flow_rule->mask.tos;
+ break;
+ case TCP_V6_FLOW:
+ case UDP_V6_FLOW:
+ case SCTP_V6_FLOW:
+ memcpy(fsp->h_u.tcp_ip6_spec.ip6src, &flow_rule->key.src_ip,
+ sizeof(struct in6_addr));
+ memcpy(fsp->h_u.tcp_ip6_spec.ip6dst, &flow_rule->key.dst_ip,
+ sizeof(struct in6_addr));
+ fsp->h_u.tcp_ip6_spec.psrc = flow_rule->key.src_port;
+ fsp->h_u.tcp_ip6_spec.pdst = flow_rule->key.dst_port;
+ fsp->h_u.tcp_ip6_spec.tclass = flow_rule->key.tclass;
+ memcpy(fsp->m_u.tcp_ip6_spec.ip6src, &flow_rule->mask.src_ip,
+ sizeof(struct in6_addr));
+ memcpy(fsp->m_u.tcp_ip6_spec.ip6dst, &flow_rule->mask.dst_ip,
+ sizeof(struct in6_addr));
+ fsp->m_u.tcp_ip6_spec.psrc = flow_rule->mask.src_port;
+ fsp->m_u.tcp_ip6_spec.pdst = flow_rule->mask.dst_port;
+ fsp->m_u.tcp_ip6_spec.tclass = flow_rule->mask.tclass;
+ break;
+ case AH_V6_FLOW:
+ case ESP_V6_FLOW:
+ memcpy(fsp->h_u.ah_ip6_spec.ip6src, &flow_rule->key.src_ip,
+ sizeof(struct in6_addr));
+ memcpy(fsp->h_u.ah_ip6_spec.ip6dst, &flow_rule->key.dst_ip,
+ sizeof(struct in6_addr));
+ fsp->h_u.ah_ip6_spec.spi = flow_rule->key.spi;
+ fsp->h_u.ah_ip6_spec.tclass = flow_rule->key.tclass;
+ memcpy(fsp->m_u.ah_ip6_spec.ip6src, &flow_rule->mask.src_ip,
+ sizeof(struct in6_addr));
+ memcpy(fsp->m_u.ah_ip6_spec.ip6dst, &flow_rule->mask.dst_ip,
+ sizeof(struct in6_addr));
+ fsp->m_u.ah_ip6_spec.spi = flow_rule->mask.spi;
+ fsp->m_u.ah_ip6_spec.tclass = flow_rule->mask.tclass;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ fsp->ring_cookie = be16_to_cpu(flow_rule->action);
+
+ return 0;
+}
+
+static int gve_generate_flow_rule(struct gve_priv *priv, struct ethtool_rx_flow_spec *fsp,
+ struct gve_adminq_flow_rule *rule)
+{
+ static const u16 flow_type_lut[] = {
+ [TCP_V4_FLOW] = GVE_FLOW_TYPE_TCPV4,
+ [UDP_V4_FLOW] = GVE_FLOW_TYPE_UDPV4,
+ [SCTP_V4_FLOW] = GVE_FLOW_TYPE_SCTPV4,
+ [AH_V4_FLOW] = GVE_FLOW_TYPE_AHV4,
+ [ESP_V4_FLOW] = GVE_FLOW_TYPE_ESPV4,
+ [TCP_V6_FLOW] = GVE_FLOW_TYPE_TCPV6,
+ [UDP_V6_FLOW] = GVE_FLOW_TYPE_UDPV6,
+ [SCTP_V6_FLOW] = GVE_FLOW_TYPE_SCTPV6,
+ [AH_V6_FLOW] = GVE_FLOW_TYPE_AHV6,
+ [ESP_V6_FLOW] = GVE_FLOW_TYPE_ESPV6,
+ };
+ u32 flow_type;
+
+ if (fsp->ring_cookie == RX_CLS_FLOW_DISC)
+ return -EOPNOTSUPP;
+
+ if (fsp->ring_cookie >= priv->rx_cfg.num_queues)
+ return -EINVAL;
+
+ rule->action = cpu_to_be16(fsp->ring_cookie);
+
+ flow_type = fsp->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS);
+ if (!flow_type || flow_type >= ARRAY_SIZE(flow_type_lut))
+ return -EINVAL;
+
+ rule->flow_type = cpu_to_be16(flow_type_lut[flow_type]);
+
+ switch (flow_type) {
+ case TCP_V4_FLOW:
+ case UDP_V4_FLOW:
+ case SCTP_V4_FLOW:
+ rule->key.src_ip[0] = fsp->h_u.tcp_ip4_spec.ip4src;
+ rule->key.dst_ip[0] = fsp->h_u.tcp_ip4_spec.ip4dst;
+ rule->key.src_port = fsp->h_u.tcp_ip4_spec.psrc;
+ rule->key.dst_port = fsp->h_u.tcp_ip4_spec.pdst;
+ rule->mask.src_ip[0] = fsp->m_u.tcp_ip4_spec.ip4src;
+ rule->mask.dst_ip[0] = fsp->m_u.tcp_ip4_spec.ip4dst;
+ rule->mask.src_port = fsp->m_u.tcp_ip4_spec.psrc;
+ rule->mask.dst_port = fsp->m_u.tcp_ip4_spec.pdst;
+ break;
+ case AH_V4_FLOW:
+ case ESP_V4_FLOW:
+ rule->key.src_ip[0] = fsp->h_u.tcp_ip4_spec.ip4src;
+ rule->key.dst_ip[0] = fsp->h_u.tcp_ip4_spec.ip4dst;
+ rule->key.spi = fsp->h_u.ah_ip4_spec.spi;
+ rule->mask.src_ip[0] = fsp->m_u.tcp_ip4_spec.ip4src;
+ rule->mask.dst_ip[0] = fsp->m_u.tcp_ip4_spec.ip4dst;
+ rule->mask.spi = fsp->m_u.ah_ip4_spec.spi;
+ break;
+ case TCP_V6_FLOW:
+ case UDP_V6_FLOW:
+ case SCTP_V6_FLOW:
+ memcpy(&rule->key.src_ip, fsp->h_u.tcp_ip6_spec.ip6src,
+ sizeof(struct in6_addr));
+ memcpy(&rule->key.dst_ip, fsp->h_u.tcp_ip6_spec.ip6dst,
+ sizeof(struct in6_addr));
+ rule->key.src_port = fsp->h_u.tcp_ip6_spec.psrc;
+ rule->key.dst_port = fsp->h_u.tcp_ip6_spec.pdst;
+ memcpy(&rule->mask.src_ip, fsp->m_u.tcp_ip6_spec.ip6src,
+ sizeof(struct in6_addr));
+ memcpy(&rule->mask.dst_ip, fsp->m_u.tcp_ip6_spec.ip6dst,
+ sizeof(struct in6_addr));
+ rule->mask.src_port = fsp->m_u.tcp_ip6_spec.psrc;
+ rule->mask.dst_port = fsp->m_u.tcp_ip6_spec.pdst;
+ break;
+ case AH_V6_FLOW:
+ case ESP_V6_FLOW:
+ memcpy(&rule->key.src_ip, fsp->h_u.usr_ip6_spec.ip6src,
+ sizeof(struct in6_addr));
+ memcpy(&rule->key.dst_ip, fsp->h_u.usr_ip6_spec.ip6dst,
+ sizeof(struct in6_addr));
+ rule->key.spi = fsp->h_u.ah_ip6_spec.spi;
+ memcpy(&rule->mask.src_ip, fsp->m_u.usr_ip6_spec.ip6src,
+ sizeof(struct in6_addr));
+ memcpy(&rule->mask.dst_ip, fsp->m_u.usr_ip6_spec.ip6dst,
+ sizeof(struct in6_addr));
+ rule->key.spi = fsp->h_u.ah_ip6_spec.spi;
+ break;
+ default:
+ /* not doing un-parsed flow types */
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int gve_get_flow_rule_entry(struct gve_priv *priv, struct ethtool_rxnfc *cmd)
+{
+ struct gve_adminq_queried_flow_rule *rules_cache = priv->flow_rules_cache.rules_cache;
+ struct ethtool_rx_flow_spec *fsp = (struct ethtool_rx_flow_spec *)&cmd->fs;
+ u32 *cache_num = &priv->flow_rules_cache.rules_cache_num;
+ struct gve_adminq_queried_flow_rule *rule = NULL;
+ int err = 0;
+ u32 i;
+
+ if (!priv->max_flow_rules)
+ return -EOPNOTSUPP;
+
+ if (!priv->flow_rules_cache.rules_cache_synced ||
+ fsp->location < be32_to_cpu(rules_cache[0].location) ||
+ fsp->location > be32_to_cpu(rules_cache[*cache_num - 1].location)) {
+ err = gve_adminq_query_flow_rules(priv, GVE_FLOW_RULE_QUERY_RULES, fsp->location);
+ if (err)
+ return err;
+
+ priv->flow_rules_cache.rules_cache_synced = true;
+ }
+
+ for (i = 0; i < *cache_num; i++) {
+ if (fsp->location == be32_to_cpu(rules_cache[i].location)) {
+ rule = &rules_cache[i];
+ break;
+ }
+ }
+
+ if (!rule)
+ return -EINVAL;
+
+ err = gve_fill_ethtool_flow_spec(fsp, rule);
+
+ return err;
+}
+
+int gve_get_flow_rule_ids(struct gve_priv *priv, struct ethtool_rxnfc *cmd, u32 *rule_locs)
+{
+ __be32 *rule_ids_cache = priv->flow_rules_cache.rule_ids_cache;
+ u32 *cache_num = &priv->flow_rules_cache.rule_ids_cache_num;
+ u32 starting_rule_id = 0;
+ u32 i = 0, j = 0;
+ int err = 0;
+
+ if (!priv->max_flow_rules)
+ return -EOPNOTSUPP;
+
+ do {
+ err = gve_adminq_query_flow_rules(priv, GVE_FLOW_RULE_QUERY_IDS,
+ starting_rule_id);
+ if (err)
+ return err;
+
+ for (i = 0; i < *cache_num; i++) {
+ if (j >= cmd->rule_cnt)
+ return -EMSGSIZE;
+
+ rule_locs[j++] = be32_to_cpu(rule_ids_cache[i]);
+ starting_rule_id = be32_to_cpu(rule_ids_cache[i]) + 1;
+ }
+ } while (*cache_num != 0);
+ cmd->data = priv->max_flow_rules;
+
+ return err;
+}
+
+int gve_add_flow_rule(struct gve_priv *priv, struct ethtool_rxnfc *cmd)
+{
+ struct ethtool_rx_flow_spec *fsp = &cmd->fs;
+ struct gve_adminq_flow_rule *rule = NULL;
+ int err;
+
+ if (!priv->max_flow_rules)
+ return -EOPNOTSUPP;
+
+ rule = kvzalloc(sizeof(*rule), GFP_KERNEL);
+ if (!rule)
+ return -ENOMEM;
+
+ err = gve_generate_flow_rule(priv, fsp, rule);
+ if (err)
+ goto out;
+
+ err = gve_adminq_add_flow_rule(priv, rule, fsp->location);
+
+out:
+ kvfree(rule);
+ if (err)
+ dev_err(&priv->pdev->dev, "Failed to add the flow rule: %u", fsp->location);
+
+ return err;
+}
+
+int gve_del_flow_rule(struct gve_priv *priv, struct ethtool_rxnfc *cmd)
+{
+ struct ethtool_rx_flow_spec *fsp = (struct ethtool_rx_flow_spec *)&cmd->fs;
+
+ if (!priv->max_flow_rules)
+ return -EOPNOTSUPP;
+
+ return gve_adminq_del_flow_rule(priv, fsp->location);
+}
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 619bf63ec935..e1ffbd561fac 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
*
- * Copyright (C) 2015-2021 Google, Inc.
+ * Copyright (C) 2015-2024 Google LLC
*/
#include <linux/bpf.h>
@@ -9,6 +9,7 @@
#include <linux/etherdevice.h>
#include <linux/filter.h>
#include <linux/interrupt.h>
+#include <linux/irq.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/sched.h>
@@ -16,12 +17,14 @@
#include <linux/workqueue.h>
#include <linux/utsname.h>
#include <linux/version.h>
+#include <net/netdev_queues.h>
#include <net/sch_generic.h>
#include <net/xdp_sock_drv.h>
#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_register.h"
+#include "gve_utils.h"
#define GVE_DEFAULT_RX_COPYBREAK (256)
@@ -138,6 +141,86 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
}
}
+static int gve_alloc_flow_rule_caches(struct gve_priv *priv)
+{
+ struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;
+ int err = 0;
+
+ if (!priv->max_flow_rules)
+ return 0;
+
+ flow_rules_cache->rules_cache =
+ kvcalloc(GVE_FLOW_RULES_CACHE_SIZE, sizeof(*flow_rules_cache->rules_cache),
+ GFP_KERNEL);
+ if (!flow_rules_cache->rules_cache) {
+ dev_err(&priv->pdev->dev, "Cannot alloc flow rules cache\n");
+ return -ENOMEM;
+ }
+
+ flow_rules_cache->rule_ids_cache =
+ kvcalloc(GVE_FLOW_RULE_IDS_CACHE_SIZE, sizeof(*flow_rules_cache->rule_ids_cache),
+ GFP_KERNEL);
+ if (!flow_rules_cache->rule_ids_cache) {
+ dev_err(&priv->pdev->dev, "Cannot alloc flow rule ids cache\n");
+ err = -ENOMEM;
+ goto free_rules_cache;
+ }
+
+ return 0;
+
+free_rules_cache:
+ kvfree(flow_rules_cache->rules_cache);
+ flow_rules_cache->rules_cache = NULL;
+ return err;
+}
+
+static void gve_free_flow_rule_caches(struct gve_priv *priv)
+{
+ struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;
+
+ kvfree(flow_rules_cache->rule_ids_cache);
+ flow_rules_cache->rule_ids_cache = NULL;
+ kvfree(flow_rules_cache->rules_cache);
+ flow_rules_cache->rules_cache = NULL;
+}
+
+static int gve_alloc_rss_config_cache(struct gve_priv *priv)
+{
+ struct gve_rss_config *rss_config = &priv->rss_config;
+
+ if (!priv->cache_rss_config)
+ return 0;
+
+ rss_config->hash_key = kcalloc(priv->rss_key_size,
+ sizeof(rss_config->hash_key[0]),
+ GFP_KERNEL);
+ if (!rss_config->hash_key)
+ return -ENOMEM;
+
+ rss_config->hash_lut = kcalloc(priv->rss_lut_size,
+ sizeof(rss_config->hash_lut[0]),
+ GFP_KERNEL);
+ if (!rss_config->hash_lut)
+ goto free_rss_key_cache;
+
+ return 0;
+
+free_rss_key_cache:
+ kfree(rss_config->hash_key);
+ rss_config->hash_key = NULL;
+ return -ENOMEM;
+}
+
+static void gve_free_rss_config_cache(struct gve_priv *priv)
+{
+ struct gve_rss_config *rss_config = &priv->rss_config;
+
+ kfree(rss_config->hash_key);
+ kfree(rss_config->hash_lut);
+
+ memset(rss_config, 0, sizeof(*rss_config));
+}
+
static int gve_alloc_counter_array(struct gve_priv *priv)
{
priv->counter_array =
@@ -219,7 +302,7 @@ static void gve_free_stats_report(struct gve_priv *priv)
if (!priv->stats_report)
return;
- del_timer_sync(&priv->stats_report_timer);
+ timer_delete_sync(&priv->stats_report_timer);
dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
priv->stats_report, priv->stats_report_bus);
priv->stats_report = NULL;
@@ -252,7 +335,19 @@ static irqreturn_t gve_intr_dqo(int irq, void *arg)
return IRQ_HANDLED;
}
-static int gve_napi_poll(struct napi_struct *napi, int budget)
+static int gve_is_napi_on_home_cpu(struct gve_priv *priv, u32 irq)
+{
+ int cpu_curr = smp_processor_id();
+ const struct cpumask *aff_mask;
+
+ aff_mask = irq_get_effective_affinity_mask(irq);
+ if (unlikely(!aff_mask))
+ return 1;
+
+ return cpumask_test_cpu(cpu_curr, aff_mask);
+}
+
+int gve_napi_poll(struct napi_struct *napi, int budget)
{
struct gve_notify_block *block;
__be32 __iomem *irq_doorbell;
@@ -275,6 +370,14 @@ static int gve_napi_poll(struct napi_struct *napi, int budget)
if (block->rx) {
work_done = gve_rx_poll(block, budget);
+
+ /* Poll XSK TX as part of RX NAPI. Setup re-poll based on max of
+ * TX and RX work done.
+ */
+ if (priv->xdp_prog)
+ work_done = max_t(int, work_done,
+ gve_xsk_tx_poll(block, budget));
+
reschedule |= work_done == budget;
}
@@ -302,7 +405,7 @@ static int gve_napi_poll(struct napi_struct *napi, int budget)
return work_done;
}
-static int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
+int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
{
struct gve_notify_block *block =
container_of(napi, struct gve_notify_block, napi);
@@ -321,8 +424,21 @@ static int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
reschedule |= work_done == budget;
}
- if (reschedule)
- return budget;
+ if (reschedule) {
+ /* Reschedule by returning budget only if already on the correct
+ * cpu.
+ */
+ if (likely(gve_is_napi_on_home_cpu(priv, block->irq)))
+ return budget;
+
+ /* If not on the cpu with which this queue's irq has affinity
+ * with, we avoid rescheduling napi and arm the irq instead so
+ * that napi gets rescheduled back eventually onto the right
+ * cpu.
+ */
+ if (work_done == budget)
+ work_done--;
+ }
if (likely(napi_complete_done(napi, work_done))) {
/* Enable interrupts again.
@@ -427,6 +543,7 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
"Failed to receive msix vector %d\n", i);
goto abort_with_some_ntfy_blocks;
}
+ block->irq = priv->msix_vectors[msix_idx].vector;
irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
get_cpu_mask(i % active_cpus));
block->irq_db_index = &priv->irq_db_indices[i].index;
@@ -440,6 +557,7 @@ abort_with_some_ntfy_blocks:
irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
NULL);
free_irq(priv->msix_vectors[msix_idx].vector, block);
+ block->irq = 0;
}
kvfree(priv->ntfy_blocks);
priv->ntfy_blocks = NULL;
@@ -473,6 +591,7 @@ static void gve_free_notify_blocks(struct gve_priv *priv)
irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
NULL);
free_irq(priv->msix_vectors[msix_idx].vector, block);
+ block->irq = 0;
}
free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
kvfree(priv->ntfy_blocks);
@@ -490,9 +609,15 @@ static int gve_setup_device_resources(struct gve_priv *priv)
{
int err;
- err = gve_alloc_counter_array(priv);
+ err = gve_alloc_flow_rule_caches(priv);
if (err)
return err;
+ err = gve_alloc_rss_config_cache(priv);
+ if (err)
+ goto abort_with_flow_rule_caches;
+ err = gve_alloc_counter_array(priv);
+ if (err)
+ goto abort_with_rss_config_cache;
err = gve_alloc_notify_blocks(priv);
if (err)
goto abort_with_counter;
@@ -526,6 +651,12 @@ static int gve_setup_device_resources(struct gve_priv *priv)
}
}
+ err = gve_init_rss_config(priv, priv->rx_cfg.num_queues);
+ if (err) {
+ dev_err(&priv->pdev->dev, "Failed to init RSS config");
+ goto abort_with_ptype_lut;
+ }
+
err = gve_adminq_report_stats(priv, priv->stats_report_len,
priv->stats_report_bus,
GVE_STATS_REPORT_TIMER_PERIOD);
@@ -544,6 +675,10 @@ abort_with_ntfy_blocks:
gve_free_notify_blocks(priv);
abort_with_counter:
gve_free_counter_array(priv);
+abort_with_rss_config_cache:
+ gve_free_rss_config_cache(priv);
+abort_with_flow_rule_caches:
+ gve_free_flow_rule_caches(priv);
return err;
}
@@ -556,6 +691,12 @@ static void gve_teardown_device_resources(struct gve_priv *priv)
/* Tell device its resources are being freed */
if (gve_get_device_resources_ok(priv)) {
+ err = gve_flow_rules_reset(priv);
+ if (err) {
+ dev_err(&priv->pdev->dev,
+ "Failed to reset flow rules: err=%d\n", err);
+ gve_trigger_reset(priv);
+ }
/* detach the stats report */
err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
if (err) {
@@ -575,155 +716,131 @@ static void gve_teardown_device_resources(struct gve_priv *priv)
kvfree(priv->ptype_lut_dqo);
priv->ptype_lut_dqo = NULL;
+ gve_free_flow_rule_caches(priv);
+ gve_free_rss_config_cache(priv);
gve_free_counter_array(priv);
gve_free_notify_blocks(priv);
gve_free_stats_report(priv);
gve_clear_device_resources_ok(priv);
}
-static void gve_add_napi(struct gve_priv *priv, int ntfy_idx,
- int (*gve_poll)(struct napi_struct *, int))
+static int gve_unregister_qpl(struct gve_priv *priv,
+ struct gve_queue_page_list *qpl)
{
- struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+ int err;
- netif_napi_add(priv->dev, &block->napi, gve_poll);
-}
+ if (!qpl)
+ return 0;
-static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
-{
- struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+ err = gve_adminq_unregister_page_list(priv, qpl->id);
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "Failed to unregister queue page list %d\n",
+ qpl->id);
+ return err;
+ }
- netif_napi_del(&block->napi);
+ priv->num_registered_pages -= qpl->num_entries;
+ return 0;
}
-static int gve_register_xdp_qpls(struct gve_priv *priv)
+static int gve_register_qpl(struct gve_priv *priv,
+ struct gve_queue_page_list *qpl)
{
- int start_id;
+ int pages;
int err;
- int i;
- start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
- for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
- err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
- if (err) {
- netif_err(priv, drv, priv->dev,
- "failed to register queue page list %d\n",
- priv->qpls[i].id);
- /* This failure will trigger a reset - no need to clean
- * up
- */
- return err;
- }
- }
- return 0;
-}
+ if (!qpl)
+ return 0;
-static int gve_register_qpls(struct gve_priv *priv)
-{
- int start_id;
- int err;
- int i;
+ pages = qpl->num_entries;
- start_id = gve_tx_start_qpl_id(priv);
- for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
- err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
- if (err) {
- netif_err(priv, drv, priv->dev,
- "failed to register queue page list %d\n",
- priv->qpls[i].id);
- /* This failure will trigger a reset - no need to clean
- * up
- */
- return err;
- }
+ if (pages + priv->num_registered_pages > priv->max_registered_pages) {
+ netif_err(priv, drv, priv->dev,
+ "Reached max number of registered pages %llu > %llu\n",
+ pages + priv->num_registered_pages,
+ priv->max_registered_pages);
+ return -EINVAL;
}
- start_id = gve_rx_start_qpl_id(priv);
- for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
- err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
- if (err) {
- netif_err(priv, drv, priv->dev,
- "failed to register queue page list %d\n",
- priv->qpls[i].id);
- /* This failure will trigger a reset - no need to clean
- * up
- */
- return err;
- }
+ err = gve_adminq_register_page_list(priv, qpl);
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "failed to register queue page list %d\n",
+ qpl->id);
+ return err;
}
+
+ priv->num_registered_pages += pages;
return 0;
}
-static int gve_unregister_xdp_qpls(struct gve_priv *priv)
+static struct gve_queue_page_list *gve_tx_get_qpl(struct gve_priv *priv, int idx)
{
- int start_id;
- int err;
- int i;
+ struct gve_tx_ring *tx = &priv->tx[idx];
- start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
- for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
- err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
- /* This failure will trigger a reset - no need to clean up */
- if (err) {
- netif_err(priv, drv, priv->dev,
- "Failed to unregister queue page list %d\n",
- priv->qpls[i].id);
- return err;
- }
- }
- return 0;
+ if (gve_is_gqi(priv))
+ return tx->tx_fifo.qpl;
+ else
+ return tx->dqo.qpl;
}
-static int gve_unregister_qpls(struct gve_priv *priv)
+static struct gve_queue_page_list *gve_rx_get_qpl(struct gve_priv *priv, int idx)
+{
+ struct gve_rx_ring *rx = &priv->rx[idx];
+
+ if (gve_is_gqi(priv))
+ return rx->data.qpl;
+ else
+ return rx->dqo.qpl;
+}
+
+static int gve_register_qpls(struct gve_priv *priv)
{
- int start_id;
+ int num_tx_qpls, num_rx_qpls;
int err;
int i;
- start_id = gve_tx_start_qpl_id(priv);
- for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
- err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
- /* This failure will trigger a reset - no need to clean up */
- if (err) {
- netif_err(priv, drv, priv->dev,
- "Failed to unregister queue page list %d\n",
- priv->qpls[i].id);
+ num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv));
+ num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));
+
+ for (i = 0; i < num_tx_qpls; i++) {
+ err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i));
+ if (err)
return err;
- }
}
- start_id = gve_rx_start_qpl_id(priv);
- for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
- err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
- /* This failure will trigger a reset - no need to clean up */
- if (err) {
- netif_err(priv, drv, priv->dev,
- "Failed to unregister queue page list %d\n",
- priv->qpls[i].id);
+ for (i = 0; i < num_rx_qpls; i++) {
+ err = gve_register_qpl(priv, gve_rx_get_qpl(priv, i));
+ if (err)
return err;
- }
}
+
return 0;
}
-static int gve_create_xdp_rings(struct gve_priv *priv)
+static int gve_unregister_qpls(struct gve_priv *priv)
{
+ int num_tx_qpls, num_rx_qpls;
int err;
+ int i;
- err = gve_adminq_create_tx_queues(priv,
- gve_xdp_tx_start_queue_id(priv),
- priv->num_xdp_queues);
- if (err) {
- netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n",
- priv->num_xdp_queues);
- /* This failure will trigger a reset - no need to clean
- * up
- */
- return err;
+ num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv));
+ num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));
+
+ for (i = 0; i < num_tx_qpls; i++) {
+ err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i));
+ /* This failure will trigger a reset - no need to clean */
+ if (err)
+ return err;
}
- netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n",
- priv->num_xdp_queues);
+ for (i = 0; i < num_rx_qpls; i++) {
+ err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, i));
+ /* This failure will trigger a reset - no need to clean */
+ if (err)
+ return err;
+ }
return 0;
}
@@ -776,143 +893,101 @@ static int gve_create_rings(struct gve_priv *priv)
return 0;
}
-static void add_napi_init_xdp_sync_stats(struct gve_priv *priv,
- int (*napi_poll)(struct napi_struct *napi,
- int budget))
+static void init_xdp_sync_stats(struct gve_priv *priv)
{
int start_id = gve_xdp_tx_start_queue_id(priv);
int i;
- /* Add xdp tx napi & init sync stats*/
- for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
+ /* Init stats */
+ for (i = start_id; i < start_id + priv->tx_cfg.num_xdp_queues; i++) {
int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
u64_stats_init(&priv->tx[i].statss);
priv->tx[i].ntfy_id = ntfy_idx;
- gve_add_napi(priv, ntfy_idx, napi_poll);
}
}
-static void add_napi_init_sync_stats(struct gve_priv *priv,
- int (*napi_poll)(struct napi_struct *napi,
- int budget))
+static void gve_init_sync_stats(struct gve_priv *priv)
{
int i;
- /* Add tx napi & init sync stats*/
- for (i = 0; i < gve_num_tx_queues(priv); i++) {
- int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
-
+ for (i = 0; i < priv->tx_cfg.num_queues; i++)
u64_stats_init(&priv->tx[i].statss);
- priv->tx[i].ntfy_id = ntfy_idx;
- gve_add_napi(priv, ntfy_idx, napi_poll);
- }
- /* Add rx napi & init sync stats*/
- for (i = 0; i < priv->rx_cfg.num_queues; i++) {
- int ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
+ /* Init stats for XDP TX queues */
+ init_xdp_sync_stats(priv);
+
+ for (i = 0; i < priv->rx_cfg.num_queues; i++)
u64_stats_init(&priv->rx[i].statss);
- priv->rx[i].ntfy_id = ntfy_idx;
- gve_add_napi(priv, ntfy_idx, napi_poll);
- }
}
-static void gve_tx_free_rings(struct gve_priv *priv, int start_id, int num_rings)
+static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *cfg)
{
- if (gve_is_gqi(priv)) {
- gve_tx_free_rings_gqi(priv, start_id, num_rings);
- } else {
- gve_tx_free_rings_dqo(priv);
- }
+ cfg->qcfg = &priv->tx_cfg;
+ cfg->raw_addressing = !gve_is_qpl(priv);
+ cfg->ring_size = priv->tx_desc_cnt;
+ cfg->num_xdp_rings = cfg->qcfg->num_xdp_queues;
+ cfg->tx = priv->tx;
}
-static int gve_alloc_xdp_rings(struct gve_priv *priv)
+static void gve_tx_stop_rings(struct gve_priv *priv, int num_rings)
{
- int start_id;
- int err = 0;
+ int i;
- if (!priv->num_xdp_queues)
- return 0;
+ if (!priv->tx)
+ return;
- start_id = gve_xdp_tx_start_queue_id(priv);
- err = gve_tx_alloc_rings(priv, start_id, priv->num_xdp_queues);
- if (err)
- return err;
- add_napi_init_xdp_sync_stats(priv, gve_napi_poll);
+ for (i = 0; i < num_rings; i++) {
+ if (gve_is_gqi(priv))
+ gve_tx_stop_ring_gqi(priv, i);
+ else
+ gve_tx_stop_ring_dqo(priv, i);
+ }
+}
- return 0;
+static void gve_tx_start_rings(struct gve_priv *priv, int num_rings)
+{
+ int i;
+
+ for (i = 0; i < num_rings; i++) {
+ if (gve_is_gqi(priv))
+ gve_tx_start_ring_gqi(priv, i);
+ else
+ gve_tx_start_ring_dqo(priv, i);
+ }
}
-static int gve_alloc_rings(struct gve_priv *priv)
+static int gve_queues_mem_alloc(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
+ struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
{
int err;
- /* Setup tx rings */
- priv->tx = kvcalloc(priv->tx_cfg.max_queues, sizeof(*priv->tx),
- GFP_KERNEL);
- if (!priv->tx)
- return -ENOMEM;
-
if (gve_is_gqi(priv))
- err = gve_tx_alloc_rings(priv, 0, gve_num_tx_queues(priv));
+ err = gve_tx_alloc_rings_gqi(priv, tx_alloc_cfg);
else
- err = gve_tx_alloc_rings_dqo(priv);
+ err = gve_tx_alloc_rings_dqo(priv, tx_alloc_cfg);
if (err)
- goto free_tx;
-
- /* Setup rx rings */
- priv->rx = kvcalloc(priv->rx_cfg.max_queues, sizeof(*priv->rx),
- GFP_KERNEL);
- if (!priv->rx) {
- err = -ENOMEM;
- goto free_tx_queue;
- }
+ return err;
if (gve_is_gqi(priv))
- err = gve_rx_alloc_rings(priv);
+ err = gve_rx_alloc_rings_gqi(priv, rx_alloc_cfg);
else
- err = gve_rx_alloc_rings_dqo(priv);
+ err = gve_rx_alloc_rings_dqo(priv, rx_alloc_cfg);
if (err)
- goto free_rx;
-
- if (gve_is_gqi(priv))
- add_napi_init_sync_stats(priv, gve_napi_poll);
- else
- add_napi_init_sync_stats(priv, gve_napi_poll_dqo);
+ goto free_tx;
return 0;
-free_rx:
- kvfree(priv->rx);
- priv->rx = NULL;
-free_tx_queue:
- gve_tx_free_rings(priv, 0, gve_num_tx_queues(priv));
free_tx:
- kvfree(priv->tx);
- priv->tx = NULL;
+ if (gve_is_gqi(priv))
+ gve_tx_free_rings_gqi(priv, tx_alloc_cfg);
+ else
+ gve_tx_free_rings_dqo(priv, tx_alloc_cfg);
return err;
}
-static int gve_destroy_xdp_rings(struct gve_priv *priv)
-{
- int start_id;
- int err;
-
- start_id = gve_xdp_tx_start_queue_id(priv);
- err = gve_adminq_destroy_tx_queues(priv,
- start_id,
- priv->num_xdp_queues);
- if (err) {
- netif_err(priv, drv, priv->dev,
- "failed to destroy XDP queues\n");
- /* This failure will trigger a reset - no need to clean up */
- return err;
- }
- netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n");
-
- return 0;
-}
-
static int gve_destroy_rings(struct gve_priv *priv)
{
int num_tx_queues = gve_num_tx_queues(priv);
@@ -937,52 +1012,16 @@ static int gve_destroy_rings(struct gve_priv *priv)
return 0;
}
-static void gve_rx_free_rings(struct gve_priv *priv)
+static void gve_queues_mem_free(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *tx_cfg,
+ struct gve_rx_alloc_rings_cfg *rx_cfg)
{
- if (gve_is_gqi(priv))
- gve_rx_free_rings_gqi(priv);
- else
- gve_rx_free_rings_dqo(priv);
-}
-
-static void gve_free_xdp_rings(struct gve_priv *priv)
-{
- int ntfy_idx, start_id;
- int i;
-
- start_id = gve_xdp_tx_start_queue_id(priv);
- if (priv->tx) {
- for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
- ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
- gve_remove_napi(priv, ntfy_idx);
- }
- gve_tx_free_rings(priv, start_id, priv->num_xdp_queues);
- }
-}
-
-static void gve_free_rings(struct gve_priv *priv)
-{
- int num_tx_queues = gve_num_tx_queues(priv);
- int ntfy_idx;
- int i;
-
- if (priv->tx) {
- for (i = 0; i < num_tx_queues; i++) {
- ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
- gve_remove_napi(priv, ntfy_idx);
- }
- gve_tx_free_rings(priv, 0, num_tx_queues);
- kvfree(priv->tx);
- priv->tx = NULL;
- }
- if (priv->rx) {
- for (i = 0; i < priv->rx_cfg.num_queues; i++) {
- ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
- gve_remove_napi(priv, ntfy_idx);
- }
- gve_rx_free_rings(priv);
- kvfree(priv->rx);
- priv->rx = NULL;
+ if (gve_is_gqi(priv)) {
+ gve_tx_free_rings_gqi(priv, tx_cfg);
+ gve_rx_free_rings_gqi(priv, rx_cfg);
+ } else {
+ gve_tx_free_rings_dqo(priv, tx_cfg);
+ gve_rx_free_rings_dqo(priv, rx_cfg);
}
}
@@ -1004,44 +1043,41 @@ int gve_alloc_page(struct gve_priv *priv, struct device *dev,
return 0;
}
-static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
- int pages)
+struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv,
+ u32 id, int pages)
{
- struct gve_queue_page_list *qpl = &priv->qpls[id];
+ struct gve_queue_page_list *qpl;
int err;
int i;
- if (pages + priv->num_registered_pages > priv->max_registered_pages) {
- netif_err(priv, drv, priv->dev,
- "Reached max number of registered pages %llu > %llu\n",
- pages + priv->num_registered_pages,
- priv->max_registered_pages);
- return -EINVAL;
- }
+ qpl = kvzalloc(sizeof(*qpl), GFP_KERNEL);
+ if (!qpl)
+ return NULL;
qpl->id = id;
qpl->num_entries = 0;
qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL);
- /* caller handles clean up */
if (!qpl->pages)
- return -ENOMEM;
+ goto abort;
+
qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL);
- /* caller handles clean up */
if (!qpl->page_buses)
- return -ENOMEM;
+ goto abort;
for (i = 0; i < pages; i++) {
err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
&qpl->page_buses[i],
gve_qpl_dma_dir(priv, id), GFP_KERNEL);
- /* caller handles clean up */
if (err)
- return -ENOMEM;
+ goto abort;
qpl->num_entries++;
}
- priv->num_registered_pages += pages;
- return 0;
+ return qpl;
+
+abort:
+ gve_free_queue_page_list(priv, qpl, id);
+ return NULL;
}
void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
@@ -1053,13 +1089,16 @@ void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
put_page(page);
}
-static void gve_free_queue_page_list(struct gve_priv *priv, u32 id)
+void gve_free_queue_page_list(struct gve_priv *priv,
+ struct gve_queue_page_list *qpl,
+ u32 id)
{
- struct gve_queue_page_list *qpl = &priv->qpls[id];
int i;
- if (!qpl->pages)
+ if (!qpl)
return;
+ if (!qpl->pages)
+ goto free_qpl;
if (!qpl->page_buses)
goto free_pages;
@@ -1072,115 +1111,8 @@ static void gve_free_queue_page_list(struct gve_priv *priv, u32 id)
free_pages:
kvfree(qpl->pages);
qpl->pages = NULL;
- priv->num_registered_pages -= qpl->num_entries;
-}
-
-static int gve_alloc_xdp_qpls(struct gve_priv *priv)
-{
- int start_id;
- int i, j;
- int err;
-
- start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
- for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
- err = gve_alloc_queue_page_list(priv, i,
- priv->tx_pages_per_qpl);
- if (err)
- goto free_qpls;
- }
-
- return 0;
-
-free_qpls:
- for (j = start_id; j <= i; j++)
- gve_free_queue_page_list(priv, j);
- return err;
-}
-
-static int gve_alloc_qpls(struct gve_priv *priv)
-{
- int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
- int page_count;
- int start_id;
- int i, j;
- int err;
-
- if (!gve_is_qpl(priv))
- return 0;
-
- priv->qpls = kvcalloc(max_queues, sizeof(*priv->qpls), GFP_KERNEL);
- if (!priv->qpls)
- return -ENOMEM;
-
- start_id = gve_tx_start_qpl_id(priv);
- page_count = priv->tx_pages_per_qpl;
- for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
- err = gve_alloc_queue_page_list(priv, i,
- page_count);
- if (err)
- goto free_qpls;
- }
-
- start_id = gve_rx_start_qpl_id(priv);
-
- /* For GQI_QPL number of pages allocated have 1:1 relationship with
- * number of descriptors. For DQO, number of pages required are
- * more than descriptors (because of out of order completions).
- */
- page_count = priv->queue_format == GVE_GQI_QPL_FORMAT ?
- priv->rx_data_slot_cnt : priv->rx_pages_per_qpl;
- for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
- err = gve_alloc_queue_page_list(priv, i,
- page_count);
- if (err)
- goto free_qpls;
- }
-
- priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(max_queues) *
- sizeof(unsigned long) * BITS_PER_BYTE;
- priv->qpl_cfg.qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues),
- sizeof(unsigned long), GFP_KERNEL);
- if (!priv->qpl_cfg.qpl_id_map) {
- err = -ENOMEM;
- goto free_qpls;
- }
-
- return 0;
-
-free_qpls:
- for (j = 0; j <= i; j++)
- gve_free_queue_page_list(priv, j);
- kvfree(priv->qpls);
- priv->qpls = NULL;
- return err;
-}
-
-static void gve_free_xdp_qpls(struct gve_priv *priv)
-{
- int start_id;
- int i;
-
- start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
- for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++)
- gve_free_queue_page_list(priv, i);
-}
-
-static void gve_free_qpls(struct gve_priv *priv)
-{
- int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
- int i;
-
- if (!priv->qpls)
- return;
-
- kvfree(priv->qpl_cfg.qpl_id_map);
- priv->qpl_cfg.qpl_id_map = NULL;
-
- for (i = 0; i < max_queues; i++)
- gve_free_queue_page_list(priv, i);
-
- kvfree(priv->qpls);
- priv->qpls = NULL;
+free_qpl:
+ kvfree(qpl);
}
/* Use this to schedule a reset when the device is capable of continuing
@@ -1206,7 +1138,7 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
int i, j;
u32 tx_qid;
- if (!priv->num_xdp_queues)
+ if (!priv->tx_cfg.num_xdp_queues)
return 0;
for (i = 0; i < priv->rx_cfg.num_queues; i++) {
@@ -1217,8 +1149,14 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
napi->napi_id);
if (err)
goto err;
- err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
- MEM_TYPE_PAGE_SHARED, NULL);
+ if (gve_is_qpl(priv))
+ err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
+ MEM_TYPE_PAGE_SHARED,
+ NULL);
+ else
+ err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
+ MEM_TYPE_PAGE_POOL,
+ rx->dqo.page_pool);
if (err)
goto err;
rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
@@ -1236,7 +1174,7 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
}
}
- for (i = 0; i < priv->num_xdp_queues; i++) {
+ for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) {
tx_qid = gve_xdp_tx_queue_id(priv, i);
priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i);
}
@@ -1257,7 +1195,7 @@ static void gve_unreg_xdp_info(struct gve_priv *priv)
{
int i, tx_qid;
- if (!priv->num_xdp_queues)
+ if (!priv->tx_cfg.num_xdp_queues || !priv->rx || !priv->tx)
return;
for (i = 0; i < priv->rx_cfg.num_queues; i++) {
@@ -1270,7 +1208,7 @@ static void gve_unreg_xdp_info(struct gve_priv *priv)
}
}
- for (i = 0; i < priv->num_xdp_queues; i++) {
+ for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) {
tx_qid = gve_xdp_tx_queue_id(priv, i);
priv->tx[tx_qid].xsk_pool = NULL;
}
@@ -1278,58 +1216,128 @@ static void gve_unreg_xdp_info(struct gve_priv *priv)
static void gve_drain_page_cache(struct gve_priv *priv)
{
- struct page_frag_cache *nc;
int i;
- for (i = 0; i < priv->rx_cfg.num_queues; i++) {
- nc = &priv->rx[i].page_cache;
- if (nc->va) {
- __page_frag_cache_drain(virt_to_page(nc->va),
- nc->pagecnt_bias);
- nc->va = NULL;
- }
- }
+ for (i = 0; i < priv->rx_cfg.num_queues; i++)
+ page_frag_cache_drain(&priv->rx[i].page_cache);
}
-static int gve_open(struct net_device *dev)
+static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg)
{
- struct gve_priv *priv = netdev_priv(dev);
- int err;
+ cfg->qcfg_rx = &priv->rx_cfg;
+ cfg->qcfg_tx = &priv->tx_cfg;
+ cfg->raw_addressing = !gve_is_qpl(priv);
+ cfg->enable_header_split = priv->header_split_enabled;
+ cfg->ring_size = priv->rx_desc_cnt;
+ cfg->packet_buffer_size = priv->rx_cfg.packet_buffer_size;
+ cfg->rx = priv->rx;
+ cfg->xdp = !!cfg->qcfg_tx->num_xdp_queues;
+}
- if (priv->xdp_prog)
- priv->num_xdp_queues = priv->rx_cfg.num_queues;
+void gve_get_curr_alloc_cfgs(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
+ struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
+{
+ gve_tx_get_curr_alloc_cfg(priv, tx_alloc_cfg);
+ gve_rx_get_curr_alloc_cfg(priv, rx_alloc_cfg);
+}
+
+static void gve_rx_start_ring(struct gve_priv *priv, int i)
+{
+ if (gve_is_gqi(priv))
+ gve_rx_start_ring_gqi(priv, i);
else
- priv->num_xdp_queues = 0;
+ gve_rx_start_ring_dqo(priv, i);
+}
- err = gve_alloc_qpls(priv);
- if (err)
- return err;
+static void gve_rx_start_rings(struct gve_priv *priv, int num_rings)
+{
+ int i;
- err = gve_alloc_rings(priv);
- if (err)
- goto free_qpls;
+ for (i = 0; i < num_rings; i++)
+ gve_rx_start_ring(priv, i);
+}
+
+static void gve_rx_stop_ring(struct gve_priv *priv, int i)
+{
+ if (gve_is_gqi(priv))
+ gve_rx_stop_ring_gqi(priv, i);
+ else
+ gve_rx_stop_ring_dqo(priv, i);
+}
+
+static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings)
+{
+ int i;
+
+ if (!priv->rx)
+ return;
+
+ for (i = 0; i < num_rings; i++)
+ gve_rx_stop_ring(priv, i);
+}
+
+static void gve_queues_mem_remove(struct gve_priv *priv)
+{
+ struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
+ struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
+
+ gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
+ gve_queues_mem_free(priv, &tx_alloc_cfg, &rx_alloc_cfg);
+ priv->tx = NULL;
+ priv->rx = NULL;
+}
+
+/* The passed-in queue memory is stored into priv and the queues are made live.
+ * No memory is allocated. Passed-in memory is freed on errors.
+ */
+static int gve_queues_start(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
+ struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
+{
+ struct net_device *dev = priv->dev;
+ int err;
+
+ /* Record new resources into priv */
+ priv->tx = tx_alloc_cfg->tx;
+ priv->rx = rx_alloc_cfg->rx;
+
+ /* Record new configs into priv */
+ priv->tx_cfg = *tx_alloc_cfg->qcfg;
+ priv->tx_cfg.num_xdp_queues = tx_alloc_cfg->num_xdp_rings;
+ priv->rx_cfg = *rx_alloc_cfg->qcfg_rx;
+ priv->tx_desc_cnt = tx_alloc_cfg->ring_size;
+ priv->rx_desc_cnt = rx_alloc_cfg->ring_size;
+
+ gve_tx_start_rings(priv, gve_num_tx_queues(priv));
+ gve_rx_start_rings(priv, rx_alloc_cfg->qcfg_rx->num_queues);
+ gve_init_sync_stats(priv);
err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
if (err)
- goto free_rings;
+ goto stop_and_free_rings;
err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
if (err)
- goto free_rings;
+ goto stop_and_free_rings;
err = gve_reg_xdp_info(priv, dev);
if (err)
- goto free_rings;
+ goto stop_and_free_rings;
+
+ if (rx_alloc_cfg->reset_rss) {
+ err = gve_init_rss_config(priv, priv->rx_cfg.num_queues);
+ if (err)
+ goto reset;
+ }
err = gve_register_qpls(priv);
if (err)
goto reset;
- if (!gve_is_gqi(priv)) {
- /* Hard code this for now. This may be tuned in the future for
- * performance.
- */
- priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE;
- }
+ priv->header_split_enabled = rx_alloc_cfg->enable_header_split;
+ priv->rx_cfg.packet_buffer_size = rx_alloc_cfg->packet_buffer_size;
+
err = gve_create_rings(priv);
if (err)
goto reset;
@@ -1346,32 +1354,49 @@ static int gve_open(struct net_device *dev)
priv->interface_up_cnt++;
return 0;
-free_rings:
- gve_free_rings(priv);
-free_qpls:
- gve_free_qpls(priv);
- return err;
-
reset:
- /* This must have been called from a reset due to the rtnl lock
- * so just return at this point.
- */
if (gve_get_reset_in_progress(priv))
- return err;
- /* Otherwise reset before returning */
+ goto stop_and_free_rings;
gve_reset_and_teardown(priv, true);
/* if this fails there is nothing we can do so just ignore the return */
gve_reset_recovery(priv, false);
/* return the original error */
return err;
+stop_and_free_rings:
+ gve_tx_stop_rings(priv, gve_num_tx_queues(priv));
+ gve_rx_stop_rings(priv, priv->rx_cfg.num_queues);
+ gve_queues_mem_remove(priv);
+ return err;
}
-static int gve_close(struct net_device *dev)
+static int gve_open(struct net_device *dev)
{
+ struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
+ struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
struct gve_priv *priv = netdev_priv(dev);
int err;
- netif_carrier_off(dev);
+ gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
+
+ err = gve_queues_mem_alloc(priv, &tx_alloc_cfg, &rx_alloc_cfg);
+ if (err)
+ return err;
+
+ /* No need to free on error: ownership of resources is lost after
+ * calling gve_queues_start.
+ */
+ err = gve_queues_start(priv, &tx_alloc_cfg, &rx_alloc_cfg);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int gve_queues_stop(struct gve_priv *priv)
+{
+ int err;
+
+ netif_carrier_off(priv->dev);
if (gve_get_device_rings_ok(priv)) {
gve_turndown(priv);
gve_drain_page_cache(priv);
@@ -1383,11 +1408,13 @@ static int gve_close(struct net_device *dev)
goto err;
gve_clear_device_rings_ok(priv);
}
- del_timer_sync(&priv->stats_report_timer);
+ timer_delete_sync(&priv->stats_report_timer);
gve_unreg_xdp_info(priv);
- gve_free_rings(priv);
- gve_free_qpls(priv);
+
+ gve_tx_stop_rings(priv, gve_num_tx_queues(priv));
+ gve_rx_stop_rings(priv, priv->rx_cfg.num_queues);
+
priv->interface_down_cnt++;
return 0;
@@ -1402,62 +1429,19 @@ err:
return gve_reset_recovery(priv, false);
}
-static int gve_remove_xdp_queues(struct gve_priv *priv)
+static int gve_close(struct net_device *dev)
{
+ struct gve_priv *priv = netdev_priv(dev);
int err;
- err = gve_destroy_xdp_rings(priv);
+ err = gve_queues_stop(priv);
if (err)
return err;
- err = gve_unregister_xdp_qpls(priv);
- if (err)
- return err;
-
- gve_unreg_xdp_info(priv);
- gve_free_xdp_rings(priv);
- gve_free_xdp_qpls(priv);
- priv->num_xdp_queues = 0;
+ gve_queues_mem_remove(priv);
return 0;
}
-static int gve_add_xdp_queues(struct gve_priv *priv)
-{
- int err;
-
- priv->num_xdp_queues = priv->tx_cfg.num_queues;
-
- err = gve_alloc_xdp_qpls(priv);
- if (err)
- goto err;
-
- err = gve_alloc_xdp_rings(priv);
- if (err)
- goto free_xdp_qpls;
-
- err = gve_reg_xdp_info(priv, priv->dev);
- if (err)
- goto free_xdp_rings;
-
- err = gve_register_xdp_qpls(priv);
- if (err)
- goto free_xdp_rings;
-
- err = gve_create_xdp_rings(priv);
- if (err)
- goto free_xdp_rings;
-
- return 0;
-
-free_xdp_rings:
- gve_free_xdp_rings(priv);
-free_xdp_qpls:
- gve_free_xdp_qpls(priv);
-err:
- priv->num_xdp_queues = 0;
- return err;
-}
-
static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
{
if (!gve_get_napi_enabled(priv))
@@ -1475,6 +1459,19 @@ static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
}
}
+static int gve_configure_rings_xdp(struct gve_priv *priv,
+ u16 num_xdp_rings)
+{
+ struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
+ struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
+
+ gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
+ tx_alloc_cfg.num_xdp_rings = num_xdp_rings;
+
+ rx_alloc_cfg.xdp = !!num_xdp_rings;
+ return gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
+}
+
static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
struct netlink_ext_ack *extack)
{
@@ -1483,33 +1480,30 @@ static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
u32 status;
old_prog = READ_ONCE(priv->xdp_prog);
- if (!netif_carrier_ok(priv->dev)) {
+ if (!netif_running(priv->dev)) {
WRITE_ONCE(priv->xdp_prog, prog);
if (old_prog)
bpf_prog_put(old_prog);
+
+ /* Update priv XDP queue configuration */
+ priv->tx_cfg.num_xdp_queues = priv->xdp_prog ?
+ priv->rx_cfg.num_queues : 0;
return 0;
}
- gve_turndown(priv);
- if (!old_prog && prog) {
- // Allocate XDP TX queues if an XDP program is
- // being installed
- err = gve_add_xdp_queues(priv);
- if (err)
- goto out;
- } else if (old_prog && !prog) {
- // Remove XDP TX queues if an XDP program is
- // being uninstalled
- err = gve_remove_xdp_queues(priv);
- if (err)
- goto out;
- }
+ if (!old_prog && prog)
+ err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues);
+ else if (old_prog && !prog)
+ err = gve_configure_rings_xdp(priv, 0);
+
+ if (err)
+ goto out;
+
WRITE_ONCE(priv->xdp_prog, prog);
if (old_prog)
bpf_prog_put(old_prog);
out:
- gve_turnup(priv);
status = ioread32be(&priv->reg_bar0->device_status);
gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
return err;
@@ -1540,8 +1534,8 @@ static int gve_xsk_pool_enable(struct net_device *dev,
if (err)
return err;
- /* If XDP prog is not installed, return */
- if (!priv->xdp_prog)
+ /* If XDP prog is not installed or interface is down, return. */
+ if (!priv->xdp_prog || !netif_running(dev))
return 0;
rx = &priv->rx[qid];
@@ -1586,21 +1580,16 @@ static int gve_xsk_pool_disable(struct net_device *dev,
if (qid >= priv->rx_cfg.num_queues)
return -EINVAL;
- /* If XDP prog is not installed, unmap DMA and return */
- if (!priv->xdp_prog)
- goto done;
-
- tx_qid = gve_xdp_tx_queue_id(priv, qid);
- if (!netif_running(dev)) {
- priv->rx[qid].xsk_pool = NULL;
- xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
- priv->tx[tx_qid].xsk_pool = NULL;
+ /* If XDP prog is not installed or interface is down, unmap DMA and
+ * return.
+ */
+ if (!priv->xdp_prog || !netif_running(dev))
goto done;
- }
napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
napi_disable(napi_rx); /* make sure current rx poll is done */
+ tx_qid = gve_xdp_tx_queue_id(priv, qid);
napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi;
napi_disable(napi_tx); /* make sure current tx poll is done */
@@ -1626,24 +1615,20 @@ done:
static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
{
struct gve_priv *priv = netdev_priv(dev);
- int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id);
+ struct napi_struct *napi;
+
+ if (!gve_get_napi_enabled(priv))
+ return -ENETDOWN;
if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog)
return -EINVAL;
- if (flags & XDP_WAKEUP_TX) {
- struct gve_tx_ring *tx = &priv->tx[tx_queue_id];
- struct napi_struct *napi =
- &priv->ntfy_blocks[tx->ntfy_id].napi;
-
- if (!napi_if_scheduled_mark_missed(napi)) {
- /* Call local_bh_enable to trigger SoftIRQ processing */
- local_bh_disable();
- napi_schedule(napi);
- local_bh_enable();
- }
-
- tx->xdp_xsk_wakeup++;
+ napi = &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_id)].napi;
+ if (!napi_if_scheduled_mark_missed(napi)) {
+ /* Call local_bh_enable to trigger SoftIRQ processing */
+ local_bh_disable();
+ napi_schedule(napi);
+ local_bh_enable();
}
return 0;
@@ -1652,6 +1637,7 @@ static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
static int verify_xdp_configuration(struct net_device *dev)
{
struct gve_priv *priv = netdev_priv(dev);
+ u16 max_xdp_mtu;
if (dev->features & NETIF_F_LRO) {
netdev_warn(dev, "XDP is not supported when LRO is on.\n");
@@ -1664,7 +1650,11 @@ static int verify_xdp_configuration(struct net_device *dev)
return -EOPNOTSUPP;
}
- if (dev->mtu > GVE_DEFAULT_RX_BUFFER_SIZE - sizeof(struct ethhdr) - GVE_RX_PAD) {
+ max_xdp_mtu = priv->rx_cfg.packet_buffer_size - sizeof(struct ethhdr);
+ if (priv->queue_format == GVE_GQI_QPL_FORMAT)
+ max_xdp_mtu -= GVE_RX_PAD;
+
+ if (dev->mtu > max_xdp_mtu) {
netdev_warn(dev, "XDP is not supported for mtu %d.\n",
dev->mtu);
return -EOPNOTSUPP;
@@ -1702,42 +1692,103 @@ static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp)
}
}
-int gve_adjust_queues(struct gve_priv *priv,
- struct gve_queue_config new_rx_config,
- struct gve_queue_config new_tx_config)
+int gve_init_rss_config(struct gve_priv *priv, u16 num_queues)
+{
+ struct gve_rss_config *rss_config = &priv->rss_config;
+ struct ethtool_rxfh_param rxfh = {0};
+ u16 i;
+
+ if (!priv->cache_rss_config)
+ return 0;
+
+ for (i = 0; i < priv->rss_lut_size; i++)
+ rss_config->hash_lut[i] =
+ ethtool_rxfh_indir_default(i, num_queues);
+
+ netdev_rss_key_fill(rss_config->hash_key, priv->rss_key_size);
+
+ rxfh.hfunc = ETH_RSS_HASH_TOP;
+
+ return gve_adminq_configure_rss(priv, &rxfh);
+}
+
+int gve_flow_rules_reset(struct gve_priv *priv)
+{
+ if (!priv->max_flow_rules)
+ return 0;
+
+ return gve_adminq_reset_flow_rules(priv);
+}
+
+int gve_adjust_config(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
+ struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
{
int err;
- if (netif_carrier_ok(priv->dev)) {
- /* To make this process as simple as possible we teardown the
- * device, set the new configuration, and then bring the device
- * up again.
- */
- err = gve_close(priv->dev);
- /* we have already tried to reset in close,
- * just fail at this point
+ /* Allocate resources for the new confiugration */
+ err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg);
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "Adjust config failed to alloc new queues");
+ return err;
+ }
+
+ /* Teardown the device and free existing resources */
+ err = gve_close(priv->dev);
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "Adjust config failed to close old queues");
+ gve_queues_mem_free(priv, tx_alloc_cfg, rx_alloc_cfg);
+ return err;
+ }
+
+ /* Bring the device back up again with the new resources. */
+ err = gve_queues_start(priv, tx_alloc_cfg, rx_alloc_cfg);
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n");
+ /* No need to free on error: ownership of resources is lost after
+ * calling gve_queues_start.
*/
- if (err)
- return err;
- priv->tx_cfg = new_tx_config;
- priv->rx_cfg = new_rx_config;
+ gve_turndown(priv);
+ return err;
+ }
- err = gve_open(priv->dev);
- if (err)
- goto err;
+ return 0;
+}
- return 0;
+int gve_adjust_queues(struct gve_priv *priv,
+ struct gve_rx_queue_config new_rx_config,
+ struct gve_tx_queue_config new_tx_config,
+ bool reset_rss)
+{
+ struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
+ struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
+ int err;
+
+ gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
+
+ /* Relay the new config from ethtool */
+ tx_alloc_cfg.qcfg = &new_tx_config;
+ rx_alloc_cfg.qcfg_tx = &new_tx_config;
+ rx_alloc_cfg.qcfg_rx = &new_rx_config;
+ rx_alloc_cfg.reset_rss = reset_rss;
+
+ if (netif_running(priv->dev)) {
+ err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
+ return err;
}
/* Set the config for the next up. */
+ if (reset_rss) {
+ err = gve_init_rss_config(priv, new_rx_config.num_queues);
+ if (err)
+ return err;
+ }
priv->tx_cfg = new_tx_config;
priv->rx_cfg = new_rx_config;
return 0;
-err:
- netif_err(priv, drv, priv->dev,
- "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n");
- gve_turndown(priv);
- return err;
}
static void gve_turndown(struct gve_priv *priv)
@@ -1755,20 +1806,37 @@ static void gve_turndown(struct gve_priv *priv)
int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
- napi_disable(&block->napi);
+ if (!gve_tx_was_added_to_block(priv, idx))
+ continue;
+
+ if (idx < priv->tx_cfg.num_queues)
+ netif_queue_set_napi(priv->dev, idx,
+ NETDEV_QUEUE_TYPE_TX, NULL);
+
+ napi_disable_locked(&block->napi);
}
for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
- napi_disable(&block->napi);
+ if (!gve_rx_was_added_to_block(priv, idx))
+ continue;
+
+ netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX,
+ NULL);
+ napi_disable_locked(&block->napi);
}
/* Stop tx queues */
netif_tx_disable(priv->dev);
+ xdp_features_clear_redirect_target_locked(priv->dev);
+
gve_clear_napi_enabled(priv);
gve_clear_report_stats(priv);
+
+ /* Make sure that all traffic is finished processing. */
+ synchronize_net();
}
static void gve_turnup(struct gve_priv *priv)
@@ -1783,30 +1851,71 @@ static void gve_turnup(struct gve_priv *priv)
int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
- napi_enable(&block->napi);
+ if (!gve_tx_was_added_to_block(priv, idx))
+ continue;
+
+ napi_enable_locked(&block->napi);
+
+ if (idx < priv->tx_cfg.num_queues)
+ netif_queue_set_napi(priv->dev, idx,
+ NETDEV_QUEUE_TYPE_TX,
+ &block->napi);
+
if (gve_is_gqi(priv)) {
iowrite32be(0, gve_irq_doorbell(priv, block));
} else {
gve_set_itr_coalesce_usecs_dqo(priv, block,
priv->tx_coalesce_usecs);
}
+
+ /* Any descs written by the NIC before this barrier will be
+ * handled by the one-off napi schedule below. Whereas any
+ * descs after the barrier will generate interrupts.
+ */
+ mb();
+ napi_schedule(&block->napi);
}
for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
- napi_enable(&block->napi);
+ if (!gve_rx_was_added_to_block(priv, idx))
+ continue;
+
+ napi_enable_locked(&block->napi);
+ netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX,
+ &block->napi);
+
if (gve_is_gqi(priv)) {
iowrite32be(0, gve_irq_doorbell(priv, block));
} else {
gve_set_itr_coalesce_usecs_dqo(priv, block,
priv->rx_coalesce_usecs);
}
+
+ /* Any descs written by the NIC before this barrier will be
+ * handled by the one-off napi schedule below. Whereas any
+ * descs after the barrier will generate interrupts.
+ */
+ mb();
+ napi_schedule(&block->napi);
}
+ if (priv->tx_cfg.num_xdp_queues && gve_supports_xdp_xmit(priv))
+ xdp_features_set_redirect_target_locked(priv->dev, false);
+
gve_set_napi_enabled(priv);
}
+static void gve_turnup_and_check_status(struct gve_priv *priv)
+{
+ u32 status;
+
+ gve_turnup(priv);
+ status = ioread32be(&priv->reg_bar0->device_status);
+ gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
+}
+
static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct gve_notify_block *block;
@@ -1853,39 +1962,82 @@ out:
priv->tx_timeo_cnt++;
}
+u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit)
+{
+ if (enable_hsplit && priv->max_rx_buffer_size >= GVE_MAX_RX_BUFFER_SIZE)
+ return GVE_MAX_RX_BUFFER_SIZE;
+ else
+ return GVE_DEFAULT_RX_BUFFER_SIZE;
+}
+
+/* header-split is not supported on non-DQO_RDA yet even if device advertises it */
+bool gve_header_split_supported(const struct gve_priv *priv)
+{
+ return priv->header_buf_size && priv->queue_format == GVE_DQO_RDA_FORMAT;
+}
+
+int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split)
+{
+ struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
+ struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
+ bool enable_hdr_split;
+ int err = 0;
+
+ if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN)
+ return 0;
+
+ if (!gve_header_split_supported(priv)) {
+ dev_err(&priv->pdev->dev, "Header-split not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED)
+ enable_hdr_split = true;
+ else
+ enable_hdr_split = false;
+
+ if (enable_hdr_split == priv->header_split_enabled)
+ return 0;
+
+ gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
+
+ rx_alloc_cfg.enable_header_split = enable_hdr_split;
+ rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split);
+
+ if (netif_running(priv->dev))
+ err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
+ return err;
+}
+
static int gve_set_features(struct net_device *netdev,
netdev_features_t features)
{
const netdev_features_t orig_features = netdev->features;
+ struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
+ struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
struct gve_priv *priv = netdev_priv(netdev);
int err;
+ gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
+
if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
netdev->features ^= NETIF_F_LRO;
- if (netif_carrier_ok(netdev)) {
- /* To make this process as simple as possible we
- * teardown the device, set the new configuration,
- * and then bring the device up again.
- */
- err = gve_close(netdev);
- /* We have already tried to reset in close, just fail
- * at this point.
- */
- if (err)
- goto err;
-
- err = gve_open(netdev);
+ if (netif_running(netdev)) {
+ err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
if (err)
- goto err;
+ goto revert_features;
}
}
+ if ((netdev->features & NETIF_F_NTUPLE) && !(features & NETIF_F_NTUPLE)) {
+ err = gve_flow_rules_reset(priv);
+ if (err)
+ goto revert_features;
+ }
return 0;
-err:
- /* Reverts the change on error. */
+
+revert_features:
netdev->features = orig_features;
- netif_err(priv, drv, netdev,
- "Set features failed! !!! DISABLING ALL QUEUES !!!\n");
return err;
}
@@ -1925,7 +2077,9 @@ static void gve_handle_reset(struct gve_priv *priv)
if (gve_get_do_reset(priv)) {
rtnl_lock();
+ netdev_lock(priv->dev);
gve_reset(priv, false);
+ netdev_unlock(priv->dev);
rtnl_unlock();
}
}
@@ -2021,14 +2175,17 @@ static void gve_service_task(struct work_struct *work)
static void gve_set_netdev_xdp_features(struct gve_priv *priv)
{
+ xdp_features_t xdp_features;
+
if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
- priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC;
- priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT;
- priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
- priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
+ xdp_features = NETDEV_XDP_ACT_BASIC;
+ xdp_features |= NETDEV_XDP_ACT_REDIRECT;
+ xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
} else {
- priv->dev->xdp_features = 0;
+ xdp_features = 0;
}
+
+ xdp_set_features_flag_locked(priv->dev, xdp_features);
}
static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
@@ -2051,6 +2208,8 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
goto err;
}
+ priv->num_registered_pages = 0;
+
if (skip_describe_device)
goto setup_device;
@@ -2080,7 +2239,6 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
if (!gve_is_gqi(priv))
netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);
- priv->num_registered_pages = 0;
priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
/* gvnic has one Notification Block per MSI-x vector, except for the
* management vector
@@ -2101,6 +2259,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
priv->rx_cfg.num_queues);
}
+ priv->tx_cfg.num_xdp_queues = 0;
dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
@@ -2164,7 +2323,7 @@ err:
int gve_reset(struct gve_priv *priv, bool attempt_teardown)
{
- bool was_up = netif_carrier_ok(priv->dev);
+ bool was_up = netif_running(priv->dev);
int err;
dev_info(&priv->pdev->dev, "Performing reset\n");
@@ -2215,6 +2374,188 @@ static void gve_write_version(u8 __iomem *driver_version_register)
writeb('\n', driver_version_register);
}
+static int gve_rx_queue_stop(struct net_device *dev, void *per_q_mem, int idx)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+ struct gve_rx_ring *gve_per_q_mem;
+ int err;
+
+ if (!priv->rx)
+ return -EAGAIN;
+
+ /* Destroying queue 0 while other queues exist is not supported in DQO */
+ if (!gve_is_gqi(priv) && idx == 0)
+ return -ERANGE;
+
+ /* Single-queue destruction requires quiescence on all queues */
+ gve_turndown(priv);
+
+ /* This failure will trigger a reset - no need to clean up */
+ err = gve_adminq_destroy_single_rx_queue(priv, idx);
+ if (err)
+ return err;
+
+ if (gve_is_qpl(priv)) {
+ /* This failure will trigger a reset - no need to clean up */
+ err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, idx));
+ if (err)
+ return err;
+ }
+
+ gve_rx_stop_ring(priv, idx);
+
+ /* Turn the unstopped queues back up */
+ gve_turnup_and_check_status(priv);
+
+ gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
+ *gve_per_q_mem = priv->rx[idx];
+ memset(&priv->rx[idx], 0, sizeof(priv->rx[idx]));
+ return 0;
+}
+
+static void gve_rx_queue_mem_free(struct net_device *dev, void *per_q_mem)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+ struct gve_rx_alloc_rings_cfg cfg = {0};
+ struct gve_rx_ring *gve_per_q_mem;
+
+ gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
+ gve_rx_get_curr_alloc_cfg(priv, &cfg);
+
+ if (gve_is_gqi(priv))
+ gve_rx_free_ring_gqi(priv, gve_per_q_mem, &cfg);
+ else
+ gve_rx_free_ring_dqo(priv, gve_per_q_mem, &cfg);
+}
+
+static int gve_rx_queue_mem_alloc(struct net_device *dev, void *per_q_mem,
+ int idx)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+ struct gve_rx_alloc_rings_cfg cfg = {0};
+ struct gve_rx_ring *gve_per_q_mem;
+ int err;
+
+ if (!priv->rx)
+ return -EAGAIN;
+
+ gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
+ gve_rx_get_curr_alloc_cfg(priv, &cfg);
+
+ if (gve_is_gqi(priv))
+ err = gve_rx_alloc_ring_gqi(priv, &cfg, gve_per_q_mem, idx);
+ else
+ err = gve_rx_alloc_ring_dqo(priv, &cfg, gve_per_q_mem, idx);
+
+ return err;
+}
+
+static int gve_rx_queue_start(struct net_device *dev, void *per_q_mem, int idx)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+ struct gve_rx_ring *gve_per_q_mem;
+ int err;
+
+ if (!priv->rx)
+ return -EAGAIN;
+
+ gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
+ priv->rx[idx] = *gve_per_q_mem;
+
+ /* Single-queue creation requires quiescence on all queues */
+ gve_turndown(priv);
+
+ gve_rx_start_ring(priv, idx);
+
+ if (gve_is_qpl(priv)) {
+ /* This failure will trigger a reset - no need to clean up */
+ err = gve_register_qpl(priv, gve_rx_get_qpl(priv, idx));
+ if (err)
+ goto abort;
+ }
+
+ /* This failure will trigger a reset - no need to clean up */
+ err = gve_adminq_create_single_rx_queue(priv, idx);
+ if (err)
+ goto abort;
+
+ if (gve_is_gqi(priv))
+ gve_rx_write_doorbell(priv, &priv->rx[idx]);
+ else
+ gve_rx_post_buffers_dqo(&priv->rx[idx]);
+
+ /* Turn the unstopped queues back up */
+ gve_turnup_and_check_status(priv);
+ return 0;
+
+abort:
+ gve_rx_stop_ring(priv, idx);
+
+ /* All failures in this func result in a reset, by clearing the struct
+ * at idx, we prevent a double free when that reset runs. The reset,
+ * which needs the rtnl lock, will not run till this func returns and
+ * its caller gives up the lock.
+ */
+ memset(&priv->rx[idx], 0, sizeof(priv->rx[idx]));
+ return err;
+}
+
+static const struct netdev_queue_mgmt_ops gve_queue_mgmt_ops = {
+ .ndo_queue_mem_size = sizeof(struct gve_rx_ring),
+ .ndo_queue_mem_alloc = gve_rx_queue_mem_alloc,
+ .ndo_queue_mem_free = gve_rx_queue_mem_free,
+ .ndo_queue_start = gve_rx_queue_start,
+ .ndo_queue_stop = gve_rx_queue_stop,
+};
+
+static void gve_get_rx_queue_stats(struct net_device *dev, int idx,
+ struct netdev_queue_stats_rx *rx_stats)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+ struct gve_rx_ring *rx = &priv->rx[idx];
+ unsigned int start;
+
+ do {
+ start = u64_stats_fetch_begin(&rx->statss);
+ rx_stats->packets = rx->rpackets;
+ rx_stats->bytes = rx->rbytes;
+ rx_stats->alloc_fail = rx->rx_skb_alloc_fail +
+ rx->rx_buf_alloc_fail;
+ } while (u64_stats_fetch_retry(&rx->statss, start));
+}
+
+static void gve_get_tx_queue_stats(struct net_device *dev, int idx,
+ struct netdev_queue_stats_tx *tx_stats)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+ struct gve_tx_ring *tx = &priv->tx[idx];
+ unsigned int start;
+
+ do {
+ start = u64_stats_fetch_begin(&tx->statss);
+ tx_stats->packets = tx->pkt_done;
+ tx_stats->bytes = tx->bytes_done;
+ } while (u64_stats_fetch_retry(&tx->statss, start));
+}
+
+static void gve_get_base_stats(struct net_device *dev,
+ struct netdev_queue_stats_rx *rx,
+ struct netdev_queue_stats_tx *tx)
+{
+ rx->packets = 0;
+ rx->bytes = 0;
+ rx->alloc_fail = 0;
+
+ tx->packets = 0;
+ tx->bytes = 0;
+}
+
+static const struct netdev_stat_ops gve_stat_ops = {
+ .get_queue_stats_rx = gve_get_rx_queue_stats,
+ .get_queue_stats_tx = gve_get_tx_queue_stats,
+ .get_base_stats = gve_get_base_stats,
+};
+
static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
int max_tx_queues, max_rx_queues;
@@ -2269,6 +2610,8 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
pci_set_drvdata(pdev, dev);
dev->ethtool_ops = &gve_ethtool_ops;
dev->netdev_ops = &gve_netdev_ops;
+ dev->queue_mgmt_ops = &gve_queue_mgmt_ops;
+ dev->stat_ops = &gve_stat_ops;
/* Set default and supported features.
*
@@ -2297,6 +2640,8 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
priv->service_task_flags = 0x0;
priv->state_flags = 0x0;
priv->ethtool_flags = 0x0;
+ priv->rx_cfg.packet_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;
+ priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;
gve_set_probe_in_progress(priv);
priv->gve_wq = alloc_ordered_workqueue("gve", 0);
@@ -2314,6 +2659,9 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
goto abort_with_wq;
+ if (!gve_is_gqi(priv) && !gve_is_qpl(priv))
+ dev->netmem_tx = true;
+
err = register_netdev(dev);
if (err)
goto abort_with_gve_init;
@@ -2368,9 +2716,10 @@ static void gve_shutdown(struct pci_dev *pdev)
{
struct net_device *netdev = pci_get_drvdata(pdev);
struct gve_priv *priv = netdev_priv(netdev);
- bool was_up = netif_carrier_ok(priv->dev);
+ bool was_up = netif_running(priv->dev);
rtnl_lock();
+ netdev_lock(netdev);
if (was_up && gve_close(priv->dev)) {
/* If the dev was up, attempt to close, if close fails, reset */
gve_reset_and_teardown(priv, was_up);
@@ -2378,6 +2727,7 @@ static void gve_shutdown(struct pci_dev *pdev)
/* If the dev wasn't up or close worked, finish tearing down */
gve_teardown_priv_resources(priv);
}
+ netdev_unlock(netdev);
rtnl_unlock();
}
@@ -2386,10 +2736,11 @@ static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
{
struct net_device *netdev = pci_get_drvdata(pdev);
struct gve_priv *priv = netdev_priv(netdev);
- bool was_up = netif_carrier_ok(priv->dev);
+ bool was_up = netif_running(priv->dev);
priv->suspend_cnt++;
rtnl_lock();
+ netdev_lock(netdev);
if (was_up && gve_close(priv->dev)) {
/* If the dev was up, attempt to close, if close fails, reset */
gve_reset_and_teardown(priv, was_up);
@@ -2398,6 +2749,7 @@ static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
gve_teardown_priv_resources(priv);
}
priv->up_before_suspend = was_up;
+ netdev_unlock(netdev);
rtnl_unlock();
return 0;
}
@@ -2410,7 +2762,9 @@ static int gve_resume(struct pci_dev *pdev)
priv->resume_cnt++;
rtnl_lock();
+ netdev_lock(netdev);
err = gve_reset_recovery(priv, priv->up_before_suspend);
+ netdev_unlock(netdev);
rtnl_unlock();
return err;
}
diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
index 76615d47e055..90e875c1832f 100644
--- a/drivers/net/ethernet/google/gve/gve_rx.c
+++ b/drivers/net/ethernet/google/gve/gve_rx.c
@@ -23,11 +23,16 @@ static void gve_rx_free_buffer(struct device *dev,
gve_free_page(dev, page_info->page, dma, DMA_FROM_DEVICE);
}
-static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx)
+static void gve_rx_unfill_pages(struct gve_priv *priv,
+ struct gve_rx_ring *rx,
+ struct gve_rx_alloc_rings_cfg *cfg)
{
u32 slots = rx->mask + 1;
int i;
+ if (!rx->data.page_info)
+ return;
+
if (rx->data.raw_addressing) {
for (i = 0; i < slots; i++)
gve_rx_free_buffer(&priv->pdev->dev, &rx->data.page_info[i],
@@ -36,8 +41,6 @@ static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx)
for (i = 0; i < slots; i++)
page_ref_sub(rx->data.page_info[i].page,
rx->data.page_info[i].pagecnt_bias - 1);
- gve_unassign_qpl(priv, rx->data.qpl->id);
- rx->data.qpl = NULL;
for (i = 0; i < rx->qpl_copy_pool_mask + 1; i++) {
page_ref_sub(rx->qpl_copy_pool[i].page,
@@ -49,42 +52,104 @@ static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx)
rx->data.page_info = NULL;
}
-static void gve_rx_free_ring(struct gve_priv *priv, int idx)
+static void gve_rx_ctx_clear(struct gve_rx_ctx *ctx)
+{
+ ctx->skb_head = NULL;
+ ctx->skb_tail = NULL;
+ ctx->total_size = 0;
+ ctx->frag_cnt = 0;
+ ctx->drop_pkt = false;
+}
+
+static void gve_rx_init_ring_state_gqi(struct gve_rx_ring *rx)
+{
+ rx->desc.seqno = 1;
+ rx->cnt = 0;
+ gve_rx_ctx_clear(&rx->ctx);
+}
+
+static void gve_rx_reset_ring_gqi(struct gve_priv *priv, int idx)
{
struct gve_rx_ring *rx = &priv->rx[idx];
+ const u32 slots = priv->rx_desc_cnt;
+ size_t size;
+
+ /* Reset desc ring */
+ if (rx->desc.desc_ring) {
+ size = slots * sizeof(rx->desc.desc_ring[0]);
+ memset(rx->desc.desc_ring, 0, size);
+ }
+
+ /* Reset q_resources */
+ if (rx->q_resources)
+ memset(rx->q_resources, 0, sizeof(*rx->q_resources));
+
+ gve_rx_init_ring_state_gqi(rx);
+}
+
+void gve_rx_stop_ring_gqi(struct gve_priv *priv, int idx)
+{
+ int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
+
+ if (!gve_rx_was_added_to_block(priv, idx))
+ return;
+
+ gve_remove_napi(priv, ntfy_idx);
+ gve_rx_remove_from_block(priv, idx);
+ gve_rx_reset_ring_gqi(priv, idx);
+}
+
+void gve_rx_free_ring_gqi(struct gve_priv *priv, struct gve_rx_ring *rx,
+ struct gve_rx_alloc_rings_cfg *cfg)
+{
struct device *dev = &priv->pdev->dev;
u32 slots = rx->mask + 1;
+ int idx = rx->q_num;
size_t bytes;
+ u32 qpl_id;
- gve_rx_remove_from_block(priv, idx);
-
- bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
- dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus);
- rx->desc.desc_ring = NULL;
+ if (rx->desc.desc_ring) {
+ bytes = sizeof(struct gve_rx_desc) * cfg->ring_size;
+ dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus);
+ rx->desc.desc_ring = NULL;
+ }
- dma_free_coherent(dev, sizeof(*rx->q_resources),
- rx->q_resources, rx->q_resources_bus);
- rx->q_resources = NULL;
+ if (rx->q_resources) {
+ dma_free_coherent(dev, sizeof(*rx->q_resources),
+ rx->q_resources, rx->q_resources_bus);
+ rx->q_resources = NULL;
+ }
- gve_rx_unfill_pages(priv, rx);
+ gve_rx_unfill_pages(priv, rx, cfg);
- bytes = sizeof(*rx->data.data_ring) * slots;
- dma_free_coherent(dev, bytes, rx->data.data_ring,
- rx->data.data_bus);
- rx->data.data_ring = NULL;
+ if (rx->data.data_ring) {
+ bytes = sizeof(*rx->data.data_ring) * slots;
+ dma_free_coherent(dev, bytes, rx->data.data_ring,
+ rx->data.data_bus);
+ rx->data.data_ring = NULL;
+ }
kvfree(rx->qpl_copy_pool);
rx->qpl_copy_pool = NULL;
+ if (rx->data.qpl) {
+ qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, idx);
+ gve_free_queue_page_list(priv, rx->data.qpl, qpl_id);
+ rx->data.qpl = NULL;
+ }
+
netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}
-static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
- dma_addr_t addr, struct page *page, __be64 *slot_addr)
+static void gve_setup_rx_buffer(struct gve_rx_ring *rx,
+ struct gve_rx_slot_page_info *page_info,
+ dma_addr_t addr, struct page *page,
+ __be64 *slot_addr)
{
page_info->page = page;
page_info->page_offset = 0;
page_info->page_address = page_address(page);
+ page_info->buf_size = rx->packet_buffer_size;
*slot_addr = cpu_to_be64(addr);
/* The page already has 1 ref */
page_ref_add(page, INT_MAX - 1);
@@ -93,7 +158,8 @@ static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev,
struct gve_rx_slot_page_info *page_info,
- union gve_rx_data_slot *data_slot)
+ union gve_rx_data_slot *data_slot,
+ struct gve_rx_ring *rx)
{
struct page *page;
dma_addr_t dma;
@@ -101,14 +167,19 @@ static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev,
err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE,
GFP_ATOMIC);
- if (err)
+ if (err) {
+ u64_stats_update_begin(&rx->statss);
+ rx->rx_buf_alloc_fail++;
+ u64_stats_update_end(&rx->statss);
return err;
+ }
- gve_setup_rx_buffer(page_info, dma, page, &data_slot->addr);
+ gve_setup_rx_buffer(rx, page_info, dma, page, &data_slot->addr);
return 0;
}
-static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
+static int gve_rx_prefill_pages(struct gve_rx_ring *rx,
+ struct gve_rx_alloc_rings_cfg *cfg)
{
struct gve_priv *priv = rx->gve;
u32 slots;
@@ -126,25 +197,19 @@ static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
if (!rx->data.page_info)
return -ENOMEM;
- if (!rx->data.raw_addressing) {
- rx->data.qpl = gve_assign_rx_qpl(priv, rx->q_num);
- if (!rx->data.qpl) {
- kvfree(rx->data.page_info);
- rx->data.page_info = NULL;
- return -ENOMEM;
- }
- }
for (i = 0; i < slots; i++) {
if (!rx->data.raw_addressing) {
struct page *page = rx->data.qpl->pages[i];
dma_addr_t addr = i * PAGE_SIZE;
- gve_setup_rx_buffer(&rx->data.page_info[i], addr, page,
+ gve_setup_rx_buffer(rx, &rx->data.page_info[i], addr,
+ page,
&rx->data.data_ring[i].qpl_offset);
continue;
}
- err = gve_rx_alloc_buffer(priv, &priv->pdev->dev, &rx->data.page_info[i],
- &rx->data.data_ring[i]);
+ err = gve_rx_alloc_buffer(priv, &priv->pdev->dev,
+ &rx->data.page_info[i],
+ &rx->data.data_ring[i], rx);
if (err)
goto alloc_err_rda;
}
@@ -161,6 +226,7 @@ static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
rx->qpl_copy_pool[j].page = page;
rx->qpl_copy_pool[j].page_offset = 0;
rx->qpl_copy_pool[j].page_address = page_address(page);
+ rx->qpl_copy_pool[j].buf_size = rx->packet_buffer_size;
/* The page already has 1 ref. */
page_ref_add(page, INT_MAX - 1);
@@ -185,9 +251,6 @@ alloc_err_qpl:
page_ref_sub(rx->data.page_info[i].page,
rx->data.page_info[i].pagecnt_bias - 1);
- gve_unassign_qpl(priv, rx->data.qpl->id);
- rx->data.qpl = NULL;
-
return err;
alloc_err_rda:
@@ -198,22 +261,25 @@ alloc_err_rda:
return err;
}
-static void gve_rx_ctx_clear(struct gve_rx_ctx *ctx)
+void gve_rx_start_ring_gqi(struct gve_priv *priv, int idx)
{
- ctx->skb_head = NULL;
- ctx->skb_tail = NULL;
- ctx->total_size = 0;
- ctx->frag_cnt = 0;
- ctx->drop_pkt = false;
+ int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
+
+ gve_rx_add_to_block(priv, idx);
+ gve_add_napi(priv, ntfy_idx, gve_napi_poll);
}
-static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
+int gve_rx_alloc_ring_gqi(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg,
+ struct gve_rx_ring *rx,
+ int idx)
{
- struct gve_rx_ring *rx = &priv->rx[idx];
struct device *hdev = &priv->pdev->dev;
+ u32 slots = cfg->ring_size;
int filled_pages;
+ int qpl_page_cnt;
+ u32 qpl_id = 0;
size_t bytes;
- u32 slots;
int err;
netif_dbg(priv, drv, priv->dev, "allocating rx ring\n");
@@ -222,10 +288,10 @@ static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
rx->gve = priv;
rx->q_num = idx;
+ rx->packet_buffer_size = cfg->packet_buffer_size;
- slots = priv->rx_data_slot_cnt;
rx->mask = slots - 1;
- rx->data.raw_addressing = priv->queue_format == GVE_GQI_RDA_FORMAT;
+ rx->data.raw_addressing = cfg->raw_addressing;
/* alloc rx data ring */
bytes = sizeof(*rx->data.data_ring) * slots;
@@ -246,10 +312,22 @@ static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
goto abort_with_slots;
}
- filled_pages = gve_prefill_rx_pages(rx);
+ if (!rx->data.raw_addressing) {
+ qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num);
+ qpl_page_cnt = cfg->ring_size;
+
+ rx->data.qpl = gve_alloc_queue_page_list(priv, qpl_id,
+ qpl_page_cnt);
+ if (!rx->data.qpl) {
+ err = -ENOMEM;
+ goto abort_with_copy_pool;
+ }
+ }
+
+ filled_pages = gve_rx_prefill_pages(rx, cfg);
if (filled_pages < 0) {
err = -ENOMEM;
- goto abort_with_copy_pool;
+ goto abort_with_qpl;
}
rx->fill_cnt = filled_pages;
/* Ensure data ring slots (packet buffers) are visible. */
@@ -269,23 +347,17 @@ static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
(unsigned long)rx->data.data_bus);
/* alloc rx desc ring */
- bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
+ bytes = sizeof(struct gve_rx_desc) * cfg->ring_size;
rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus,
GFP_KERNEL);
if (!rx->desc.desc_ring) {
err = -ENOMEM;
goto abort_with_q_resources;
}
- rx->cnt = 0;
- rx->db_threshold = priv->rx_desc_cnt / 2;
- rx->desc.seqno = 1;
+ rx->db_threshold = slots / 2;
+ gve_rx_init_ring_state_gqi(rx);
- /* Allocating half-page buffers allows page-flipping which is faster
- * than copying or allocating new pages.
- */
- rx->packet_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;
gve_rx_ctx_clear(&rx->ctx);
- gve_rx_add_to_block(priv, idx);
return 0;
@@ -294,7 +366,12 @@ abort_with_q_resources:
rx->q_resources, rx->q_resources_bus);
rx->q_resources = NULL;
abort_filled:
- gve_rx_unfill_pages(priv, rx);
+ gve_rx_unfill_pages(priv, rx, cfg);
+abort_with_qpl:
+ if (!rx->data.raw_addressing) {
+ gve_free_queue_page_list(priv, rx->data.qpl, qpl_id);
+ rx->data.qpl = NULL;
+ }
abort_with_copy_pool:
kvfree(rx->qpl_copy_pool);
rx->qpl_copy_pool = NULL;
@@ -306,36 +383,52 @@ abort_with_slots:
return err;
}
-int gve_rx_alloc_rings(struct gve_priv *priv)
+int gve_rx_alloc_rings_gqi(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg)
{
+ struct gve_rx_ring *rx;
int err = 0;
- int i;
+ int i, j;
+
+ rx = kvcalloc(cfg->qcfg_rx->max_queues, sizeof(struct gve_rx_ring),
+ GFP_KERNEL);
+ if (!rx)
+ return -ENOMEM;
- for (i = 0; i < priv->rx_cfg.num_queues; i++) {
- err = gve_rx_alloc_ring(priv, i);
+ for (i = 0; i < cfg->qcfg_rx->num_queues; i++) {
+ err = gve_rx_alloc_ring_gqi(priv, cfg, &rx[i], i);
if (err) {
netif_err(priv, drv, priv->dev,
"Failed to alloc rx ring=%d: err=%d\n",
i, err);
- break;
+ goto cleanup;
}
}
- /* Unallocate if there was an error */
- if (err) {
- int j;
- for (j = 0; j < i; j++)
- gve_rx_free_ring(priv, j);
- }
+ cfg->rx = rx;
+ return 0;
+
+cleanup:
+ for (j = 0; j < i; j++)
+ gve_rx_free_ring_gqi(priv, &rx[j], cfg);
+ kvfree(rx);
return err;
}
-void gve_rx_free_rings_gqi(struct gve_priv *priv)
+void gve_rx_free_rings_gqi(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg)
{
+ struct gve_rx_ring *rx = cfg->rx;
int i;
- for (i = 0; i < priv->rx_cfg.num_queues; i++)
- gve_rx_free_ring(priv, i);
+ if (!rx)
+ return;
+
+ for (i = 0; i < cfg->qcfg_rx->num_queues; i++)
+ gve_rx_free_ring_gqi(priv, &rx[i], cfg);
+
+ kvfree(rx);
+ cfg->rx = NULL;
}
void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx)
@@ -502,7 +595,7 @@ static struct sk_buff *gve_rx_copy_to_pool(struct gve_rx_ring *rx,
copy_page_info->pad = page_info->pad;
skb = gve_rx_add_frags(napi, copy_page_info,
- rx->packet_buffer_size, len, ctx);
+ copy_page_info->buf_size, len, ctx);
if (unlikely(!skb))
return NULL;
@@ -542,7 +635,8 @@ gve_rx_qpl(struct device *dev, struct net_device *netdev,
* device.
*/
if (page_info->can_flip) {
- skb = gve_rx_add_frags(napi, page_info, rx->packet_buffer_size, len, ctx);
+ skb = gve_rx_add_frags(napi, page_info, page_info->buf_size,
+ len, ctx);
/* No point in recycling if we didn't get the skb */
if (skb) {
/* Make sure that the page isn't freed. */
@@ -592,7 +686,7 @@ static struct sk_buff *gve_rx_skb(struct gve_priv *priv, struct gve_rx_ring *rx,
skb = gve_rx_raw_addressing(&priv->pdev->dev, netdev,
page_info, len, napi,
data_slot,
- rx->packet_buffer_size, ctx);
+ page_info->buf_size, ctx);
} else {
skb = gve_rx_qpl(&priv->pdev->dev, netdev, rx,
page_info, len, napi, data_slot);
@@ -767,7 +861,7 @@ static void gve_rx(struct gve_rx_ring *rx, netdev_features_t feat,
void *old_data;
int xdp_act;
- xdp_init_buff(&xdp, rx->packet_buffer_size, &rx->xdp_rxq);
+ xdp_init_buff(&xdp, page_info->buf_size, &rx->xdp_rxq);
xdp_prepare_buff(&xdp, page_info->page_address +
page_info->page_offset, GVE_RX_PAD,
len, false);
@@ -896,10 +990,7 @@ static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
gve_rx_free_buffer(dev, page_info, data_slot);
page_info->page = NULL;
if (gve_rx_alloc_buffer(priv, dev, page_info,
- data_slot)) {
- u64_stats_update_begin(&rx->statss);
- rx->rx_buf_alloc_fail++;
- u64_stats_update_end(&rx->statss);
+ data_slot, rx)) {
break;
}
}
diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
index f281e42a7ef9..dcb0545baa50 100644
--- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c
+++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
@@ -16,203 +16,125 @@
#include <net/ipv6.h>
#include <net/tcp.h>
-static int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs)
+static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx)
{
- return page_count(bs->page_info.page) - bs->page_info.pagecnt_bias;
-}
-
-static void gve_free_page_dqo(struct gve_priv *priv,
- struct gve_rx_buf_state_dqo *bs,
- bool free_page)
-{
- page_ref_sub(bs->page_info.page, bs->page_info.pagecnt_bias - 1);
- if (free_page)
- gve_free_page(&priv->pdev->dev, bs->page_info.page, bs->addr,
- DMA_FROM_DEVICE);
- bs->page_info.page = NULL;
-}
-
-static struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx)
-{
- struct gve_rx_buf_state_dqo *buf_state;
- s16 buffer_id;
-
- buffer_id = rx->dqo.free_buf_states;
- if (unlikely(buffer_id == -1))
- return NULL;
-
- buf_state = &rx->dqo.buf_states[buffer_id];
-
- /* Remove buf_state from free list */
- rx->dqo.free_buf_states = buf_state->next;
-
- /* Point buf_state to itself to mark it as allocated */
- buf_state->next = buffer_id;
-
- return buf_state;
-}
-
-static bool gve_buf_state_is_allocated(struct gve_rx_ring *rx,
- struct gve_rx_buf_state_dqo *buf_state)
-{
- s16 buffer_id = buf_state - rx->dqo.buf_states;
-
- return buf_state->next == buffer_id;
-}
-
-static void gve_free_buf_state(struct gve_rx_ring *rx,
- struct gve_rx_buf_state_dqo *buf_state)
-{
- s16 buffer_id = buf_state - rx->dqo.buf_states;
+ struct device *hdev = &priv->pdev->dev;
+ int buf_count = rx->dqo.bufq.mask + 1;
- buf_state->next = rx->dqo.free_buf_states;
- rx->dqo.free_buf_states = buffer_id;
+ if (rx->dqo.hdr_bufs.data) {
+ dma_free_coherent(hdev, priv->header_buf_size * buf_count,
+ rx->dqo.hdr_bufs.data, rx->dqo.hdr_bufs.addr);
+ rx->dqo.hdr_bufs.data = NULL;
+ }
}
-static struct gve_rx_buf_state_dqo *
-gve_dequeue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list)
+static void gve_rx_init_ring_state_dqo(struct gve_rx_ring *rx,
+ const u32 buffer_queue_slots,
+ const u32 completion_queue_slots)
{
- struct gve_rx_buf_state_dqo *buf_state;
- s16 buffer_id;
-
- buffer_id = list->head;
- if (unlikely(buffer_id == -1))
- return NULL;
-
- buf_state = &rx->dqo.buf_states[buffer_id];
-
- /* Remove buf_state from list */
- list->head = buf_state->next;
- if (buf_state->next == -1)
- list->tail = -1;
-
- /* Point buf_state to itself to mark it as allocated */
- buf_state->next = buffer_id;
-
- return buf_state;
-}
+ int i;
-static void gve_enqueue_buf_state(struct gve_rx_ring *rx,
- struct gve_index_list *list,
- struct gve_rx_buf_state_dqo *buf_state)
-{
- s16 buffer_id = buf_state - rx->dqo.buf_states;
+ /* Set buffer queue state */
+ rx->dqo.bufq.mask = buffer_queue_slots - 1;
+ rx->dqo.bufq.head = 0;
+ rx->dqo.bufq.tail = 0;
- buf_state->next = -1;
+ /* Set completion queue state */
+ rx->dqo.complq.num_free_slots = completion_queue_slots;
+ rx->dqo.complq.mask = completion_queue_slots - 1;
+ rx->dqo.complq.cur_gen_bit = 0;
+ rx->dqo.complq.head = 0;
- if (list->head == -1) {
- list->head = buffer_id;
- list->tail = buffer_id;
- } else {
- int tail = list->tail;
+ /* Set RX SKB context */
+ rx->ctx.skb_head = NULL;
+ rx->ctx.skb_tail = NULL;
- rx->dqo.buf_states[tail].next = buffer_id;
- list->tail = buffer_id;
+ /* Set up linked list of buffer IDs */
+ if (rx->dqo.buf_states) {
+ for (i = 0; i < rx->dqo.num_buf_states - 1; i++)
+ rx->dqo.buf_states[i].next = i + 1;
+ rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1;
}
+
+ rx->dqo.free_buf_states = 0;
+ rx->dqo.recycled_buf_states.head = -1;
+ rx->dqo.recycled_buf_states.tail = -1;
+ rx->dqo.used_buf_states.head = -1;
+ rx->dqo.used_buf_states.tail = -1;
}
-static struct gve_rx_buf_state_dqo *
-gve_get_recycled_buf_state(struct gve_rx_ring *rx)
+static void gve_rx_reset_ring_dqo(struct gve_priv *priv, int idx)
{
- struct gve_rx_buf_state_dqo *buf_state;
+ struct gve_rx_ring *rx = &priv->rx[idx];
+ size_t size;
int i;
- /* Recycled buf states are immediately usable. */
- buf_state = gve_dequeue_buf_state(rx, &rx->dqo.recycled_buf_states);
- if (likely(buf_state))
- return buf_state;
-
- if (unlikely(rx->dqo.used_buf_states.head == -1))
- return NULL;
+ const u32 buffer_queue_slots = priv->rx_desc_cnt;
+ const u32 completion_queue_slots = priv->rx_desc_cnt;
- /* Used buf states are only usable when ref count reaches 0, which means
- * no SKBs refer to them.
- *
- * Search a limited number before giving up.
- */
- for (i = 0; i < 5; i++) {
- buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
- if (gve_buf_ref_cnt(buf_state) == 0) {
- rx->dqo.used_buf_states_cnt--;
- return buf_state;
- }
+ /* Reset buffer queue */
+ if (rx->dqo.bufq.desc_ring) {
+ size = sizeof(rx->dqo.bufq.desc_ring[0]) *
+ buffer_queue_slots;
+ memset(rx->dqo.bufq.desc_ring, 0, size);
+ }
- gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
+ /* Reset completion queue */
+ if (rx->dqo.complq.desc_ring) {
+ size = sizeof(rx->dqo.complq.desc_ring[0]) *
+ completion_queue_slots;
+ memset(rx->dqo.complq.desc_ring, 0, size);
}
- /* For QPL, we cannot allocate any new buffers and must
- * wait for the existing ones to be available.
- */
- if (rx->dqo.qpl)
- return NULL;
+ /* Reset q_resources */
+ if (rx->q_resources)
+ memset(rx->q_resources, 0, sizeof(*rx->q_resources));
- /* If there are no free buf states discard an entry from
- * `used_buf_states` so it can be used.
- */
- if (unlikely(rx->dqo.free_buf_states == -1)) {
- buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
- if (gve_buf_ref_cnt(buf_state) == 0)
- return buf_state;
+ /* Reset buf states */
+ if (rx->dqo.buf_states) {
+ for (i = 0; i < rx->dqo.num_buf_states; i++) {
+ struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];
- gve_free_page_dqo(rx->gve, buf_state, true);
- gve_free_buf_state(rx, buf_state);
+ if (rx->dqo.page_pool)
+ gve_free_to_page_pool(rx, bs, false);
+ else
+ gve_free_qpl_page_dqo(bs);
+ }
}
- return NULL;
+ gve_rx_init_ring_state_dqo(rx, buffer_queue_slots,
+ completion_queue_slots);
}
-static int gve_alloc_page_dqo(struct gve_rx_ring *rx,
- struct gve_rx_buf_state_dqo *buf_state)
+void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx)
{
- struct gve_priv *priv = rx->gve;
- u32 idx;
-
- if (!rx->dqo.qpl) {
- int err;
-
- err = gve_alloc_page(priv, &priv->pdev->dev,
- &buf_state->page_info.page,
- &buf_state->addr,
- DMA_FROM_DEVICE, GFP_ATOMIC);
- if (err)
- return err;
- } else {
- idx = rx->dqo.next_qpl_page_idx;
- if (idx >= priv->rx_pages_per_qpl) {
- net_err_ratelimited("%s: Out of QPL pages\n",
- priv->dev->name);
- return -ENOMEM;
- }
- buf_state->page_info.page = rx->dqo.qpl->pages[idx];
- buf_state->addr = rx->dqo.qpl->page_buses[idx];
- rx->dqo.next_qpl_page_idx++;
- }
- buf_state->page_info.page_offset = 0;
- buf_state->page_info.page_address =
- page_address(buf_state->page_info.page);
- buf_state->last_single_ref_offset = 0;
+ int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
+ struct gve_rx_ring *rx = &priv->rx[idx];
- /* The page already has 1 ref. */
- page_ref_add(buf_state->page_info.page, INT_MAX - 1);
- buf_state->page_info.pagecnt_bias = INT_MAX;
+ if (!gve_rx_was_added_to_block(priv, idx))
+ return;
- return 0;
+ if (rx->dqo.page_pool)
+ page_pool_disable_direct_recycling(rx->dqo.page_pool);
+ gve_remove_napi(priv, ntfy_idx);
+ gve_rx_remove_from_block(priv, idx);
+ gve_rx_reset_ring_dqo(priv, idx);
}
-static void gve_rx_free_ring_dqo(struct gve_priv *priv, int idx)
+void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
+ struct gve_rx_alloc_rings_cfg *cfg)
{
- struct gve_rx_ring *rx = &priv->rx[idx];
struct device *hdev = &priv->pdev->dev;
size_t completion_queue_slots;
size_t buffer_queue_slots;
+ int idx = rx->q_num;
size_t size;
+ u32 qpl_id;
int i;
completion_queue_slots = rx->dqo.complq.mask + 1;
buffer_queue_slots = rx->dqo.bufq.mask + 1;
- gve_rx_remove_from_block(priv, idx);
-
if (rx->q_resources) {
dma_free_coherent(hdev, sizeof(*rx->q_resources),
rx->q_resources, rx->q_resources_bus);
@@ -221,12 +143,16 @@ static void gve_rx_free_ring_dqo(struct gve_priv *priv, int idx)
for (i = 0; i < rx->dqo.num_buf_states; i++) {
struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];
- /* Only free page for RDA. QPL pages are freed in gve_main. */
- if (bs->page_info.page)
- gve_free_page_dqo(priv, bs, !rx->dqo.qpl);
+
+ if (rx->dqo.page_pool)
+ gve_free_to_page_pool(rx, bs, false);
+ else
+ gve_free_qpl_page_dqo(bs);
}
+
if (rx->dqo.qpl) {
- gve_unassign_qpl(priv, rx->dqo.qpl->id);
+ qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num);
+ gve_free_queue_page_list(priv, rx->dqo.qpl, qpl_id);
rx->dqo.qpl = NULL;
}
@@ -248,50 +174,78 @@ static void gve_rx_free_ring_dqo(struct gve_priv *priv, int idx)
kvfree(rx->dqo.buf_states);
rx->dqo.buf_states = NULL;
+ if (rx->dqo.page_pool) {
+ page_pool_destroy(rx->dqo.page_pool);
+ rx->dqo.page_pool = NULL;
+ }
+
+ gve_rx_free_hdr_bufs(priv, rx);
+
netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}
-static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx)
+static int gve_rx_alloc_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx,
+ const u32 buf_count)
+{
+ struct device *hdev = &priv->pdev->dev;
+
+ rx->dqo.hdr_bufs.data = dma_alloc_coherent(hdev, priv->header_buf_size * buf_count,
+ &rx->dqo.hdr_bufs.addr, GFP_KERNEL);
+ if (!rx->dqo.hdr_bufs.data)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void gve_rx_start_ring_dqo(struct gve_priv *priv, int idx)
+{
+ int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
+
+ gve_rx_add_to_block(priv, idx);
+ gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo);
+}
+
+int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg,
+ struct gve_rx_ring *rx,
+ int idx)
{
- struct gve_rx_ring *rx = &priv->rx[idx];
struct device *hdev = &priv->pdev->dev;
+ struct page_pool *pool;
+ int qpl_page_cnt;
size_t size;
- int i;
+ u32 qpl_id;
- const u32 buffer_queue_slots =
- priv->queue_format == GVE_DQO_RDA_FORMAT ?
- priv->options_dqo_rda.rx_buff_ring_entries : priv->rx_desc_cnt;
- const u32 completion_queue_slots = priv->rx_desc_cnt;
+ const u32 buffer_queue_slots = cfg->ring_size;
+ const u32 completion_queue_slots = cfg->ring_size;
netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n");
memset(rx, 0, sizeof(*rx));
rx->gve = priv;
rx->q_num = idx;
- rx->dqo.bufq.mask = buffer_queue_slots - 1;
- rx->dqo.complq.num_free_slots = completion_queue_slots;
- rx->dqo.complq.mask = completion_queue_slots - 1;
- rx->ctx.skb_head = NULL;
- rx->ctx.skb_tail = NULL;
+ rx->packet_buffer_size = cfg->packet_buffer_size;
+
+ if (cfg->xdp) {
+ rx->packet_buffer_truesize = GVE_XDP_RX_BUFFER_SIZE_DQO;
+ rx->rx_headroom = XDP_PACKET_HEADROOM;
+ } else {
+ rx->packet_buffer_truesize = rx->packet_buffer_size;
+ rx->rx_headroom = 0;
+ }
- rx->dqo.num_buf_states = priv->queue_format == GVE_DQO_RDA_FORMAT ?
- min_t(s16, S16_MAX, buffer_queue_slots * 4) :
- priv->rx_pages_per_qpl;
+ rx->dqo.num_buf_states = cfg->raw_addressing ? buffer_queue_slots :
+ gve_get_rx_pages_per_qpl_dqo(cfg->ring_size);
rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states,
sizeof(rx->dqo.buf_states[0]),
GFP_KERNEL);
if (!rx->dqo.buf_states)
return -ENOMEM;
- /* Set up linked list of buffer IDs */
- for (i = 0; i < rx->dqo.num_buf_states - 1; i++)
- rx->dqo.buf_states[i].next = i + 1;
-
- rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1;
- rx->dqo.recycled_buf_states.head = -1;
- rx->dqo.recycled_buf_states.tail = -1;
- rx->dqo.used_buf_states.head = -1;
- rx->dqo.used_buf_states.tail = -1;
+ /* Allocate header buffers for header-split */
+ if (cfg->enable_header_split)
+ if (gve_rx_alloc_hdr_bufs(priv, rx, buffer_queue_slots))
+ goto err;
/* Allocate RX completion queue */
size = sizeof(rx->dqo.complq.desc_ring[0]) *
@@ -308,8 +262,18 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx)
if (!rx->dqo.bufq.desc_ring)
goto err;
- if (priv->queue_format != GVE_DQO_RDA_FORMAT) {
- rx->dqo.qpl = gve_assign_rx_qpl(priv, rx->q_num);
+ if (cfg->raw_addressing) {
+ pool = gve_rx_create_page_pool(priv, rx, cfg->xdp);
+ if (IS_ERR(pool))
+ goto err;
+
+ rx->dqo.page_pool = pool;
+ } else {
+ qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num);
+ qpl_page_cnt = gve_get_rx_pages_per_qpl_dqo(cfg->ring_size);
+
+ rx->dqo.qpl = gve_alloc_queue_page_list(priv, qpl_id,
+ qpl_page_cnt);
if (!rx->dqo.qpl)
goto err;
rx->dqo.next_qpl_page_idx = 0;
@@ -320,12 +284,13 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx)
if (!rx->q_resources)
goto err;
- gve_rx_add_to_block(priv, idx);
+ gve_rx_init_ring_state_dqo(rx, buffer_queue_slots,
+ completion_queue_slots);
return 0;
err:
- gve_rx_free_ring_dqo(priv, idx);
+ gve_rx_free_ring_dqo(priv, rx, cfg);
return -ENOMEM;
}
@@ -337,13 +302,20 @@ void gve_rx_write_doorbell_dqo(const struct gve_priv *priv, int queue_idx)
iowrite32(rx->dqo.bufq.tail, &priv->db_bar2[index]);
}
-int gve_rx_alloc_rings_dqo(struct gve_priv *priv)
+int gve_rx_alloc_rings_dqo(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg)
{
- int err = 0;
+ struct gve_rx_ring *rx;
+ int err;
int i;
- for (i = 0; i < priv->rx_cfg.num_queues; i++) {
- err = gve_rx_alloc_ring_dqo(priv, i);
+ rx = kvcalloc(cfg->qcfg_rx->max_queues, sizeof(struct gve_rx_ring),
+ GFP_KERNEL);
+ if (!rx)
+ return -ENOMEM;
+
+ for (i = 0; i < cfg->qcfg_rx->num_queues; i++) {
+ err = gve_rx_alloc_ring_dqo(priv, cfg, &rx[i], i);
if (err) {
netif_err(priv, drv, priv->dev,
"Failed to alloc rx ring=%d: err=%d\n",
@@ -352,21 +324,30 @@ int gve_rx_alloc_rings_dqo(struct gve_priv *priv)
}
}
+ cfg->rx = rx;
return 0;
err:
for (i--; i >= 0; i--)
- gve_rx_free_ring_dqo(priv, i);
-
+ gve_rx_free_ring_dqo(priv, &rx[i], cfg);
+ kvfree(rx);
return err;
}
-void gve_rx_free_rings_dqo(struct gve_priv *priv)
+void gve_rx_free_rings_dqo(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg)
{
+ struct gve_rx_ring *rx = cfg->rx;
int i;
- for (i = 0; i < priv->rx_cfg.num_queues; i++)
- gve_rx_free_ring_dqo(priv, i);
+ if (!rx)
+ return;
+
+ for (i = 0; i < cfg->qcfg_rx->num_queues; i++)
+ gve_rx_free_ring_dqo(priv, &rx[i], cfg);
+
+ kvfree(rx);
+ cfg->rx = NULL;
}
void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
@@ -384,26 +365,18 @@ void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
num_avail_slots = min_t(u32, num_avail_slots, complq->num_free_slots);
while (num_posted < num_avail_slots) {
struct gve_rx_desc_dqo *desc = &bufq->desc_ring[bufq->tail];
- struct gve_rx_buf_state_dqo *buf_state;
-
- buf_state = gve_get_recycled_buf_state(rx);
- if (unlikely(!buf_state)) {
- buf_state = gve_alloc_buf_state(rx);
- if (unlikely(!buf_state))
- break;
-
- if (unlikely(gve_alloc_page_dqo(rx, buf_state))) {
- u64_stats_update_begin(&rx->statss);
- rx->rx_buf_alloc_fail++;
- u64_stats_update_end(&rx->statss);
- gve_free_buf_state(rx, buf_state);
- break;
- }
+
+ if (unlikely(gve_alloc_buffer(rx, desc))) {
+ u64_stats_update_begin(&rx->statss);
+ rx->rx_buf_alloc_fail++;
+ u64_stats_update_end(&rx->statss);
+ break;
}
- desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states);
- desc->buf_addr = cpu_to_le64(buf_state->addr +
- buf_state->page_info.page_offset);
+ if (rx->dqo.hdr_bufs.data)
+ desc->header_buf_addr =
+ cpu_to_le64(rx->dqo.hdr_bufs.addr +
+ priv->header_buf_size * bufq->tail);
bufq->tail = (bufq->tail + 1) & bufq->mask;
complq->num_free_slots--;
@@ -416,48 +389,6 @@ void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
rx->fill_cnt += num_posted;
}
-static void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx,
- struct gve_rx_buf_state_dqo *buf_state)
-{
- const int data_buffer_size = priv->data_buffer_size_dqo;
- int pagecount;
-
- /* Can't reuse if we only fit one buffer per page */
- if (data_buffer_size * 2 > PAGE_SIZE)
- goto mark_used;
-
- pagecount = gve_buf_ref_cnt(buf_state);
-
- /* Record the offset when we have a single remaining reference.
- *
- * When this happens, we know all of the other offsets of the page are
- * usable.
- */
- if (pagecount == 1) {
- buf_state->last_single_ref_offset =
- buf_state->page_info.page_offset;
- }
-
- /* Use the next buffer sized chunk in the page. */
- buf_state->page_info.page_offset += data_buffer_size;
- buf_state->page_info.page_offset &= (PAGE_SIZE - 1);
-
- /* If we wrap around to the same offset without ever dropping to 1
- * reference, then we don't know if this offset was ever freed.
- */
- if (buf_state->page_info.page_offset ==
- buf_state->last_single_ref_offset) {
- goto mark_used;
- }
-
- gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
- return;
-
-mark_used:
- gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
- rx->dqo.used_buf_states_cnt++;
-}
-
static void gve_rx_skb_csum(struct sk_buff *skb,
const struct gve_rx_compl_desc_dqo *desc,
struct gve_ptype ptype)
@@ -506,11 +437,13 @@ static void gve_rx_skb_hash(struct sk_buff *skb,
skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type);
}
-static void gve_rx_free_skb(struct gve_rx_ring *rx)
+static void gve_rx_free_skb(struct napi_struct *napi, struct gve_rx_ring *rx)
{
if (!rx->ctx.skb_head)
return;
+ if (rx->ctx.skb_head == napi->skb)
+ napi->skb = NULL;
dev_kfree_skb_any(rx->ctx.skb_head);
rx->ctx.skb_head = NULL;
rx->ctx.skb_tail = NULL;
@@ -553,6 +486,25 @@ static int gve_rx_copy_ondemand(struct gve_rx_ring *rx,
return 0;
}
+static void gve_skb_add_rx_frag(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state,
+ int num_frags, u16 buf_len)
+{
+ if (rx->dqo.page_pool) {
+ skb_add_rx_frag_netmem(rx->ctx.skb_tail, num_frags,
+ buf_state->page_info.netmem,
+ buf_state->page_info.page_offset +
+ buf_state->page_info.pad, buf_len,
+ buf_state->page_info.buf_size);
+ } else {
+ skb_add_rx_frag(rx->ctx.skb_tail, num_frags,
+ buf_state->page_info.page,
+ buf_state->page_info.page_offset +
+ buf_state->page_info.pad, buf_len,
+ buf_state->page_info.buf_size);
+ }
+}
+
/* Chains multi skbs for single rx packet.
* Returns 0 if buffer is appended, -1 otherwise.
*/
@@ -570,6 +522,9 @@ static int gve_rx_append_frags(struct napi_struct *napi,
if (!skb)
return -1;
+ if (rx->dqo.page_pool)
+ skb_mark_for_recycle(skb);
+
if (rx->ctx.skb_tail == rx->ctx.skb_head)
skb_shinfo(rx->ctx.skb_head)->frag_list = skb;
else
@@ -580,39 +535,58 @@ static int gve_rx_append_frags(struct napi_struct *napi,
if (rx->ctx.skb_tail != rx->ctx.skb_head) {
rx->ctx.skb_head->len += buf_len;
rx->ctx.skb_head->data_len += buf_len;
- rx->ctx.skb_head->truesize += priv->data_buffer_size_dqo;
+ rx->ctx.skb_head->truesize += buf_state->page_info.buf_size;
}
/* Trigger ondemand page allocation if we are running low on buffers */
if (gve_rx_should_trigger_copy_ondemand(rx))
return gve_rx_copy_ondemand(rx, buf_state, buf_len);
- skb_add_rx_frag(rx->ctx.skb_tail, num_frags,
- buf_state->page_info.page,
- buf_state->page_info.page_offset,
- buf_len, priv->data_buffer_size_dqo);
- gve_dec_pagecnt_bias(&buf_state->page_info);
-
- /* Advances buffer page-offset if page is partially used.
- * Marks buffer as used if page is full.
- */
- gve_try_recycle_buf(priv, rx, buf_state);
+ gve_skb_add_rx_frag(rx, buf_state, num_frags, buf_len);
+ gve_reuse_buffer(rx, buf_state);
return 0;
}
+static void gve_xdp_done_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
+ struct xdp_buff *xdp, struct bpf_prog *xprog,
+ int xdp_act,
+ struct gve_rx_buf_state_dqo *buf_state)
+{
+ u64_stats_update_begin(&rx->statss);
+ switch (xdp_act) {
+ case XDP_ABORTED:
+ case XDP_DROP:
+ default:
+ rx->xdp_actions[xdp_act]++;
+ break;
+ case XDP_TX:
+ rx->xdp_tx_errors++;
+ break;
+ case XDP_REDIRECT:
+ rx->xdp_redirect_errors++;
+ break;
+ }
+ u64_stats_update_end(&rx->statss);
+ gve_free_buffer(rx, buf_state);
+}
+
/* Returns 0 if descriptor is completed successfully.
* Returns -EINVAL if descriptor is invalid.
* Returns -ENOMEM if data cannot be copied to skb.
*/
static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
const struct gve_rx_compl_desc_dqo *compl_desc,
- int queue_idx)
+ u32 desc_idx, int queue_idx)
{
const u16 buffer_id = le16_to_cpu(compl_desc->buf_id);
+ const bool hbo = compl_desc->header_buffer_overflow;
const bool eop = compl_desc->end_of_packet != 0;
+ const bool hsplit = compl_desc->split_header;
struct gve_rx_buf_state_dqo *buf_state;
struct gve_priv *priv = rx->gve;
+ struct bpf_prog *xprog;
u16 buf_len;
+ u16 hdr_len;
if (unlikely(buffer_id >= rx->dqo.num_buf_states)) {
net_err_ratelimited("%s: Invalid RX buffer_id=%u\n",
@@ -627,21 +601,52 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
}
if (unlikely(compl_desc->rx_error)) {
- gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
- buf_state);
+ gve_free_buffer(rx, buf_state);
return -EINVAL;
}
buf_len = compl_desc->packet_len;
+ hdr_len = compl_desc->header_len;
/* Page might have not been used for awhile and was likely last written
* by a different thread.
*/
- prefetch(buf_state->page_info.page);
+ if (rx->dqo.page_pool) {
+ if (!netmem_is_net_iov(buf_state->page_info.netmem))
+ prefetch(netmem_to_page(buf_state->page_info.netmem));
+ } else {
+ prefetch(buf_state->page_info.page);
+ }
+
+ /* Copy the header into the skb in the case of header split */
+ if (hsplit) {
+ int unsplit = 0;
+
+ if (hdr_len && !hbo) {
+ rx->ctx.skb_head = gve_rx_copy_data(priv->dev, napi,
+ rx->dqo.hdr_bufs.data +
+ desc_idx * priv->header_buf_size,
+ hdr_len);
+ if (unlikely(!rx->ctx.skb_head))
+ goto error;
+ rx->ctx.skb_tail = rx->ctx.skb_head;
+
+ if (rx->dqo.page_pool)
+ skb_mark_for_recycle(rx->ctx.skb_head);
+ } else {
+ unsplit = 1;
+ }
+ u64_stats_update_begin(&rx->statss);
+ rx->rx_hsplit_pkt++;
+ rx->rx_hsplit_unsplit_pkt += unsplit;
+ rx->rx_hsplit_bytes += hdr_len;
+ u64_stats_update_end(&rx->statss);
+ }
/* Sync the portion of dma buffer for CPU to read. */
dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr,
- buf_state->page_info.page_offset,
+ buf_state->page_info.page_offset +
+ buf_state->page_info.pad,
buf_len, DMA_FROM_DEVICE);
/* Append to current skb if one exists. */
@@ -653,6 +658,34 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
return 0;
}
+ xprog = READ_ONCE(priv->xdp_prog);
+ if (xprog) {
+ struct xdp_buff xdp;
+ void *old_data;
+ int xdp_act;
+
+ xdp_init_buff(&xdp, buf_state->page_info.buf_size,
+ &rx->xdp_rxq);
+ xdp_prepare_buff(&xdp,
+ buf_state->page_info.page_address +
+ buf_state->page_info.page_offset,
+ buf_state->page_info.pad,
+ buf_len, false);
+ old_data = xdp.data;
+ xdp_act = bpf_prog_run_xdp(xprog, &xdp);
+ buf_state->page_info.pad += xdp.data - old_data;
+ buf_len = xdp.data_end - xdp.data;
+ if (xdp_act != XDP_PASS) {
+ gve_xdp_done_dqo(priv, rx, &xdp, xprog, xdp_act,
+ buf_state);
+ return 0;
+ }
+
+ u64_stats_update_begin(&rx->statss);
+ rx->xdp_actions[XDP_PASS]++;
+ u64_stats_update_end(&rx->statss);
+ }
+
if (eop && buf_len <= priv->rx_copybreak) {
rx->ctx.skb_head = gve_rx_copy(priv->dev, napi,
&buf_state->page_info, buf_len);
@@ -665,8 +698,7 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
rx->rx_copybreak_pkt++;
u64_stats_update_end(&rx->statss);
- gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states,
- buf_state);
+ gve_free_buffer(rx, buf_state);
return 0;
}
@@ -681,16 +713,15 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
return 0;
}
- skb_add_rx_frag(rx->ctx.skb_head, 0, buf_state->page_info.page,
- buf_state->page_info.page_offset, buf_len,
- priv->data_buffer_size_dqo);
- gve_dec_pagecnt_bias(&buf_state->page_info);
+ if (rx->dqo.page_pool)
+ skb_mark_for_recycle(rx->ctx.skb_head);
- gve_try_recycle_buf(priv, rx, buf_state);
+ gve_skb_add_rx_frag(rx, buf_state, 0, buf_len);
+ gve_reuse_buffer(rx, buf_state);
return 0;
error:
- gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
+ gve_free_buffer(rx, buf_state);
return -ENOMEM;
}
@@ -781,9 +812,9 @@ int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
/* Do not read data until we own the descriptor */
dma_rmb();
- err = gve_rx_dqo(napi, rx, compl_desc, rx->q_num);
+ err = gve_rx_dqo(napi, rx, compl_desc, complq->head, rx->q_num);
if (err < 0) {
- gve_rx_free_skb(rx);
+ gve_rx_free_skb(napi, rx);
u64_stats_update_begin(&rx->statss);
if (err == -ENOMEM)
rx->rx_skb_alloc_fail++;
@@ -826,7 +857,7 @@ int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
/* gve_rx_complete_skb() will consume skb if successful */
if (gve_rx_complete_skb(rx, napi, compl_desc, feat) != 0) {
- gve_rx_free_skb(rx);
+ gve_rx_free_skb(napi, rx);
u64_stats_update_begin(&rx->statss);
rx->rx_desc_err_dropped_pkt++;
u64_stats_update_end(&rx->statss);
diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c
index 07ba124780df..1b40bf0c811a 100644
--- a/drivers/net/ethernet/google/gve/gve_tx.c
+++ b/drivers/net/ethernet/google/gve/gve_tx.c
@@ -158,15 +158,16 @@ static int gve_clean_xdp_done(struct gve_priv *priv, struct gve_tx_ring *tx,
u32 to_do)
{
struct gve_tx_buffer_state *info;
- u32 clean_end = tx->done + to_do;
u64 pkts = 0, bytes = 0;
size_t space_freed = 0;
u32 xsk_complete = 0;
u32 idx;
+ int i;
- for (; tx->done < clean_end; tx->done++) {
+ for (i = 0; i < to_do; i++) {
idx = tx->done & tx->mask;
info = &tx->info[idx];
+ tx->done++;
if (unlikely(!info->xdp.size))
continue;
@@ -196,29 +197,43 @@ static int gve_clean_xdp_done(struct gve_priv *priv, struct gve_tx_ring *tx,
static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
u32 to_do, bool try_to_wake);
-static void gve_tx_free_ring(struct gve_priv *priv, int idx)
+void gve_tx_stop_ring_gqi(struct gve_priv *priv, int idx)
{
+ int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
struct gve_tx_ring *tx = &priv->tx[idx];
+
+ if (!gve_tx_was_added_to_block(priv, idx))
+ return;
+
+ gve_remove_napi(priv, ntfy_idx);
+ if (tx->q_num < priv->tx_cfg.num_queues)
+ gve_clean_tx_done(priv, tx, priv->tx_desc_cnt, false);
+ else
+ gve_clean_xdp_done(priv, tx, priv->tx_desc_cnt);
+ netdev_tx_reset_queue(tx->netdev_txq);
+ gve_tx_remove_from_block(priv, idx);
+}
+
+static void gve_tx_free_ring_gqi(struct gve_priv *priv, struct gve_tx_ring *tx,
+ struct gve_tx_alloc_rings_cfg *cfg)
+{
struct device *hdev = &priv->pdev->dev;
+ int idx = tx->q_num;
size_t bytes;
+ u32 qpl_id;
u32 slots;
- gve_tx_remove_from_block(priv, idx);
slots = tx->mask + 1;
- if (tx->q_num < priv->tx_cfg.num_queues) {
- gve_clean_tx_done(priv, tx, priv->tx_desc_cnt, false);
- netdev_tx_reset_queue(tx->netdev_txq);
- } else {
- gve_clean_xdp_done(priv, tx, priv->tx_desc_cnt);
- }
-
dma_free_coherent(hdev, sizeof(*tx->q_resources),
tx->q_resources, tx->q_resources_bus);
tx->q_resources = NULL;
- if (!tx->raw_addressing) {
- gve_tx_fifo_release(priv, &tx->tx_fifo);
- gve_unassign_qpl(priv, tx->tx_fifo.qpl->id);
+ if (tx->tx_fifo.qpl) {
+ if (tx->tx_fifo.base)
+ gve_tx_fifo_release(priv, &tx->tx_fifo);
+
+ qpl_id = gve_tx_qpl_id(priv, tx->q_num);
+ gve_free_queue_page_list(priv, tx->tx_fifo.qpl, qpl_id);
tx->tx_fifo.qpl = NULL;
}
@@ -232,11 +247,25 @@ static void gve_tx_free_ring(struct gve_priv *priv, int idx)
netif_dbg(priv, drv, priv->dev, "freed tx queue %d\n", idx);
}
-static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
+void gve_tx_start_ring_gqi(struct gve_priv *priv, int idx)
{
+ int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
struct gve_tx_ring *tx = &priv->tx[idx];
+
+ gve_tx_add_to_block(priv, idx);
+
+ tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx);
+ gve_add_napi(priv, ntfy_idx, gve_napi_poll);
+}
+
+static int gve_tx_alloc_ring_gqi(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *cfg,
+ struct gve_tx_ring *tx,
+ int idx)
+{
struct device *hdev = &priv->pdev->dev;
- u32 slots = priv->tx_desc_cnt;
+ int qpl_page_cnt;
+ u32 qpl_id = 0;
size_t bytes;
/* Make sure everything is zeroed to start */
@@ -245,25 +274,30 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
spin_lock_init(&tx->xdp_lock);
tx->q_num = idx;
- tx->mask = slots - 1;
+ tx->mask = cfg->ring_size - 1;
/* alloc metadata */
- tx->info = vcalloc(slots, sizeof(*tx->info));
+ tx->info = vcalloc(cfg->ring_size, sizeof(*tx->info));
if (!tx->info)
return -ENOMEM;
/* alloc tx queue */
- bytes = sizeof(*tx->desc) * slots;
+ bytes = sizeof(*tx->desc) * cfg->ring_size;
tx->desc = dma_alloc_coherent(hdev, bytes, &tx->bus, GFP_KERNEL);
if (!tx->desc)
goto abort_with_info;
- tx->raw_addressing = priv->queue_format == GVE_GQI_RDA_FORMAT;
- tx->dev = &priv->pdev->dev;
+ tx->raw_addressing = cfg->raw_addressing;
+ tx->dev = hdev;
if (!tx->raw_addressing) {
- tx->tx_fifo.qpl = gve_assign_tx_qpl(priv, idx);
+ qpl_id = gve_tx_qpl_id(priv, tx->q_num);
+ qpl_page_cnt = priv->tx_pages_per_qpl;
+
+ tx->tx_fifo.qpl = gve_alloc_queue_page_list(priv, qpl_id,
+ qpl_page_cnt);
if (!tx->tx_fifo.qpl)
goto abort_with_desc;
+
/* map Tx FIFO */
if (gve_tx_fifo_init(priv, &tx->tx_fifo))
goto abort_with_qpl;
@@ -277,20 +311,16 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
if (!tx->q_resources)
goto abort_with_fifo;
- netif_dbg(priv, drv, priv->dev, "tx[%d]->bus=%lx\n", idx,
- (unsigned long)tx->bus);
- if (idx < priv->tx_cfg.num_queues)
- tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx);
- gve_tx_add_to_block(priv, idx);
-
return 0;
abort_with_fifo:
if (!tx->raw_addressing)
gve_tx_fifo_release(priv, &tx->tx_fifo);
abort_with_qpl:
- if (!tx->raw_addressing)
- gve_unassign_qpl(priv, tx->tx_fifo.qpl->id);
+ if (!tx->raw_addressing) {
+ gve_free_queue_page_list(priv, tx->tx_fifo.qpl, qpl_id);
+ tx->tx_fifo.qpl = NULL;
+ }
abort_with_desc:
dma_free_coherent(hdev, bytes, tx->desc, tx->bus);
tx->desc = NULL;
@@ -300,36 +330,60 @@ abort_with_info:
return -ENOMEM;
}
-int gve_tx_alloc_rings(struct gve_priv *priv, int start_id, int num_rings)
+int gve_tx_alloc_rings_gqi(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *cfg)
{
+ struct gve_tx_ring *tx = cfg->tx;
+ int total_queues;
int err = 0;
- int i;
+ int i, j;
+
+ total_queues = cfg->qcfg->num_queues + cfg->num_xdp_rings;
+ if (total_queues > cfg->qcfg->max_queues) {
+ netif_err(priv, drv, priv->dev,
+ "Cannot alloc more than the max num of Tx rings\n");
+ return -EINVAL;
+ }
+
+ tx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_tx_ring),
+ GFP_KERNEL);
+ if (!tx)
+ return -ENOMEM;
- for (i = start_id; i < start_id + num_rings; i++) {
- err = gve_tx_alloc_ring(priv, i);
+ for (i = 0; i < total_queues; i++) {
+ err = gve_tx_alloc_ring_gqi(priv, cfg, &tx[i], i);
if (err) {
netif_err(priv, drv, priv->dev,
"Failed to alloc tx ring=%d: err=%d\n",
i, err);
- break;
+ goto cleanup;
}
}
- /* Unallocate if there was an error */
- if (err) {
- int j;
- for (j = start_id; j < i; j++)
- gve_tx_free_ring(priv, j);
- }
+ cfg->tx = tx;
+ return 0;
+
+cleanup:
+ for (j = 0; j < i; j++)
+ gve_tx_free_ring_gqi(priv, &tx[j], cfg);
+ kvfree(tx);
return err;
}
-void gve_tx_free_rings_gqi(struct gve_priv *priv, int start_id, int num_rings)
+void gve_tx_free_rings_gqi(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *cfg)
{
+ struct gve_tx_ring *tx = cfg->tx;
int i;
- for (i = start_id; i < start_id + num_rings; i++)
- gve_tx_free_ring(priv, i);
+ if (!tx)
+ return;
+
+ for (i = 0; i < cfg->qcfg->num_queues + cfg->qcfg->num_xdp_queues; i++)
+ gve_tx_free_ring_gqi(priv, &tx[i], cfg);
+
+ kvfree(tx);
+ cfg->tx = NULL;
}
/* gve_tx_avail - Calculates the number of slots available in the ring
@@ -776,11 +830,14 @@ int gve_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
struct gve_tx_ring *tx;
int i, err = 0, qid;
- if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK) || !priv->xdp_prog)
return -EINVAL;
+ if (!gve_get_napi_enabled(priv))
+ return -ENETDOWN;
+
qid = gve_xdp_tx_queue_id(priv,
- smp_processor_id() % priv->num_xdp_queues);
+ smp_processor_id() % priv->tx_cfg.num_xdp_queues);
tx = &priv->tx[qid];
@@ -895,14 +952,10 @@ static int gve_xsk_tx(struct gve_priv *priv, struct gve_tx_ring *tx,
spin_lock(&tx->xdp_lock);
while (sent < budget) {
- if (!gve_can_tx(tx, GVE_TX_START_THRESH))
+ if (!gve_can_tx(tx, GVE_TX_START_THRESH) ||
+ !xsk_tx_peek_desc(tx->xsk_pool, &desc))
goto out;
- if (!xsk_tx_peek_desc(tx->xsk_pool, &desc)) {
- tx->xdp_xsk_done = tx->xdp_xsk_wakeup;
- goto out;
- }
-
data = xsk_buff_raw_get_data(tx->xsk_pool, desc.addr);
nsegs = gve_tx_fill_xdp(priv, tx, data, desc.len, NULL, true);
tx->req += nsegs;
@@ -917,33 +970,41 @@ out:
return sent;
}
+int gve_xsk_tx_poll(struct gve_notify_block *rx_block, int budget)
+{
+ struct gve_rx_ring *rx = rx_block->rx;
+ struct gve_priv *priv = rx->gve;
+ struct gve_tx_ring *tx;
+ int sent = 0;
+
+ tx = &priv->tx[gve_xdp_tx_queue_id(priv, rx->q_num)];
+ if (tx->xsk_pool) {
+ sent = gve_xsk_tx(priv, tx, budget);
+
+ u64_stats_update_begin(&tx->statss);
+ tx->xdp_xsk_sent += sent;
+ u64_stats_update_end(&tx->statss);
+ if (xsk_uses_need_wakeup(tx->xsk_pool))
+ xsk_set_tx_need_wakeup(tx->xsk_pool);
+ }
+
+ return sent;
+}
+
bool gve_xdp_poll(struct gve_notify_block *block, int budget)
{
struct gve_priv *priv = block->priv;
struct gve_tx_ring *tx = block->tx;
u32 nic_done;
- bool repoll;
u32 to_do;
/* Find out how much work there is to be done */
nic_done = gve_tx_load_event_counter(priv, tx);
to_do = min_t(u32, (nic_done - tx->done), budget);
gve_clean_xdp_done(priv, tx, to_do);
- repoll = nic_done != tx->done;
-
- if (tx->xsk_pool) {
- int sent = gve_xsk_tx(priv, tx, budget);
-
- u64_stats_update_begin(&tx->statss);
- tx->xdp_xsk_sent += sent;
- u64_stats_update_end(&tx->statss);
- repoll |= (sent == budget);
- if (xsk_uses_need_wakeup(tx->xsk_pool))
- xsk_set_tx_need_wakeup(tx->xsk_pool);
- }
/* If we still have work we want to repoll */
- return repoll;
+ return nic_done != tx->done;
}
bool gve_tx_poll(struct gve_notify_block *block, int budget)
diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c
index f59c4710f118..a27f1574a733 100644
--- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c
+++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c
@@ -188,13 +188,28 @@ static void gve_tx_clean_pending_packets(struct gve_tx_ring *tx)
}
}
-static void gve_tx_free_ring_dqo(struct gve_priv *priv, int idx)
+void gve_tx_stop_ring_dqo(struct gve_priv *priv, int idx)
{
+ int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
struct gve_tx_ring *tx = &priv->tx[idx];
- struct device *hdev = &priv->pdev->dev;
- size_t bytes;
+ if (!gve_tx_was_added_to_block(priv, idx))
+ return;
+
+ gve_remove_napi(priv, ntfy_idx);
+ gve_clean_tx_done_dqo(priv, tx, /*napi=*/NULL);
+ netdev_tx_reset_queue(tx->netdev_txq);
+ gve_tx_clean_pending_packets(tx);
gve_tx_remove_from_block(priv, idx);
+}
+
+static void gve_tx_free_ring_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
+ struct gve_tx_alloc_rings_cfg *cfg)
+{
+ struct device *hdev = &priv->pdev->dev;
+ int idx = tx->q_num;
+ size_t bytes;
+ u32 qpl_id;
if (tx->q_resources) {
dma_free_coherent(hdev, sizeof(*tx->q_resources),
@@ -223,7 +238,8 @@ static void gve_tx_free_ring_dqo(struct gve_priv *priv, int idx)
tx->dqo.tx_qpl_buf_next = NULL;
if (tx->dqo.qpl) {
- gve_unassign_qpl(priv, tx->dqo.qpl->id);
+ qpl_id = gve_tx_qpl_id(priv, tx->q_num);
+ gve_free_queue_page_list(priv, tx->dqo.qpl, qpl_id);
tx->dqo.qpl = NULL;
}
@@ -253,25 +269,37 @@ static int gve_tx_qpl_buf_init(struct gve_tx_ring *tx)
return 0;
}
-static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, int idx)
+void gve_tx_start_ring_dqo(struct gve_priv *priv, int idx)
{
+ int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
struct gve_tx_ring *tx = &priv->tx[idx];
+
+ gve_tx_add_to_block(priv, idx);
+
+ tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx);
+ gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo);
+}
+
+static int gve_tx_alloc_ring_dqo(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *cfg,
+ struct gve_tx_ring *tx,
+ int idx)
+{
struct device *hdev = &priv->pdev->dev;
int num_pending_packets;
+ int qpl_page_cnt;
size_t bytes;
+ u32 qpl_id;
int i;
memset(tx, 0, sizeof(*tx));
tx->q_num = idx;
- tx->dev = &priv->pdev->dev;
- tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx);
+ tx->dev = hdev;
atomic_set_release(&tx->dqo_compl.hw_tx_head, 0);
/* Queue sizes must be a power of 2 */
- tx->mask = priv->tx_desc_cnt - 1;
- tx->dqo.complq_mask = priv->queue_format == GVE_DQO_RDA_FORMAT ?
- priv->options_dqo_rda.tx_comp_ring_entries - 1 :
- tx->mask;
+ tx->mask = cfg->ring_size - 1;
+ tx->dqo.complq_mask = tx->mask;
/* The max number of pending packets determines the maximum number of
* descriptors which maybe written to the completion queue.
@@ -327,8 +355,12 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, int idx)
if (!tx->q_resources)
goto err;
- if (gve_is_qpl(priv)) {
- tx->dqo.qpl = gve_assign_tx_qpl(priv, idx);
+ if (!cfg->raw_addressing) {
+ qpl_id = gve_tx_qpl_id(priv, tx->q_num);
+ qpl_page_cnt = priv->tx_pages_per_qpl;
+
+ tx->dqo.qpl = gve_alloc_queue_page_list(priv, qpl_id,
+ qpl_page_cnt);
if (!tx->dqo.qpl)
goto err;
@@ -336,22 +368,35 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, int idx)
goto err;
}
- gve_tx_add_to_block(priv, idx);
-
return 0;
err:
- gve_tx_free_ring_dqo(priv, idx);
+ gve_tx_free_ring_dqo(priv, tx, cfg);
return -ENOMEM;
}
-int gve_tx_alloc_rings_dqo(struct gve_priv *priv)
+int gve_tx_alloc_rings_dqo(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *cfg)
{
+ struct gve_tx_ring *tx = cfg->tx;
+ int total_queues;
int err = 0;
- int i;
+ int i, j;
+
+ total_queues = cfg->qcfg->num_queues + cfg->num_xdp_rings;
+ if (total_queues > cfg->qcfg->max_queues) {
+ netif_err(priv, drv, priv->dev,
+ "Cannot alloc more than the max num of Tx rings\n");
+ return -EINVAL;
+ }
+
+ tx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_tx_ring),
+ GFP_KERNEL);
+ if (!tx)
+ return -ENOMEM;
- for (i = 0; i < priv->tx_cfg.num_queues; i++) {
- err = gve_tx_alloc_ring_dqo(priv, i);
+ for (i = 0; i < total_queues; i++) {
+ err = gve_tx_alloc_ring_dqo(priv, cfg, &tx[i], i);
if (err) {
netif_err(priv, drv, priv->dev,
"Failed to alloc tx ring=%d: err=%d\n",
@@ -360,28 +405,30 @@ int gve_tx_alloc_rings_dqo(struct gve_priv *priv)
}
}
+ cfg->tx = tx;
return 0;
err:
- for (i--; i >= 0; i--)
- gve_tx_free_ring_dqo(priv, i);
-
+ for (j = 0; j < i; j++)
+ gve_tx_free_ring_dqo(priv, &tx[j], cfg);
+ kvfree(tx);
return err;
}
-void gve_tx_free_rings_dqo(struct gve_priv *priv)
+void gve_tx_free_rings_dqo(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *cfg)
{
+ struct gve_tx_ring *tx = cfg->tx;
int i;
- for (i = 0; i < priv->tx_cfg.num_queues; i++) {
- struct gve_tx_ring *tx = &priv->tx[i];
+ if (!tx)
+ return;
- gve_clean_tx_done_dqo(priv, tx, /*napi=*/NULL);
- netdev_tx_reset_queue(tx->netdev_txq);
- gve_tx_clean_pending_packets(tx);
+ for (i = 0; i < cfg->qcfg->num_queues + cfg->qcfg->num_xdp_queues; i++)
+ gve_tx_free_ring_dqo(priv, &tx[i], cfg);
- gve_tx_free_ring_dqo(priv, i);
- }
+ kvfree(tx);
+ cfg->tx = NULL;
}
/* Returns the number of slots available in the ring */
@@ -501,28 +548,18 @@ static int gve_prep_tso(struct sk_buff *skb)
if (unlikely(skb_shinfo(skb)->gso_size < GVE_TX_MIN_TSO_MSS_DQO))
return -1;
+ if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
+ return -EINVAL;
+
/* Needed because we will modify header. */
err = skb_cow_head(skb, 0);
if (err < 0)
return err;
tcp = tcp_hdr(skb);
-
- /* Remove payload length from checksum. */
paylen = skb->len - skb_transport_offset(skb);
-
- switch (skb_shinfo(skb)->gso_type) {
- case SKB_GSO_TCPV4:
- case SKB_GSO_TCPV6:
- csum_replace_by_diff(&tcp->check,
- (__force __wsum)htonl(paylen));
-
- /* Compute length of segmentation header. */
- header_len = skb_tcp_all_headers(skb);
- break;
- default:
- return -EINVAL;
- }
+ csum_replace_by_diff(&tcp->check, (__force __wsum)htonl(paylen));
+ header_len = skb_tcp_all_headers(skb);
if (unlikely(header_len > GVE_TX_MAX_HDR_SIZE_DQO))
return -EINVAL;
@@ -623,7 +660,8 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx,
goto err;
dma_unmap_len_set(pkt, len[pkt->num_bufs], len);
- dma_unmap_addr_set(pkt, dma[pkt->num_bufs], addr);
+ netmem_dma_unmap_addr_set(skb_frag_netmem(frag), pkt,
+ dma[pkt->num_bufs], addr);
++pkt->num_bufs;
gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb, len, addr,
@@ -822,22 +860,42 @@ static bool gve_can_send_tso(const struct sk_buff *skb)
const int header_len = skb_tcp_all_headers(skb);
const int gso_size = shinfo->gso_size;
int cur_seg_num_bufs;
+ int prev_frag_size;
int cur_seg_size;
int i;
cur_seg_size = skb_headlen(skb) - header_len;
+ prev_frag_size = skb_headlen(skb);
cur_seg_num_bufs = cur_seg_size > 0;
for (i = 0; i < shinfo->nr_frags; i++) {
if (cur_seg_size >= gso_size) {
cur_seg_size %= gso_size;
cur_seg_num_bufs = cur_seg_size > 0;
+
+ if (prev_frag_size > GVE_TX_MAX_BUF_SIZE_DQO) {
+ int prev_frag_remain = prev_frag_size %
+ GVE_TX_MAX_BUF_SIZE_DQO;
+
+ /* If the last descriptor of the previous frag
+ * is less than cur_seg_size, the segment will
+ * span two descriptors in the previous frag.
+ * Since max gso size (9728) is less than
+ * GVE_TX_MAX_BUF_SIZE_DQO, it is impossible
+ * for the segment to span more than two
+ * descriptors.
+ */
+ if (prev_frag_remain &&
+ cur_seg_size > prev_frag_remain)
+ cur_seg_num_bufs++;
+ }
}
if (unlikely(++cur_seg_num_bufs > max_bufs_per_seg))
return false;
- cur_seg_size += skb_frag_size(&shinfo->frags[i]);
+ prev_frag_size = skb_frag_size(&shinfo->frags[i]);
+ cur_seg_size += prev_frag_size;
}
return true;
@@ -981,8 +1039,9 @@ static void gve_unmap_packet(struct device *dev,
dma_unmap_single(dev, dma_unmap_addr(pkt, dma[0]),
dma_unmap_len(pkt, len[0]), DMA_TO_DEVICE);
for (i = 1; i < pkt->num_bufs; i++) {
- dma_unmap_page(dev, dma_unmap_addr(pkt, dma[i]),
- dma_unmap_len(pkt, len[i]), DMA_TO_DEVICE);
+ netmem_dma_unmap_page_attrs(dev, dma_unmap_addr(pkt, dma[i]),
+ dma_unmap_len(pkt, len[i]),
+ DMA_TO_DEVICE, 0);
}
pkt->num_bufs = 0;
}
@@ -1082,8 +1141,7 @@ static void gve_handle_miss_completion(struct gve_priv *priv,
/* jiffies can wraparound but time comparisons can handle overflows. */
pending_packet->timeout_jiffies =
jiffies +
- msecs_to_jiffies(GVE_REINJECT_COMPL_TIMEOUT *
- MSEC_PER_SEC);
+ secs_to_jiffies(GVE_REINJECT_COMPL_TIMEOUT);
add_to_list(tx, &tx->dqo_compl.miss_completions, pending_packet);
*bytes += pending_packet->skb->len;
@@ -1127,8 +1185,7 @@ static void remove_miss_completions(struct gve_priv *priv,
pending_packet->state = GVE_PACKET_STATE_TIMED_OUT_COMPL;
pending_packet->timeout_jiffies =
jiffies +
- msecs_to_jiffies(GVE_DEALLOCATE_COMPL_TIMEOUT *
- MSEC_PER_SEC);
+ secs_to_jiffies(GVE_DEALLOCATE_COMPL_TIMEOUT);
/* Maintain pending packet in another list so the packet can be
* unallocated at a later time.
*/
diff --git a/drivers/net/ethernet/google/gve/gve_utils.c b/drivers/net/ethernet/google/gve/gve_utils.c
index 26e08d753270..ace9b8698021 100644
--- a/drivers/net/ethernet/google/gve/gve_utils.c
+++ b/drivers/net/ethernet/google/gve/gve_utils.c
@@ -8,6 +8,14 @@
#include "gve_adminq.h"
#include "gve_utils.h"
+bool gve_tx_was_added_to_block(struct gve_priv *priv, int queue_idx)
+{
+ struct gve_notify_block *block =
+ &priv->ntfy_blocks[gve_tx_idx_to_ntfy(priv, queue_idx)];
+
+ return block->tx != NULL;
+}
+
void gve_tx_remove_from_block(struct gve_priv *priv, int queue_idx)
{
struct gve_notify_block *block =
@@ -30,6 +38,14 @@ void gve_tx_add_to_block(struct gve_priv *priv, int queue_idx)
queue_idx);
}
+bool gve_rx_was_added_to_block(struct gve_priv *priv, int queue_idx)
+{
+ struct gve_notify_block *block =
+ &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_idx)];
+
+ return block->rx != NULL;
+}
+
void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx)
{
struct gve_notify_block *block =
@@ -48,11 +64,9 @@ void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx)
rx->ntfy_id = ntfy_idx;
}
-struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi,
- struct gve_rx_slot_page_info *page_info, u16 len)
+struct sk_buff *gve_rx_copy_data(struct net_device *dev, struct napi_struct *napi,
+ u8 *data, u16 len)
{
- void *va = page_info->page_address + page_info->page_offset +
- page_info->pad;
struct sk_buff *skb;
skb = napi_alloc_skb(napi, len);
@@ -60,12 +74,21 @@ struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi,
return NULL;
__skb_put(skb, len);
- skb_copy_to_linear_data_offset(skb, 0, va, len);
+ skb_copy_to_linear_data_offset(skb, 0, data, len);
skb->protocol = eth_type_trans(skb, dev);
return skb;
}
+struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi,
+ struct gve_rx_slot_page_info *page_info, u16 len)
+{
+ void *va = page_info->page_address + page_info->page_offset +
+ page_info->pad;
+
+ return gve_rx_copy_data(dev, napi, va, len);
+}
+
void gve_dec_pagecnt_bias(struct gve_rx_slot_page_info *page_info)
{
page_info->pagecnt_bias--;
@@ -81,3 +104,19 @@ void gve_dec_pagecnt_bias(struct gve_rx_slot_page_info *page_info)
page_ref_add(page_info->page, INT_MAX - pagecount);
}
}
+
+void gve_add_napi(struct gve_priv *priv, int ntfy_idx,
+ int (*gve_poll)(struct napi_struct *, int))
+{
+ struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+
+ netif_napi_add_locked(priv->dev, &block->napi, gve_poll);
+ netif_napi_set_irq_locked(&block->napi, block->irq);
+}
+
+void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
+{
+ struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+
+ netif_napi_del_locked(&block->napi);
+}
diff --git a/drivers/net/ethernet/google/gve/gve_utils.h b/drivers/net/ethernet/google/gve/gve_utils.h
index 324fd98a6112..bf2e9a0adb36 100644
--- a/drivers/net/ethernet/google/gve/gve_utils.h
+++ b/drivers/net/ethernet/google/gve/gve_utils.h
@@ -11,17 +11,25 @@
#include "gve.h"
+bool gve_tx_was_added_to_block(struct gve_priv *priv, int queue_idx);
void gve_tx_remove_from_block(struct gve_priv *priv, int queue_idx);
void gve_tx_add_to_block(struct gve_priv *priv, int queue_idx);
+bool gve_rx_was_added_to_block(struct gve_priv *priv, int queue_idx);
void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx);
void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx);
+struct sk_buff *gve_rx_copy_data(struct net_device *dev, struct napi_struct *napi,
+ u8 *data, u16 len);
+
struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi,
struct gve_rx_slot_page_info *page_info, u16 len);
/* Decrement pagecnt_bias. Set it back to INT_MAX if it reached zero. */
void gve_dec_pagecnt_bias(struct gve_rx_slot_page_info *page_info);
+void gve_add_napi(struct gve_priv *priv, int ntfy_idx,
+ int (*gve_poll)(struct napi_struct *, int));
+void gve_remove_napi(struct gve_priv *priv, int ntfy_idx);
#endif /* _GVE_UTILS_H */