Diffstat (limited to 'drivers/net/ethernet/google/gve/gve.h')
-rw-r--r--  drivers/net/ethernet/google/gve/gve.h | 690
1 file changed, 595 insertions(+), 95 deletions(-)
diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index 1d3188e8e3b3..970d5ca8cdde 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -1,16 +1,22 @@
/* SPDX-License-Identifier: (GPL-2.0 OR MIT)
* Google virtual Ethernet (gve) driver
*
- * Copyright (C) 2015-2021 Google, Inc.
+ * Copyright (C) 2015-2024 Google LLC
*/
#ifndef _GVE_H_
#define _GVE_H_
#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/ethtool_netlink.h>
#include <linux/netdevice.h>
+#include <linux/net_tstamp.h>
#include <linux/pci.h>
+#include <linux/ptp_clock_kernel.h>
#include <linux/u64_stats_sync.h>
+#include <net/page_pool/helpers.h>
+#include <net/xdp.h>
#include "gve_desc.h"
#include "gve_desc_dqo.h"
@@ -30,7 +36,7 @@
#define GVE_MIN_MSIX 3
/* Numbers of gve tx/rx stats in stats report. */
-#define GVE_TX_STATS_REPORT_NUM 5
+#define GVE_TX_STATS_REPORT_NUM 6
#define GVE_RX_STATS_REPORT_NUM 2
/* Interval to schedule a stats report update, 20000ms. */
@@ -40,12 +46,59 @@
#define NIC_TX_STATS_REPORT_NUM 0
#define NIC_RX_STATS_REPORT_NUM 4
+#define GVE_ADMINQ_BUFFER_SIZE 4096
+
#define GVE_DATA_SLOT_ADDR_PAGE_MASK (~(PAGE_SIZE - 1))
/* PTYPEs are always 10 bits. */
#define GVE_NUM_PTYPES 1024
-#define GVE_RX_BUFFER_SIZE_DQO 2048
+/* Default minimum ring size */
+#define GVE_DEFAULT_MIN_TX_RING_SIZE 256
+#define GVE_DEFAULT_MIN_RX_RING_SIZE 512
+
+#define GVE_DEFAULT_RX_BUFFER_SIZE 2048
+
+#define GVE_XDP_RX_BUFFER_SIZE_DQO 4096
+
+#define GVE_DEFAULT_RX_BUFFER_OFFSET 2048
+
+#define GVE_PAGE_POOL_SIZE_MULTIPLIER 4
+
+#define GVE_FLOW_RULES_CACHE_SIZE \
+ (GVE_ADMINQ_BUFFER_SIZE / sizeof(struct gve_adminq_queried_flow_rule))
+#define GVE_FLOW_RULE_IDS_CACHE_SIZE \
+ (GVE_ADMINQ_BUFFER_SIZE / sizeof(((struct gve_adminq_queried_flow_rule *)0)->location))
+
+#define GVE_RSS_KEY_SIZE 40
+#define GVE_RSS_INDIR_SIZE 128
+
+#define GVE_XDP_ACTIONS 5
+
+#define GVE_GQ_TX_MIN_PKT_DESC_BYTES 182
+
+#define GVE_DEFAULT_HEADER_BUFFER_SIZE 128
+
+#define DQO_QPL_DEFAULT_TX_PAGES 512
+
+/* Maximum TSO size supported on DQO */
+#define GVE_DQO_TX_MAX 0x3FFFF
+
+#define GVE_TX_BUF_SHIFT_DQO 11
+
+/* 2K buffers for DQO-QPL */
+#define GVE_TX_BUF_SIZE_DQO BIT(GVE_TX_BUF_SHIFT_DQO)
+#define GVE_TX_BUFS_PER_PAGE_DQO (PAGE_SIZE >> GVE_TX_BUF_SHIFT_DQO)
+#define GVE_MAX_TX_BUFS_PER_PKT (DIV_ROUND_UP(GVE_DQO_TX_MAX, GVE_TX_BUF_SIZE_DQO))
+
+/* If the number of free/recyclable buffers is less than this threshold,
+ * the driver allocs and uses a non-qpl page on the receive path of DQO QPL
+ * to free up buffers.
+ * The value is set large enough to post at least three 64K LRO packets via
+ * 2K buffers to the NIC.
+ */
+#define GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD 96
+
+#define GVE_DQO_RX_HWTSTAMP_VALID 0x1
/* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */
struct gve_rx_desc_queue {
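(The new DQO bounce-buffer constants above are pure shift arithmetic: an 11-bit shift gives 2K buffers, so a 4K page holds two of them and a maximal 0x3FFFF-byte TSO packet needs 128. A minimal standalone sketch checking that arithmetic, with PAGE_SIZE and the kernel helper macros redefined locally under a 4K-page assumption; not part of the patch:)

#include <assert.h>

#define PAGE_SIZE 4096UL	/* assumption: 4K pages */
#define BIT(n) (1UL << (n))
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

#define GVE_DQO_TX_MAX 0x3FFFF
#define GVE_TX_BUF_SHIFT_DQO 11
#define GVE_TX_BUF_SIZE_DQO BIT(GVE_TX_BUF_SHIFT_DQO)
#define GVE_TX_BUFS_PER_PAGE_DQO (PAGE_SIZE >> GVE_TX_BUF_SHIFT_DQO)
#define GVE_MAX_TX_BUFS_PER_PKT (DIV_ROUND_UP(GVE_DQO_TX_MAX, GVE_TX_BUF_SIZE_DQO))

int main(void)
{
	assert(GVE_TX_BUF_SIZE_DQO == 2048);	/* 2K bounce buffers */
	assert(GVE_TX_BUFS_PER_PAGE_DQO == 2);	/* two buffers per 4K QPL page */
	assert(GVE_MAX_TX_BUFS_PER_PKT == 128);	/* ceil(0x3FFFF / 2048) */
	return 0;
}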
@@ -56,11 +109,19 @@ struct gve_rx_desc_queue {
/* The page info for a single slot in the RX data queue */
struct gve_rx_slot_page_info {
- struct page *page;
+ /* netmem is used for DQO RDA mode;
+ * page is used in all other modes.
+ */
+ union {
+ struct page *page;
+ netmem_ref netmem;
+ };
void *page_address;
u32 page_offset; /* offset to write to in page */
+ unsigned int buf_size;
int pagecnt_bias; /* expected pagecnt if only the driver has a ref */
- u8 can_flip;
+ u16 pad; /* adjustment for rx padding */
+ u8 can_flip; /* tracks if the networking stack is using the page */
};
/* A list of pages registered with the device during setup and used by a queue
@@ -119,11 +180,19 @@ struct gve_rx_compl_queue_dqo {
u32 mask; /* Mask for indices to the size of the ring */
};
+struct gve_header_buf {
+ u8 *data;
+ dma_addr_t addr;
+};
+
/* Stores state for tracking buffers posted to HW */
struct gve_rx_buf_state_dqo {
/* The page posted to HW. */
struct gve_rx_slot_page_info page_info;
+ /* XSK buffer */
+ struct xdp_buff *xsk_buff;
+
/* The DMA address corresponding to `page_info`. */
dma_addr_t addr;
@@ -136,15 +205,47 @@ struct gve_rx_buf_state_dqo {
s16 next;
};
+/* Wrapper for XDP Rx metadata */
+struct gve_xdp_buff {
+ struct xdp_buff xdp;
+ struct gve_priv *gve;
+ const struct gve_rx_compl_desc_dqo *compl_desc;
+};
+
/* `head` and `tail` are indices into an array, or -1 if empty. */
struct gve_index_list {
s16 head;
s16 tail;
};
+/* A single received packet split across multiple buffers may be
+ * reconstructed using the information in this structure.
+ */
+struct gve_rx_ctx {
+ /* head and tail of skb chain for the current packet or NULL if none */
+ struct sk_buff *skb_head;
+ struct sk_buff *skb_tail;
+ u32 total_size;
+ u8 frag_cnt;
+ bool drop_pkt;
+};
+
+struct gve_rx_cnts {
+ u32 ok_pkt_bytes;
+ u16 ok_pkt_cnt;
+ u16 total_pkt_cnt;
+ u16 cont_pkt_cnt;
+ u16 desc_err_pkt_cnt;
+};
+
/* Contains datapath state used to represent an RX queue. */
struct gve_rx_ring {
struct gve_priv *gve;
+
+ u16 packet_buffer_size; /* Size of buffer posted to NIC */
+ u16 packet_buffer_truesize; /* Total size of RX buffer */
+ u16 rx_headroom;
+
union {
/* GQI fields */
struct {
@@ -153,6 +254,10 @@ struct gve_rx_ring {
/* threshold for posting new buffs and descs */
u32 db_threshold;
+
+ u32 qpl_copy_pool_mask;
+ u32 qpl_copy_pool_head;
+ struct gve_rx_slot_page_info *qpl_copy_pool;
};
/* DQO fields. */
@@ -187,33 +292,63 @@ struct gve_rx_ring {
* which cannot be reused yet.
*/
struct gve_index_list used_buf_states;
+
+ /* qpl assigned to this queue */
+ struct gve_queue_page_list *qpl;
+
+ /* index into queue page list */
+ u32 next_qpl_page_idx;
+
+ /* track number of used buffers */
+ u16 used_buf_states_cnt;
+
+ /* Address info of the buffers for header-split */
+ struct gve_header_buf hdr_bufs;
+
+ struct page_pool *page_pool;
} dqo;
};
u64 rbytes; /* free-running bytes received */
+ u64 rx_hsplit_bytes; /* free-running header bytes received */
u64 rpackets; /* free-running packets received */
u32 cnt; /* free-running total number of completed packets */
u32 fill_cnt; /* free-running total number of descs and buffs posted */
u32 mask; /* masks the cnt and fill_cnt to the size of the ring */
+ u64 rx_hsplit_pkt; /* free-running packets with headers split */
u64 rx_copybreak_pkt; /* free-running count of copybreak packets */
u64 rx_copied_pkt; /* free-running total number of copied packets */
u64 rx_skb_alloc_fail; /* free-running count of skb alloc fails */
u64 rx_buf_alloc_fail; /* free-running count of buffer alloc fails */
u64 rx_desc_err_dropped_pkt; /* free-running count of packets dropped by descriptor error */
+ /* free-running count of packets left unsplit because the header buffer overflowed or hdr_len was 0 */
+ u64 rx_hsplit_unsplit_pkt;
+ u64 rx_cont_packet_cnt; /* free-running multi-fragment packets received */
+ u64 rx_frag_flip_cnt; /* free-running count of rx segments where page_flip was used */
+ u64 rx_frag_copy_cnt; /* free-running count of rx segments copied */
+ u64 rx_frag_alloc_cnt; /* free-running count of rx page allocations */
+ u64 xdp_tx_errors;
+ u64 xdp_redirect_errors;
+ u64 xdp_alloc_fails;
+ u64 xdp_actions[GVE_XDP_ACTIONS];
u32 q_num; /* queue index */
u32 ntfy_id; /* notification block index */
struct gve_queue_resources *q_resources; /* head and tail pointer idx */
dma_addr_t q_resources_bus; /* dma address for the queue resources */
struct u64_stats_sync statss; /* sync stats for 32bit archs */
- /* head and tail of skb chain for the current packet or NULL if none */
- struct sk_buff *skb_head;
- struct sk_buff *skb_tail;
+ struct gve_rx_ctx ctx; /* Info for packet currently being processed in this ring. */
+
+ /* XDP stuff */
+ struct xdp_rxq_info xdp_rxq;
+ struct xsk_buff_pool *xsk_pool;
+ struct page_frag_cache page_cache; /* Page cache to allocate XDP frames */
};
/* A TX desc ring entry */
union gve_tx_desc {
struct gve_tx_pkt_desc pkt; /* first desc for a packet */
+ struct gve_tx_mtd_desc mtd; /* optional metadata descriptor */
struct gve_tx_seg_desc seg; /* subsequent descs for a packet */
};
@@ -224,19 +359,24 @@ struct gve_tx_iovec {
u32 iov_padding; /* padding associated with this segment */
};
-struct gve_tx_dma_buf {
- DEFINE_DMA_UNMAP_ADDR(dma);
- DEFINE_DMA_UNMAP_LEN(len);
-};
-
/* Tracks the memory in the fifo occupied by the skb. Mapped 1:1 to a desc
* ring entry but only used for a pkt_desc not a seg_desc
*/
struct gve_tx_buffer_state {
- struct sk_buff *skb; /* skb for this pkt */
+ union {
+ struct sk_buff *skb; /* skb for this pkt */
+ struct xdp_frame *xdp_frame; /* xdp_frame */
+ };
+ struct {
+ u16 size; /* size of xmitted xdp pkt */
+ u8 is_xsk; /* xsk buff */
+ } xdp;
union {
struct gve_tx_iovec iov[GVE_TX_MAX_IOVEC]; /* segments of this pkt */
- struct gve_tx_dma_buf buf;
+ struct {
+ DEFINE_DMA_UNMAP_ADDR(dma);
+ DEFINE_DMA_UNMAP_LEN(len);
+ };
};
};
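(Dropping struct gve_tx_dma_buf in favor of an anonymous struct of DEFINE_DMA_UNMAP_ADDR/LEN fields means callers keep using the standard accessors from <linux/dma-mapping.h>. A hedged sketch of the usual pattern; the helper names gve_map_buf_example/gve_unmap_buf_example are illustrative, not from the patch:)

#include <linux/dma-mapping.h>

/* Illustrative only: the anonymous struct above is driven through the
 * standard dma_unmap_* accessors, exactly as the named struct was.
 */
static int gve_map_buf_example(struct device *dev,
			       struct gve_tx_buffer_state *info,
			       void *data, size_t len)
{
	dma_addr_t addr = dma_map_single(dev, data, len, DMA_TO_DEVICE);

	if (dma_mapping_error(dev, addr))
		return -ENOMEM;
	dma_unmap_addr_set(info, dma, addr);	/* fills the anonymous struct */
	dma_unmap_len_set(info, len, len);
	return 0;
}

static void gve_unmap_buf_example(struct device *dev,
				  struct gve_tx_buffer_state *info)
{
	dma_unmap_single(dev, dma_unmap_addr(info, dma),
			 dma_unmap_len(info, len), DMA_TO_DEVICE);
}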
@@ -269,10 +409,24 @@ enum gve_packet_state {
GVE_PACKET_STATE_PENDING_REINJECT_COMPL,
/* No valid completion received within the specified timeout. */
GVE_PACKET_STATE_TIMED_OUT_COMPL,
+ /* XSK pending packet has received a packet/reinjection completion, or
+ * has timed out. At this point, the pending packet can be counted by
+ * xsk_tx_complete and freed.
+ */
+ GVE_PACKET_STATE_XSK_COMPLETE,
+};
+
+enum gve_tx_pending_packet_dqo_type {
+ GVE_TX_PENDING_PACKET_DQO_SKB,
+ GVE_TX_PENDING_PACKET_DQO_XDP_FRAME,
+ GVE_TX_PENDING_PACKET_DQO_XSK,
};
struct gve_tx_pending_packet_dqo {
- struct sk_buff *skb; /* skb for this packet */
+ union {
+ struct sk_buff *skb;
+ struct xdp_frame *xdpf;
+ };
/* 0th element corresponds to the linear portion of `skb`, should be
* unmapped with `dma_unmap_single`.
@@ -280,7 +434,14 @@ struct gve_tx_pending_packet_dqo {
* All others correspond to `skb`'s frags and should be unmapped with
* `dma_unmap_page`.
*/
- struct gve_tx_dma_buf bufs[MAX_SKB_FRAGS + 1];
+ union {
+ struct {
+ DEFINE_DMA_UNMAP_ADDR(dma[MAX_SKB_FRAGS + 1]);
+ DEFINE_DMA_UNMAP_LEN(len[MAX_SKB_FRAGS + 1]);
+ };
+ s16 tx_qpl_buf_ids[GVE_MAX_TX_BUFS_PER_PKT];
+ };
+
u16 num_bufs;
/* Linked list index to next element in the list, or -1 if none */
@@ -295,7 +456,10 @@ struct gve_tx_pending_packet_dqo {
/* Identifies the current state of the packet as defined in
* `enum gve_packet_state`.
*/
- u8 state;
+ u8 state : 3;
+
+ /* gve_tx_pending_packet_dqo_type */
+ u8 type : 2;
/* If packet is an outstanding miss completion, then the packet is
* freed if the corresponding re-injection completion is not received
@@ -317,6 +481,9 @@ struct gve_tx_ring {
/* DQO fields. */
struct {
+ /* Spinlock for XDP tx traffic */
+ spinlock_t xdp_lock;
+
/* Linked list of gve_tx_pending_packet_dqo. Index into
* pending_packets, or -1 if empty.
*
@@ -335,6 +502,34 @@ struct gve_tx_ring {
* set.
*/
u32 last_re_idx;
+
+ /* free running number of packet buf descriptors posted */
+ u16 posted_packet_desc_cnt;
+ /* free running number of packet buf descriptors completed */
+ u16 completed_packet_desc_cnt;
+
+ /* QPL fields */
+ struct {
+ /* Linked list of gve_tx_buf_dqo. Index into
+ * tx_qpl_buf_next, or -1 if empty.
+ *
+ * This is a consumer list owned by the TX path. When it
+ * runs out, the producer list is stolen from the
+ * completion handling path
+ * (dqo_compl.free_tx_qpl_buf_head).
+ */
+ s16 free_tx_qpl_buf_head;
+
+ /* Free running count of the number of QPL tx buffers
+ * allocated
+ */
+ u32 alloc_tx_qpl_buf_cnt;
+
+ /* Cached value of `dqo_compl.free_tx_qpl_buf_cnt` */
+ u32 free_tx_qpl_buf_cnt;
+ };
+
+ atomic_t xsk_reorder_queue_tail;
} dqo_tx;
};
@@ -342,8 +537,10 @@ struct gve_tx_ring {
union {
/* GQI fields */
struct {
- /* NIC tail pointer */
- __be32 last_nic_done;
+ /* Spinlock for when cleanup in progress */
+ spinlock_t clean_lock;
+ /* Spinlock for XDP tx traffic */
+ spinlock_t xdp_lock;
};
/* DQO fields. */
@@ -366,6 +563,9 @@ struct gve_tx_ring {
/* Last TX ring index fetched by HW */
atomic_t hw_tx_head;
+ u16 xsk_reorder_queue_head;
+ u16 xsk_reorder_queue_tail;
+
/* List to track pending packets which received a miss
* completion but not a corresponding reinjection.
*/
@@ -376,6 +576,24 @@ struct gve_tx_ring {
* reached a specified timeout.
*/
struct gve_index_list timed_out_completions;
+
+ /* QPL fields */
+ struct {
+ /* Linked list of gve_tx_buf_dqo. Index into
+ * tx_qpl_buf_next, or -1 if empty.
+ *
+ * This is the producer list, owned by the completion
+ * handling path. When the consumer list
+ * (dqo_tx.free_tx_qpl_buf_head) runs out, this list
+ * will be stolen.
+ */
+ atomic_t free_tx_qpl_buf_head;
+
+ /* Free running count of the number of tx buffers
+ * freed
+ */
+ atomic_t free_tx_qpl_buf_cnt;
+ };
} dqo_compl;
} ____cacheline_aligned;
u64 pkt_done; /* free-running - total packets completed */
@@ -401,7 +619,24 @@ struct gve_tx_ring {
struct gve_tx_pending_packet_dqo *pending_packets;
s16 num_pending_packets;
+ u16 *xsk_reorder_queue;
+
u32 complq_mask; /* complq size is complq_mask + 1 */
+
+ /* QPL fields */
+ struct {
+ /* qpl assigned to this queue */
+ struct gve_queue_page_list *qpl;
+
+ /* Each QPL page is divided into TX bounce buffers
+ * of size GVE_TX_BUF_SIZE_DQO. tx_qpl_buf_next is
+ * an array to manage linked lists of TX buffers.
+ * An entry j at index i implies that the j'th buffer
+ * is next on the list after i.
+ */
+ s16 *tx_qpl_buf_next;
+ u32 num_tx_qpl_bufs;
+ };
} dqo;
} ____cacheline_aligned;
struct netdev_queue *netdev_txq;
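(The consumer/producer split described in the dqo_tx and dqo_compl comments above can be read as a classic lock-free handoff: the TX path owns a private free list and refills it by atomically stealing the entire list the completion path accumulates. A sketch under that reading; gve_tx_qpl_buf_pop_example is a hypothetical helper, not from the patch:)

/* Pop a 2K TX bounce buffer, stealing the completion path's producer
 * list when the TX path's consumer list is empty.
 */
static s16 gve_tx_qpl_buf_pop_example(struct gve_tx_ring *tx)
{
	s16 buf = tx->dqo_tx.free_tx_qpl_buf_head;

	if (buf == -1) {
		/* Take the whole producer list in one atomic exchange. */
		buf = (s16)atomic_xchg(&tx->dqo_compl.free_tx_qpl_buf_head, -1);
		if (buf == -1)
			return -1;	/* out of buffers everywhere */
	}
	/* Advance the consumer list past the popped buffer. */
	tx->dqo_tx.free_tx_qpl_buf_head = tx->dqo.tx_qpl_buf_next[buf];
	return buf;
}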
@@ -414,29 +649,44 @@ struct gve_tx_ring {
u32 q_num ____cacheline_aligned; /* queue idx */
u32 stop_queue; /* count of queue stops */
u32 wake_queue; /* count of queue wakes */
+ u32 queue_timeout; /* count of queue timeouts */
u32 ntfy_id; /* notification block index */
+ u32 last_kick_msec; /* Last time the queue was kicked */
dma_addr_t bus; /* dma address of the descr ring */
dma_addr_t q_resources_bus; /* dma address of the queue resources */
dma_addr_t complq_bus_dqo; /* dma address of the dqo.compl_ring */
struct u64_stats_sync statss; /* sync stats for 32bit archs */
+ struct xsk_buff_pool *xsk_pool;
+ u64 xdp_xsk_sent;
+ u64 xdp_xmit;
+ u64 xdp_xmit_errors;
} ____cacheline_aligned;
/* Wraps the info for one irq including the napi struct and the queues
* associated with that irq.
*/
struct gve_notify_block {
- __be32 irq_db_index; /* idx into Bar2 - set by device, must be 1st */
+ __be32 *irq_db_index; /* pointer to idx into Bar2 */
char name[IFNAMSIZ + 16]; /* name registered with the kernel */
struct napi_struct napi; /* kernel napi struct for this block */
struct gve_priv *priv;
struct gve_tx_ring *tx; /* tx rings on this block */
struct gve_rx_ring *rx; /* rx rings on this block */
-} ____cacheline_aligned;
+ u32 irq;
+};
+
+/* Tracks allowed and current rx queue settings */
+struct gve_rx_queue_config {
+ u16 max_queues;
+ u16 num_queues;
+ u16 packet_buffer_size;
+};
-/* Tracks allowed and current queue settings */
-struct gve_queue_config {
+/* Tracks allowed and current tx queue settings */
+struct gve_tx_queue_config {
u16 max_queues;
- u16 num_queues; /* current */
+ u16 num_queues; /* number of TX queues, excluding XDP queues */
+ u16 num_xdp_queues;
};
/* Tracks the available and used qpl IDs */
@@ -445,10 +695,9 @@ struct gve_qpl_config {
unsigned long *qpl_id_map; /* bitmap of used qpl ids */
};
-struct gve_options_dqo_rda {
- u16 tx_comp_ring_entries; /* number of tx_comp descriptors */
- u16 rx_buff_ring_entries; /* number of rx_buff descriptors */
-};
+struct gve_irq_db {
+ __be32 index;
+} ____cacheline_aligned;
struct gve_ptype {
u8 l3_type; /* `gve_l3_type` in gve_adminq.h */
@@ -459,6 +708,36 @@ struct gve_ptype_lut {
struct gve_ptype ptypes[GVE_NUM_PTYPES];
};
+/* Parameters for allocating resources for tx queues */
+struct gve_tx_alloc_rings_cfg {
+ struct gve_tx_queue_config *qcfg;
+
+ u16 num_xdp_rings;
+
+ u16 ring_size;
+ bool raw_addressing;
+
+ /* Allocated resources are returned here */
+ struct gve_tx_ring *tx;
+};
+
+/* Parameters for allocating resources for rx queues */
+struct gve_rx_alloc_rings_cfg {
+ /* tx config is also needed to determine QPL ids */
+ struct gve_rx_queue_config *qcfg_rx;
+ struct gve_tx_queue_config *qcfg_tx;
+
+ u16 ring_size;
+ u16 packet_buffer_size;
+ bool raw_addressing;
+ bool enable_header_split;
+ bool reset_rss;
+ bool xdp;
+
+ /* Allocated resources are returned here */
+ struct gve_rx_ring *rx;
+};
+
/* GVE_QUEUE_FORMAT_UNSPECIFIED must be zero since 0 is the default value
* when the entire configure_device_resources command is zeroed out and the
* queue_format is not specified.
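(The two *_alloc_rings_cfg structs above follow a fill-in/return-out pattern: the caller supplies queue config and sizing, and the alloc helper, declared further down in this header, hands the ring array back through the cfg. A hedged sketch of a caller; gve_tx_alloc_example is a hypothetical wrapper, not from the patch:)

static int gve_tx_alloc_example(struct gve_priv *priv)
{
	struct gve_tx_alloc_rings_cfg cfg = {
		.qcfg		= &priv->tx_cfg,
		.num_xdp_rings	= priv->tx_cfg.num_xdp_queues,
		.ring_size	= priv->tx_desc_cnt,
		.raw_addressing	= !gve_is_qpl(priv),
	};
	int err;

	err = gve_tx_alloc_rings_gqi(priv, &cfg);
	if (err)
		return err;
	priv->tx = cfg.tx;	/* allocated rings are returned via cfg */
	return 0;
}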
@@ -468,15 +747,60 @@ enum gve_queue_format {
GVE_GQI_RDA_FORMAT = 0x1,
GVE_GQI_QPL_FORMAT = 0x2,
GVE_DQO_RDA_FORMAT = 0x3,
+ GVE_DQO_QPL_FORMAT = 0x4,
+};
+
+struct gve_flow_spec {
+ __be32 src_ip[4];
+ __be32 dst_ip[4];
+ union {
+ struct {
+ __be16 src_port;
+ __be16 dst_port;
+ };
+ __be32 spi;
+ };
+ union {
+ u8 tos;
+ u8 tclass;
+ };
+};
+
+struct gve_flow_rule {
+ u32 location;
+ u16 flow_type;
+ u16 action;
+ struct gve_flow_spec key;
+ struct gve_flow_spec mask;
+};
+
+struct gve_flow_rules_cache {
+ bool rules_cache_synced; /* False if the driver's rules_cache is outdated */
+ struct gve_adminq_queried_flow_rule *rules_cache;
+ __be32 *rule_ids_cache;
+ /* The total number of queried rules that are stored in the caches */
+ u32 rules_cache_num;
+ u32 rule_ids_cache_num;
+};
+
+struct gve_rss_config {
+ u8 *hash_key;
+ u32 *hash_lut;
+};
+
+struct gve_ptp {
+ struct ptp_clock_info info;
+ struct ptp_clock *clock;
+ struct gve_priv *priv;
};
struct gve_priv {
struct net_device *dev;
struct gve_tx_ring *tx; /* array of tx_cfg.num_queues */
struct gve_rx_ring *rx; /* array of rx_cfg.num_queues */
- struct gve_queue_page_list *qpls; /* array of num qpls */
struct gve_notify_block *ntfy_blocks; /* array of num_ntfy_blks */
- dma_addr_t ntfy_block_bus;
+ struct gve_irq_db *irq_db_indices; /* array of num_ntfy_blks */
+ dma_addr_t irq_db_indices_bus;
struct msix_entry *msix_vectors; /* array of num_ntfy_blks + 1 */
char mgmt_msix_name[IFNAMSIZ + 16];
u32 mgmt_msix_idx;
@@ -486,17 +810,24 @@ struct gve_priv {
u16 num_event_counters;
u16 tx_desc_cnt; /* num desc per ring */
u16 rx_desc_cnt; /* num desc per ring */
- u16 tx_pages_per_qpl; /* tx buffer length */
- u16 rx_data_slot_cnt; /* rx buffer length */
+ u16 max_tx_desc_cnt;
+ u16 max_rx_desc_cnt;
+ u16 min_tx_desc_cnt;
+ u16 min_rx_desc_cnt;
+ bool modify_ring_size_enabled;
+ bool default_min_ring_size;
+ u16 tx_pages_per_qpl; /* Number of pages per TX qpl suggested by the NIC */
u64 max_registered_pages;
u64 num_registered_pages; /* num pages registered with NIC */
+ struct bpf_prog *xdp_prog; /* XDP BPF program */
u32 rx_copybreak; /* copy packets smaller than this */
u16 default_num_queues; /* default num queues to set up */
- struct gve_queue_config tx_cfg;
- struct gve_queue_config rx_cfg;
- struct gve_qpl_config qpl_cfg; /* map used QPL ids */
- u32 num_ntfy_blks; /* spilt between TX and RX so must be even */
+ struct gve_tx_queue_config tx_cfg;
+ struct gve_rx_queue_config rx_cfg;
+ unsigned long *xsk_pools; /* bitmap of RX queues with XSK pools */
+ u32 num_ntfy_blks; /* split between TX and RX so must be even */
+ int numa_node;
struct gve_registers __iomem *reg_bar0; /* see gve_register.h */
__be32 __iomem *db_bar2; /* "array" of doorbells */
@@ -509,6 +840,8 @@ struct gve_priv {
/* Admin queue - see gve_adminq.h*/
union gve_adminq_command *adminq;
dma_addr_t adminq_bus_addr;
+ struct dma_pool *adminq_pool;
+ struct mutex adminq_lock; /* Protects adminq command execution */
u32 adminq_mask; /* masks prod_cnt to adminq size */
u32 adminq_prod_cnt; /* free-running count of AQ cmds executed */
u32 adminq_cmd_fail; /* free-running count of AQ cmds failed */
@@ -526,7 +859,13 @@ struct gve_priv {
u32 adminq_set_driver_parameter_cnt;
u32 adminq_report_stats_cnt;
u32 adminq_report_link_speed_cnt;
+ u32 adminq_report_nic_timestamp_cnt;
u32 adminq_get_ptype_map_cnt;
+ u32 adminq_verify_driver_compatibility_cnt;
+ u32 adminq_query_flow_rules_cnt;
+ u32 adminq_cfg_flow_rule_cnt;
+ u32 adminq_cfg_rss_cnt;
+ u32 adminq_query_rss_cnt;
/* Global stats */
u32 interface_up_cnt; /* count of times interface turned up since last reset */
@@ -535,6 +874,8 @@ struct gve_priv {
u32 page_alloc_fail; /* count of page alloc fails */
u32 dma_mapping_error; /* count of dma mapping errors */
u32 stats_report_trigger_cnt; /* count of device-requested stats-reports since last reset */
+ u32 suspend_cnt; /* count of times suspended */
+ u32 resume_cnt; /* count of times resumed */
struct workqueue_struct *gve_wq;
struct work_struct service_task;
struct work_struct stats_report_task;
@@ -551,14 +892,39 @@ struct gve_priv {
/* Gvnic device link speed from hypervisor. */
u64 link_speed;
+ bool up_before_suspend; /* True if dev was up before suspend */
- struct gve_options_dqo_rda options_dqo_rda;
struct gve_ptype_lut *ptype_lut_dqo;
/* Must be a power of two. */
- int data_buffer_size_dqo;
+ u16 max_rx_buffer_size; /* device limit */
enum gve_queue_format queue_format;
+
+ /* Interrupt coalescing settings */
+ u32 tx_coalesce_usecs;
+ u32 rx_coalesce_usecs;
+
+ u16 header_buf_size; /* device configured, header-split supported if non-zero */
+ bool header_split_enabled; /* True if the header split is enabled by the user */
+
+ u32 max_flow_rules;
+ u32 num_flow_rules;
+
+ struct gve_flow_rules_cache flow_rules_cache;
+
+ u16 rss_key_size;
+ u16 rss_lut_size;
+ bool cache_rss_config;
+ struct gve_rss_config rss_config;
+
+ /* True if the device supports reading the nic clock */
+ bool nic_timestamp_supported;
+ struct gve_ptp *ptp;
+ struct kernel_hwtstamp_config ts_config;
+ struct gve_nic_ts_report *nic_ts_report;
+ dma_addr_t nic_ts_report_bus;
+ u64 last_sync_nic_counter; /* Clock counter from last NIC TS report */
};
enum gve_service_task_flags_bit {
@@ -717,7 +1083,7 @@ static inline void gve_clear_report_stats(struct gve_priv *priv)
static inline __be32 __iomem *gve_irq_doorbell(struct gve_priv *priv,
struct gve_notify_block *block)
{
- return &priv->db_bar2[be32_to_cpu(block->irq_db_index)];
+ return &priv->db_bar2[be32_to_cpu(*block->irq_db_index)];
}
/* Returns the index into ntfy_blocks of the given tx ring's block
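(With the doorbell index now read through a pointer into the device-written irq_db_indices array, callers of gve_irq_doorbell() are unchanged. A sketch only; GVE_IRQ_ACK stands in for the real interrupt-doorbell bits, which live in gve_dqo.h, not in this diff:)

static void gve_ack_irq_example(struct gve_priv *priv,
				struct gve_notify_block *block)
{
	/* The accessor dereferences the device-written index internally. */
	iowrite32be(GVE_IRQ_ACK, gve_irq_doorbell(priv, block));
}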
@@ -734,72 +1100,69 @@ static inline u32 gve_rx_idx_to_ntfy(struct gve_priv *priv, u32 queue_idx)
return (priv->num_ntfy_blks / 2) + queue_idx;
}
-/* Returns the number of tx queue page lists
- */
-static inline u32 gve_num_tx_qpls(struct gve_priv *priv)
+static inline bool gve_is_qpl(struct gve_priv *priv)
{
- if (priv->queue_format != GVE_GQI_QPL_FORMAT)
- return 0;
-
- return priv->tx_cfg.num_queues;
+ return priv->queue_format == GVE_GQI_QPL_FORMAT ||
+ priv->queue_format == GVE_DQO_QPL_FORMAT;
}
-/* Returns the number of rx queue page lists
- */
-static inline u32 gve_num_rx_qpls(struct gve_priv *priv)
+/* Returns the number of tx queue page lists */
+static inline u32 gve_num_tx_qpls(const struct gve_tx_queue_config *tx_cfg,
+ bool is_qpl)
{
- if (priv->queue_format != GVE_GQI_QPL_FORMAT)
+ if (!is_qpl)
return 0;
-
- return priv->rx_cfg.num_queues;
+ return tx_cfg->num_queues + tx_cfg->num_xdp_queues;
}
-/* Returns a pointer to the next available tx qpl in the list of qpls
- */
-static inline
-struct gve_queue_page_list *gve_assign_tx_qpl(struct gve_priv *priv)
+/* Returns the number of rx queue page lists */
+static inline u32 gve_num_rx_qpls(const struct gve_rx_queue_config *rx_cfg,
+ bool is_qpl)
{
- int id = find_first_zero_bit(priv->qpl_cfg.qpl_id_map,
- priv->qpl_cfg.qpl_map_size);
+ if (!is_qpl)
+ return 0;
+ return rx_cfg->num_queues;
+}
- /* we are out of tx qpls */
- if (id >= gve_num_tx_qpls(priv))
- return NULL;
+static inline u32 gve_tx_qpl_id(struct gve_priv *priv, int tx_qid)
+{
+ return tx_qid;
+}
- set_bit(id, priv->qpl_cfg.qpl_id_map);
- return &priv->qpls[id];
+static inline u32 gve_rx_qpl_id(struct gve_priv *priv, int rx_qid)
+{
+ return priv->tx_cfg.max_queues + rx_qid;
}
-/* Returns a pointer to the next available rx qpl in the list of qpls
- */
-static inline
-struct gve_queue_page_list *gve_assign_rx_qpl(struct gve_priv *priv)
+static inline u32 gve_get_rx_qpl_id(const struct gve_tx_queue_config *tx_cfg,
+ int rx_qid)
{
- int id = find_next_zero_bit(priv->qpl_cfg.qpl_id_map,
- priv->qpl_cfg.qpl_map_size,
- gve_num_tx_qpls(priv));
+ return tx_cfg->max_queues + rx_qid;
+}
- /* we are out of rx qpls */
- if (id == priv->qpl_cfg.qpl_map_size)
- return NULL;
+static inline u32 gve_tx_start_qpl_id(struct gve_priv *priv)
+{
+ return gve_tx_qpl_id(priv, 0);
+}
- set_bit(id, priv->qpl_cfg.qpl_id_map);
- return &priv->qpls[id];
+static inline u32 gve_rx_start_qpl_id(const struct gve_tx_queue_config *tx_cfg)
+{
+ return gve_get_rx_qpl_id(tx_cfg, 0);
}
-/* Unassigns the qpl with the given id
- */
-static inline void gve_unassign_qpl(struct gve_priv *priv, int id)
+static inline u32 gve_get_rx_pages_per_qpl_dqo(u32 rx_desc_cnt)
{
- clear_bit(id, priv->qpl_cfg.qpl_id_map);
+ /* For DQO, page count should be more than ring size for
+ * out-of-order completions. Set it to twice the ring size.
+ */
+ return 2 * rx_desc_cnt;
}
-/* Returns the correct dma direction for tx and rx qpls
- */
+/* Returns the correct dma direction for tx and rx qpls */
static inline enum dma_data_direction gve_qpl_dma_dir(struct gve_priv *priv,
int id)
{
- if (id < gve_num_tx_qpls(priv))
+ if (id < gve_rx_start_qpl_id(&priv->tx_cfg))
return DMA_TO_DEVICE;
else
return DMA_FROM_DEVICE;
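(The QPL id helpers above encode a flat layout: TX QPLs take ids [0, tx_cfg.max_queues) and RX QPLs follow, which is what lets gve_qpl_dma_dir() derive the DMA direction from the id alone. A small worked example; hypothetical function, not from the patch:)

static void gve_qpl_id_layout_example(void)
{
	struct gve_tx_queue_config tx_cfg = { .max_queues = 16 };

	/* TX queue 5 uses QPL id 5; RX ids start right after the TX range. */
	WARN_ON(gve_rx_start_qpl_id(&tx_cfg) != 16);
	/* RX queue 3 uses QPL id 16 + 3 = 19. */
	WARN_ON(gve_get_rx_qpl_id(&tx_cfg, 3) != 19);
}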
@@ -811,36 +1174,173 @@ static inline bool gve_is_gqi(struct gve_priv *priv)
priv->queue_format == GVE_GQI_QPL_FORMAT;
}
+static inline bool gve_is_dqo(struct gve_priv *priv)
+{
+ return priv->queue_format == GVE_DQO_RDA_FORMAT ||
+ priv->queue_format == GVE_DQO_QPL_FORMAT;
+}
+
+static inline u32 gve_num_tx_queues(struct gve_priv *priv)
+{
+ return priv->tx_cfg.num_queues + priv->tx_cfg.num_xdp_queues;
+}
+
+static inline u32 gve_xdp_tx_queue_id(struct gve_priv *priv, u32 queue_id)
+{
+ return priv->tx_cfg.num_queues + queue_id;
+}
+
+static inline u32 gve_xdp_tx_start_queue_id(struct gve_priv *priv)
+{
+ return gve_xdp_tx_queue_id(priv, 0);
+}
+
+static inline bool gve_supports_xdp_xmit(struct gve_priv *priv)
+{
+ switch (priv->queue_format) {
+ case GVE_GQI_QPL_FORMAT:
+ case GVE_DQO_RDA_FORMAT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/* gqi napi handler defined in gve_main.c */
+int gve_napi_poll(struct napi_struct *napi, int budget);
+
/* buffers */
int gve_alloc_page(struct gve_priv *priv, struct device *dev,
struct page **page, dma_addr_t *dma,
- enum dma_data_direction);
+ enum dma_data_direction, gfp_t gfp_flags);
void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
enum dma_data_direction);
+/* qpls */
+struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv,
+ u32 id, int pages);
+void gve_free_queue_page_list(struct gve_priv *priv,
+ struct gve_queue_page_list *qpl,
+ u32 id);
/* tx handling */
netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev);
+int gve_xdp_xmit_gqi(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags);
+int gve_xdp_xmit_dqo(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags);
+int gve_xdp_xmit_one(struct gve_priv *priv, struct gve_tx_ring *tx,
+ void *data, int len, void *frame_p);
+void gve_xdp_tx_flush(struct gve_priv *priv, u32 xdp_qid);
+int gve_xdp_xmit_one_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
+ struct xdp_frame *xdpf);
bool gve_tx_poll(struct gve_notify_block *block, int budget);
-int gve_tx_alloc_rings(struct gve_priv *priv);
-void gve_tx_free_rings_gqi(struct gve_priv *priv);
-__be32 gve_tx_load_event_counter(struct gve_priv *priv,
- struct gve_tx_ring *tx);
+bool gve_xdp_poll(struct gve_notify_block *block, int budget);
+int gve_xsk_tx_poll(struct gve_notify_block *block, int budget);
+int gve_tx_alloc_rings_gqi(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *cfg);
+void gve_tx_free_rings_gqi(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *cfg);
+void gve_tx_start_ring_gqi(struct gve_priv *priv, int idx);
+void gve_tx_stop_ring_gqi(struct gve_priv *priv, int idx);
+u32 gve_tx_load_event_counter(struct gve_priv *priv,
+ struct gve_tx_ring *tx);
+bool gve_tx_clean_pending(struct gve_priv *priv, struct gve_tx_ring *tx);
/* rx handling */
void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx);
-bool gve_rx_poll(struct gve_notify_block *block, int budget);
-int gve_rx_alloc_rings(struct gve_priv *priv);
-void gve_rx_free_rings_gqi(struct gve_priv *priv);
-bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
- netdev_features_t feat);
+int gve_rx_poll(struct gve_notify_block *block, int budget);
+bool gve_rx_work_pending(struct gve_rx_ring *rx);
+int gve_rx_alloc_ring_gqi(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg,
+ struct gve_rx_ring *rx,
+ int idx);
+void gve_rx_free_ring_gqi(struct gve_priv *priv, struct gve_rx_ring *rx,
+ struct gve_rx_alloc_rings_cfg *cfg);
+int gve_rx_alloc_rings_gqi(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg);
+void gve_rx_free_rings_gqi(struct gve_priv *priv,
+ struct gve_rx_alloc_rings_cfg *cfg);
+void gve_rx_start_ring_gqi(struct gve_priv *priv, int idx);
+void gve_rx_stop_ring_gqi(struct gve_priv *priv, int idx);
+bool gve_header_split_supported(const struct gve_priv *priv);
+int gve_set_rx_buf_len_config(struct gve_priv *priv, u32 rx_buf_len,
+ struct netlink_ext_ack *extack,
+ struct gve_rx_alloc_rings_cfg *rx_alloc_cfg);
+int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split,
+ struct gve_rx_alloc_rings_cfg *rx_alloc_cfg);
+/* rx buffer handling */
+int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs);
+void gve_free_page_dqo(struct gve_priv *priv, struct gve_rx_buf_state_dqo *bs,
+ bool free_page);
+struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx);
+bool gve_buf_state_is_allocated(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state);
+void gve_free_buf_state(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state);
+struct gve_rx_buf_state_dqo *gve_dequeue_buf_state(struct gve_rx_ring *rx,
+ struct gve_index_list *list);
+void gve_enqueue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list,
+ struct gve_rx_buf_state_dqo *buf_state);
+struct gve_rx_buf_state_dqo *gve_get_recycled_buf_state(struct gve_rx_ring *rx);
+void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state);
+void gve_free_to_page_pool(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state,
+ bool allow_direct);
+int gve_alloc_qpl_page_dqo(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state);
+void gve_free_qpl_page_dqo(struct gve_rx_buf_state_dqo *buf_state);
+void gve_reuse_buffer(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state);
+void gve_free_buffer(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state);
+int gve_alloc_buffer(struct gve_rx_ring *rx, struct gve_rx_desc_dqo *desc);
+struct page_pool *gve_rx_create_page_pool(struct gve_priv *priv,
+ struct gve_rx_ring *rx,
+ bool xdp);
+
/* Reset */
void gve_schedule_reset(struct gve_priv *priv);
int gve_reset(struct gve_priv *priv, bool attempt_teardown);
+void gve_get_curr_alloc_cfgs(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
+ struct gve_rx_alloc_rings_cfg *rx_alloc_cfg);
+int gve_adjust_config(struct gve_priv *priv,
+ struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
+ struct gve_rx_alloc_rings_cfg *rx_alloc_cfg);
int gve_adjust_queues(struct gve_priv *priv,
- struct gve_queue_config new_rx_config,
- struct gve_queue_config new_tx_config);
+ struct gve_rx_queue_config new_rx_config,
+ struct gve_tx_queue_config new_tx_config,
+ bool reset_rss);
+/* flow steering rule */
+int gve_get_flow_rule_entry(struct gve_priv *priv, struct ethtool_rxnfc *cmd);
+int gve_get_flow_rule_ids(struct gve_priv *priv, struct ethtool_rxnfc *cmd, u32 *rule_locs);
+int gve_add_flow_rule(struct gve_priv *priv, struct ethtool_rxnfc *cmd);
+int gve_del_flow_rule(struct gve_priv *priv, struct ethtool_rxnfc *cmd);
+int gve_flow_rules_reset(struct gve_priv *priv);
+/* RSS config */
+int gve_init_rss_config(struct gve_priv *priv, u16 num_queues);
+/* PTP and timestamping */
+#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
+int gve_clock_nic_ts_read(struct gve_priv *priv);
+int gve_init_clock(struct gve_priv *priv);
+void gve_teardown_clock(struct gve_priv *priv);
+#else /* CONFIG_PTP_1588_CLOCK */
+static inline int gve_clock_nic_ts_read(struct gve_priv *priv)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int gve_init_clock(struct gve_priv *priv)
+{
+ return 0;
+}
+
+static inline void gve_teardown_clock(struct gve_priv *priv) { }
+#endif /* CONFIG_PTP_1588_CLOCK */
/* report stats handling */
void gve_handle_report_stats(struct gve_priv *priv);
/* exported by ethtool.c */
extern const struct ethtool_ops gve_ethtool_ops;
/* needed by ethtool */
+extern char gve_driver_name[];
extern const char gve_version_str[];
#endif /* _GVE_H_ */