diff options
Diffstat (limited to 'drivers/net/ethernet/google/gve/gve.h')
| -rw-r--r-- | drivers/net/ethernet/google/gve/gve.h | 690 |
1 files changed, 595 insertions, 95 deletions
diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 1d3188e8e3b3..970d5ca8cdde 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -1,16 +1,22 @@ /* SPDX-License-Identifier: (GPL-2.0 OR MIT) * Google virtual Ethernet (gve) driver * - * Copyright (C) 2015-2021 Google, Inc. + * Copyright (C) 2015-2024 Google LLC */ #ifndef _GVE_H_ #define _GVE_H_ #include <linux/dma-mapping.h> +#include <linux/dmapool.h> +#include <linux/ethtool_netlink.h> #include <linux/netdevice.h> +#include <linux/net_tstamp.h> #include <linux/pci.h> +#include <linux/ptp_clock_kernel.h> #include <linux/u64_stats_sync.h> +#include <net/page_pool/helpers.h> +#include <net/xdp.h> #include "gve_desc.h" #include "gve_desc_dqo.h" @@ -30,7 +36,7 @@ #define GVE_MIN_MSIX 3 /* Numbers of gve tx/rx stats in stats report. */ -#define GVE_TX_STATS_REPORT_NUM 5 +#define GVE_TX_STATS_REPORT_NUM 6 #define GVE_RX_STATS_REPORT_NUM 2 /* Interval to schedule a stats report update, 20000ms. */ @@ -40,12 +46,59 @@ #define NIC_TX_STATS_REPORT_NUM 0 #define NIC_RX_STATS_REPORT_NUM 4 +#define GVE_ADMINQ_BUFFER_SIZE 4096 + #define GVE_DATA_SLOT_ADDR_PAGE_MASK (~(PAGE_SIZE - 1)) /* PTYPEs are always 10 bits. */ #define GVE_NUM_PTYPES 1024 -#define GVE_RX_BUFFER_SIZE_DQO 2048 +/* Default minimum ring size */ +#define GVE_DEFAULT_MIN_TX_RING_SIZE 256 +#define GVE_DEFAULT_MIN_RX_RING_SIZE 512 + +#define GVE_DEFAULT_RX_BUFFER_SIZE 2048 + +#define GVE_XDP_RX_BUFFER_SIZE_DQO 4096 + +#define GVE_DEFAULT_RX_BUFFER_OFFSET 2048 + +#define GVE_PAGE_POOL_SIZE_MULTIPLIER 4 + +#define GVE_FLOW_RULES_CACHE_SIZE \ + (GVE_ADMINQ_BUFFER_SIZE / sizeof(struct gve_adminq_queried_flow_rule)) +#define GVE_FLOW_RULE_IDS_CACHE_SIZE \ + (GVE_ADMINQ_BUFFER_SIZE / sizeof(((struct gve_adminq_queried_flow_rule *)0)->location)) + +#define GVE_RSS_KEY_SIZE 40 +#define GVE_RSS_INDIR_SIZE 128 + +#define GVE_XDP_ACTIONS 5 + +#define GVE_GQ_TX_MIN_PKT_DESC_BYTES 182 + +#define GVE_DEFAULT_HEADER_BUFFER_SIZE 128 + +#define DQO_QPL_DEFAULT_TX_PAGES 512 + +/* Maximum TSO size supported on DQO */ +#define GVE_DQO_TX_MAX 0x3FFFF + +#define GVE_TX_BUF_SHIFT_DQO 11 + +/* 2K buffers for DQO-QPL */ +#define GVE_TX_BUF_SIZE_DQO BIT(GVE_TX_BUF_SHIFT_DQO) +#define GVE_TX_BUFS_PER_PAGE_DQO (PAGE_SIZE >> GVE_TX_BUF_SHIFT_DQO) +#define GVE_MAX_TX_BUFS_PER_PKT (DIV_ROUND_UP(GVE_DQO_TX_MAX, GVE_TX_BUF_SIZE_DQO)) + +/* If number of free/recyclable buffers are less than this threshold; driver + * allocs and uses a non-qpl page on the receive path of DQO QPL to free + * up buffers. + * Value is set big enough to post at least 3 64K LRO packet via 2K buffer to NIC. + */ +#define GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD 96 + +#define GVE_DQO_RX_HWTSTAMP_VALID 0x1 /* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */ struct gve_rx_desc_queue { @@ -56,11 +109,19 @@ struct gve_rx_desc_queue { /* The page info for a single slot in the RX data queue */ struct gve_rx_slot_page_info { - struct page *page; + /* netmem is used for DQO RDA mode + * page is used in all other modes + */ + union { + struct page *page; + netmem_ref netmem; + }; void *page_address; u32 page_offset; /* offset to write to in page */ + unsigned int buf_size; int pagecnt_bias; /* expected pagecnt if only the driver has a ref */ - u8 can_flip; + u16 pad; /* adjustment for rx padding */ + u8 can_flip; /* tracks if the networking stack is using the page */ }; /* A list of pages registered with the device during setup and used by a queue @@ -119,11 +180,19 @@ struct gve_rx_compl_queue_dqo { u32 mask; /* Mask for indices to the size of the ring */ }; +struct gve_header_buf { + u8 *data; + dma_addr_t addr; +}; + /* Stores state for tracking buffers posted to HW */ struct gve_rx_buf_state_dqo { /* The page posted to HW. */ struct gve_rx_slot_page_info page_info; + /* XSK buffer */ + struct xdp_buff *xsk_buff; + /* The DMA address corresponding to `page_info`. */ dma_addr_t addr; @@ -136,15 +205,47 @@ struct gve_rx_buf_state_dqo { s16 next; }; +/* Wrapper for XDP Rx metadata */ +struct gve_xdp_buff { + struct xdp_buff xdp; + struct gve_priv *gve; + const struct gve_rx_compl_desc_dqo *compl_desc; +}; + /* `head` and `tail` are indices into an array, or -1 if empty. */ struct gve_index_list { s16 head; s16 tail; }; +/* A single received packet split across multiple buffers may be + * reconstructed using the information in this structure. + */ +struct gve_rx_ctx { + /* head and tail of skb chain for the current packet or NULL if none */ + struct sk_buff *skb_head; + struct sk_buff *skb_tail; + u32 total_size; + u8 frag_cnt; + bool drop_pkt; +}; + +struct gve_rx_cnts { + u32 ok_pkt_bytes; + u16 ok_pkt_cnt; + u16 total_pkt_cnt; + u16 cont_pkt_cnt; + u16 desc_err_pkt_cnt; +}; + /* Contains datapath state used to represent an RX queue. */ struct gve_rx_ring { struct gve_priv *gve; + + u16 packet_buffer_size; /* Size of buffer posted to NIC */ + u16 packet_buffer_truesize; /* Total size of RX buffer */ + u16 rx_headroom; + union { /* GQI fields */ struct { @@ -153,6 +254,10 @@ struct gve_rx_ring { /* threshold for posting new buffs and descs */ u32 db_threshold; + + u32 qpl_copy_pool_mask; + u32 qpl_copy_pool_head; + struct gve_rx_slot_page_info *qpl_copy_pool; }; /* DQO fields. */ @@ -187,33 +292,63 @@ struct gve_rx_ring { * which cannot be reused yet. */ struct gve_index_list used_buf_states; + + /* qpl assigned to this queue */ + struct gve_queue_page_list *qpl; + + /* index into queue page list */ + u32 next_qpl_page_idx; + + /* track number of used buffers */ + u16 used_buf_states_cnt; + + /* Address info of the buffers for header-split */ + struct gve_header_buf hdr_bufs; + + struct page_pool *page_pool; } dqo; }; u64 rbytes; /* free-running bytes received */ + u64 rx_hsplit_bytes; /* free-running header bytes received */ u64 rpackets; /* free-running packets received */ u32 cnt; /* free-running total number of completed packets */ u32 fill_cnt; /* free-running total number of descs and buffs posted */ u32 mask; /* masks the cnt and fill_cnt to the size of the ring */ + u64 rx_hsplit_pkt; /* free-running packets with headers split */ u64 rx_copybreak_pkt; /* free-running count of copybreak packets */ u64 rx_copied_pkt; /* free-running total number of copied packets */ u64 rx_skb_alloc_fail; /* free-running count of skb alloc fails */ u64 rx_buf_alloc_fail; /* free-running count of buffer alloc fails */ u64 rx_desc_err_dropped_pkt; /* free-running count of packets dropped by descriptor error */ + /* free-running count of unsplit packets due to header buffer overflow or hdr_len is 0 */ + u64 rx_hsplit_unsplit_pkt; + u64 rx_cont_packet_cnt; /* free-running multi-fragment packets received */ + u64 rx_frag_flip_cnt; /* free-running count of rx segments where page_flip was used */ + u64 rx_frag_copy_cnt; /* free-running count of rx segments copied */ + u64 rx_frag_alloc_cnt; /* free-running count of rx page allocations */ + u64 xdp_tx_errors; + u64 xdp_redirect_errors; + u64 xdp_alloc_fails; + u64 xdp_actions[GVE_XDP_ACTIONS]; u32 q_num; /* queue index */ u32 ntfy_id; /* notification block index */ struct gve_queue_resources *q_resources; /* head and tail pointer idx */ dma_addr_t q_resources_bus; /* dma address for the queue resources */ struct u64_stats_sync statss; /* sync stats for 32bit archs */ - /* head and tail of skb chain for the current packet or NULL if none */ - struct sk_buff *skb_head; - struct sk_buff *skb_tail; + struct gve_rx_ctx ctx; /* Info for packet currently being processed in this ring. */ + + /* XDP stuff */ + struct xdp_rxq_info xdp_rxq; + struct xsk_buff_pool *xsk_pool; + struct page_frag_cache page_cache; /* Page cache to allocate XDP frames */ }; /* A TX desc ring entry */ union gve_tx_desc { struct gve_tx_pkt_desc pkt; /* first desc for a packet */ + struct gve_tx_mtd_desc mtd; /* optional metadata descriptor */ struct gve_tx_seg_desc seg; /* subsequent descs for a packet */ }; @@ -224,19 +359,24 @@ struct gve_tx_iovec { u32 iov_padding; /* padding associated with this segment */ }; -struct gve_tx_dma_buf { - DEFINE_DMA_UNMAP_ADDR(dma); - DEFINE_DMA_UNMAP_LEN(len); -}; - /* Tracks the memory in the fifo occupied by the skb. Mapped 1:1 to a desc * ring entry but only used for a pkt_desc not a seg_desc */ struct gve_tx_buffer_state { - struct sk_buff *skb; /* skb for this pkt */ + union { + struct sk_buff *skb; /* skb for this pkt */ + struct xdp_frame *xdp_frame; /* xdp_frame */ + }; + struct { + u16 size; /* size of xmitted xdp pkt */ + u8 is_xsk; /* xsk buff */ + } xdp; union { struct gve_tx_iovec iov[GVE_TX_MAX_IOVEC]; /* segments of this pkt */ - struct gve_tx_dma_buf buf; + struct { + DEFINE_DMA_UNMAP_ADDR(dma); + DEFINE_DMA_UNMAP_LEN(len); + }; }; }; @@ -269,10 +409,24 @@ enum gve_packet_state { GVE_PACKET_STATE_PENDING_REINJECT_COMPL, /* No valid completion received within the specified timeout. */ GVE_PACKET_STATE_TIMED_OUT_COMPL, + /* XSK pending packet has received a packet/reinjection completion, or + * has timed out. At this point, the pending packet can be counted by + * xsk_tx_complete and freed. + */ + GVE_PACKET_STATE_XSK_COMPLETE, +}; + +enum gve_tx_pending_packet_dqo_type { + GVE_TX_PENDING_PACKET_DQO_SKB, + GVE_TX_PENDING_PACKET_DQO_XDP_FRAME, + GVE_TX_PENDING_PACKET_DQO_XSK, }; struct gve_tx_pending_packet_dqo { - struct sk_buff *skb; /* skb for this packet */ + union { + struct sk_buff *skb; + struct xdp_frame *xdpf; + }; /* 0th element corresponds to the linear portion of `skb`, should be * unmapped with `dma_unmap_single`. @@ -280,7 +434,14 @@ struct gve_tx_pending_packet_dqo { * All others correspond to `skb`'s frags and should be unmapped with * `dma_unmap_page`. */ - struct gve_tx_dma_buf bufs[MAX_SKB_FRAGS + 1]; + union { + struct { + DEFINE_DMA_UNMAP_ADDR(dma[MAX_SKB_FRAGS + 1]); + DEFINE_DMA_UNMAP_LEN(len[MAX_SKB_FRAGS + 1]); + }; + s16 tx_qpl_buf_ids[GVE_MAX_TX_BUFS_PER_PKT]; + }; + u16 num_bufs; /* Linked list index to next element in the list, or -1 if none */ @@ -295,7 +456,10 @@ struct gve_tx_pending_packet_dqo { /* Identifies the current state of the packet as defined in * `enum gve_packet_state`. */ - u8 state; + u8 state : 3; + + /* gve_tx_pending_packet_dqo_type */ + u8 type : 2; /* If packet is an outstanding miss completion, then the packet is * freed if the corresponding re-injection completion is not received @@ -317,6 +481,9 @@ struct gve_tx_ring { /* DQO fields. */ struct { + /* Spinlock for XDP tx traffic */ + spinlock_t xdp_lock; + /* Linked list of gve_tx_pending_packet_dqo. Index into * pending_packets, or -1 if empty. * @@ -335,6 +502,34 @@ struct gve_tx_ring { * set. */ u32 last_re_idx; + + /* free running number of packet buf descriptors posted */ + u16 posted_packet_desc_cnt; + /* free running number of packet buf descriptors completed */ + u16 completed_packet_desc_cnt; + + /* QPL fields */ + struct { + /* Linked list of gve_tx_buf_dqo. Index into + * tx_qpl_buf_next, or -1 if empty. + * + * This is a consumer list owned by the TX path. When it + * runs out, the producer list is stolen from the + * completion handling path + * (dqo_compl.free_tx_qpl_buf_head). + */ + s16 free_tx_qpl_buf_head; + + /* Free running count of the number of QPL tx buffers + * allocated + */ + u32 alloc_tx_qpl_buf_cnt; + + /* Cached value of `dqo_compl.free_tx_qpl_buf_cnt` */ + u32 free_tx_qpl_buf_cnt; + }; + + atomic_t xsk_reorder_queue_tail; } dqo_tx; }; @@ -342,8 +537,10 @@ struct gve_tx_ring { union { /* GQI fields */ struct { - /* NIC tail pointer */ - __be32 last_nic_done; + /* Spinlock for when cleanup in progress */ + spinlock_t clean_lock; + /* Spinlock for XDP tx traffic */ + spinlock_t xdp_lock; }; /* DQO fields. */ @@ -366,6 +563,9 @@ struct gve_tx_ring { /* Last TX ring index fetched by HW */ atomic_t hw_tx_head; + u16 xsk_reorder_queue_head; + u16 xsk_reorder_queue_tail; + /* List to track pending packets which received a miss * completion but not a corresponding reinjection. */ @@ -376,6 +576,24 @@ struct gve_tx_ring { * reached a specified timeout. */ struct gve_index_list timed_out_completions; + + /* QPL fields */ + struct { + /* Linked list of gve_tx_buf_dqo. Index into + * tx_qpl_buf_next, or -1 if empty. + * + * This is the producer list, owned by the completion + * handling path. When the consumer list + * (dqo_tx.free_tx_qpl_buf_head) is runs out, this list + * will be stolen. + */ + atomic_t free_tx_qpl_buf_head; + + /* Free running count of the number of tx buffers + * freed + */ + atomic_t free_tx_qpl_buf_cnt; + }; } dqo_compl; } ____cacheline_aligned; u64 pkt_done; /* free-running - total packets completed */ @@ -401,7 +619,24 @@ struct gve_tx_ring { struct gve_tx_pending_packet_dqo *pending_packets; s16 num_pending_packets; + u16 *xsk_reorder_queue; + u32 complq_mask; /* complq size is complq_mask + 1 */ + + /* QPL fields */ + struct { + /* qpl assigned to this queue */ + struct gve_queue_page_list *qpl; + + /* Each QPL page is divided into TX bounce buffers + * of size GVE_TX_BUF_SIZE_DQO. tx_qpl_buf_next is + * an array to manage linked lists of TX buffers. + * An entry j at index i implies that j'th buffer + * is next on the list after i + */ + s16 *tx_qpl_buf_next; + u32 num_tx_qpl_bufs; + }; } dqo; } ____cacheline_aligned; struct netdev_queue *netdev_txq; @@ -414,29 +649,44 @@ struct gve_tx_ring { u32 q_num ____cacheline_aligned; /* queue idx */ u32 stop_queue; /* count of queue stops */ u32 wake_queue; /* count of queue wakes */ + u32 queue_timeout; /* count of queue timeouts */ u32 ntfy_id; /* notification block index */ + u32 last_kick_msec; /* Last time the queue was kicked */ dma_addr_t bus; /* dma address of the descr ring */ dma_addr_t q_resources_bus; /* dma address of the queue resources */ dma_addr_t complq_bus_dqo; /* dma address of the dqo.compl_ring */ struct u64_stats_sync statss; /* sync stats for 32bit archs */ + struct xsk_buff_pool *xsk_pool; + u64 xdp_xsk_sent; + u64 xdp_xmit; + u64 xdp_xmit_errors; } ____cacheline_aligned; /* Wraps the info for one irq including the napi struct and the queues * associated with that irq. */ struct gve_notify_block { - __be32 irq_db_index; /* idx into Bar2 - set by device, must be 1st */ + __be32 *irq_db_index; /* pointer to idx into Bar2 */ char name[IFNAMSIZ + 16]; /* name registered with the kernel */ struct napi_struct napi; /* kernel napi struct for this block */ struct gve_priv *priv; struct gve_tx_ring *tx; /* tx rings on this block */ struct gve_rx_ring *rx; /* rx rings on this block */ -} ____cacheline_aligned; + u32 irq; +}; + +/* Tracks allowed and current rx queue settings */ +struct gve_rx_queue_config { + u16 max_queues; + u16 num_queues; + u16 packet_buffer_size; +}; -/* Tracks allowed and current queue settings */ -struct gve_queue_config { +/* Tracks allowed and current tx queue settings */ +struct gve_tx_queue_config { u16 max_queues; - u16 num_queues; /* current */ + u16 num_queues; /* number of TX queues, excluding XDP queues */ + u16 num_xdp_queues; }; /* Tracks the available and used qpl IDs */ @@ -445,10 +695,9 @@ struct gve_qpl_config { unsigned long *qpl_id_map; /* bitmap of used qpl ids */ }; -struct gve_options_dqo_rda { - u16 tx_comp_ring_entries; /* number of tx_comp descriptors */ - u16 rx_buff_ring_entries; /* number of rx_buff descriptors */ -}; +struct gve_irq_db { + __be32 index; +} ____cacheline_aligned; struct gve_ptype { u8 l3_type; /* `gve_l3_type` in gve_adminq.h */ @@ -459,6 +708,36 @@ struct gve_ptype_lut { struct gve_ptype ptypes[GVE_NUM_PTYPES]; }; +/* Parameters for allocating resources for tx queues */ +struct gve_tx_alloc_rings_cfg { + struct gve_tx_queue_config *qcfg; + + u16 num_xdp_rings; + + u16 ring_size; + bool raw_addressing; + + /* Allocated resources are returned here */ + struct gve_tx_ring *tx; +}; + +/* Parameters for allocating resources for rx queues */ +struct gve_rx_alloc_rings_cfg { + /* tx config is also needed to determine QPL ids */ + struct gve_rx_queue_config *qcfg_rx; + struct gve_tx_queue_config *qcfg_tx; + + u16 ring_size; + u16 packet_buffer_size; + bool raw_addressing; + bool enable_header_split; + bool reset_rss; + bool xdp; + + /* Allocated resources are returned here */ + struct gve_rx_ring *rx; +}; + /* GVE_QUEUE_FORMAT_UNSPECIFIED must be zero since 0 is the default value * when the entire configure_device_resources command is zeroed out and the * queue_format is not specified. @@ -468,15 +747,60 @@ enum gve_queue_format { GVE_GQI_RDA_FORMAT = 0x1, GVE_GQI_QPL_FORMAT = 0x2, GVE_DQO_RDA_FORMAT = 0x3, + GVE_DQO_QPL_FORMAT = 0x4, +}; + +struct gve_flow_spec { + __be32 src_ip[4]; + __be32 dst_ip[4]; + union { + struct { + __be16 src_port; + __be16 dst_port; + }; + __be32 spi; + }; + union { + u8 tos; + u8 tclass; + }; +}; + +struct gve_flow_rule { + u32 location; + u16 flow_type; + u16 action; + struct gve_flow_spec key; + struct gve_flow_spec mask; +}; + +struct gve_flow_rules_cache { + bool rules_cache_synced; /* False if the driver's rules_cache is outdated */ + struct gve_adminq_queried_flow_rule *rules_cache; + __be32 *rule_ids_cache; + /* The total number of queried rules that stored in the caches */ + u32 rules_cache_num; + u32 rule_ids_cache_num; +}; + +struct gve_rss_config { + u8 *hash_key; + u32 *hash_lut; +}; + +struct gve_ptp { + struct ptp_clock_info info; + struct ptp_clock *clock; + struct gve_priv *priv; }; struct gve_priv { struct net_device *dev; struct gve_tx_ring *tx; /* array of tx_cfg.num_queues */ struct gve_rx_ring *rx; /* array of rx_cfg.num_queues */ - struct gve_queue_page_list *qpls; /* array of num qpls */ struct gve_notify_block *ntfy_blocks; /* array of num_ntfy_blks */ - dma_addr_t ntfy_block_bus; + struct gve_irq_db *irq_db_indices; /* array of num_ntfy_blks */ + dma_addr_t irq_db_indices_bus; struct msix_entry *msix_vectors; /* array of num_ntfy_blks + 1 */ char mgmt_msix_name[IFNAMSIZ + 16]; u32 mgmt_msix_idx; @@ -486,17 +810,24 @@ struct gve_priv { u16 num_event_counters; u16 tx_desc_cnt; /* num desc per ring */ u16 rx_desc_cnt; /* num desc per ring */ - u16 tx_pages_per_qpl; /* tx buffer length */ - u16 rx_data_slot_cnt; /* rx buffer length */ + u16 max_tx_desc_cnt; + u16 max_rx_desc_cnt; + u16 min_tx_desc_cnt; + u16 min_rx_desc_cnt; + bool modify_ring_size_enabled; + bool default_min_ring_size; + u16 tx_pages_per_qpl; /* Suggested number of pages per qpl for TX queues by NIC */ u64 max_registered_pages; u64 num_registered_pages; /* num pages registered with NIC */ + struct bpf_prog *xdp_prog; /* XDP BPF program */ u32 rx_copybreak; /* copy packets smaller than this */ u16 default_num_queues; /* default num queues to set up */ - struct gve_queue_config tx_cfg; - struct gve_queue_config rx_cfg; - struct gve_qpl_config qpl_cfg; /* map used QPL ids */ - u32 num_ntfy_blks; /* spilt between TX and RX so must be even */ + struct gve_tx_queue_config tx_cfg; + struct gve_rx_queue_config rx_cfg; + unsigned long *xsk_pools; /* bitmap of RX queues with XSK pools */ + u32 num_ntfy_blks; /* split between TX and RX so must be even */ + int numa_node; struct gve_registers __iomem *reg_bar0; /* see gve_register.h */ __be32 __iomem *db_bar2; /* "array" of doorbells */ @@ -509,6 +840,8 @@ struct gve_priv { /* Admin queue - see gve_adminq.h*/ union gve_adminq_command *adminq; dma_addr_t adminq_bus_addr; + struct dma_pool *adminq_pool; + struct mutex adminq_lock; /* Protects adminq command execution */ u32 adminq_mask; /* masks prod_cnt to adminq size */ u32 adminq_prod_cnt; /* free-running count of AQ cmds executed */ u32 adminq_cmd_fail; /* free-running count of AQ cmds failed */ @@ -526,7 +859,13 @@ struct gve_priv { u32 adminq_set_driver_parameter_cnt; u32 adminq_report_stats_cnt; u32 adminq_report_link_speed_cnt; + u32 adminq_report_nic_timestamp_cnt; u32 adminq_get_ptype_map_cnt; + u32 adminq_verify_driver_compatibility_cnt; + u32 adminq_query_flow_rules_cnt; + u32 adminq_cfg_flow_rule_cnt; + u32 adminq_cfg_rss_cnt; + u32 adminq_query_rss_cnt; /* Global stats */ u32 interface_up_cnt; /* count of times interface turned up since last reset */ @@ -535,6 +874,8 @@ struct gve_priv { u32 page_alloc_fail; /* count of page alloc fails */ u32 dma_mapping_error; /* count of dma mapping errors */ u32 stats_report_trigger_cnt; /* count of device-requested stats-reports since last reset */ + u32 suspend_cnt; /* count of times suspended */ + u32 resume_cnt; /* count of times resumed */ struct workqueue_struct *gve_wq; struct work_struct service_task; struct work_struct stats_report_task; @@ -551,14 +892,39 @@ struct gve_priv { /* Gvnic device link speed from hypervisor. */ u64 link_speed; + bool up_before_suspend; /* True if dev was up before suspend */ - struct gve_options_dqo_rda options_dqo_rda; struct gve_ptype_lut *ptype_lut_dqo; /* Must be a power of two. */ - int data_buffer_size_dqo; + u16 max_rx_buffer_size; /* device limit */ enum gve_queue_format queue_format; + + /* Interrupt coalescing settings */ + u32 tx_coalesce_usecs; + u32 rx_coalesce_usecs; + + u16 header_buf_size; /* device configured, header-split supported if non-zero */ + bool header_split_enabled; /* True if the header split is enabled by the user */ + + u32 max_flow_rules; + u32 num_flow_rules; + + struct gve_flow_rules_cache flow_rules_cache; + + u16 rss_key_size; + u16 rss_lut_size; + bool cache_rss_config; + struct gve_rss_config rss_config; + + /* True if the device supports reading the nic clock */ + bool nic_timestamp_supported; + struct gve_ptp *ptp; + struct kernel_hwtstamp_config ts_config; + struct gve_nic_ts_report *nic_ts_report; + dma_addr_t nic_ts_report_bus; + u64 last_sync_nic_counter; /* Clock counter from last NIC TS report */ }; enum gve_service_task_flags_bit { @@ -717,7 +1083,7 @@ static inline void gve_clear_report_stats(struct gve_priv *priv) static inline __be32 __iomem *gve_irq_doorbell(struct gve_priv *priv, struct gve_notify_block *block) { - return &priv->db_bar2[be32_to_cpu(block->irq_db_index)]; + return &priv->db_bar2[be32_to_cpu(*block->irq_db_index)]; } /* Returns the index into ntfy_blocks of the given tx ring's block @@ -734,72 +1100,69 @@ static inline u32 gve_rx_idx_to_ntfy(struct gve_priv *priv, u32 queue_idx) return (priv->num_ntfy_blks / 2) + queue_idx; } -/* Returns the number of tx queue page lists - */ -static inline u32 gve_num_tx_qpls(struct gve_priv *priv) +static inline bool gve_is_qpl(struct gve_priv *priv) { - if (priv->queue_format != GVE_GQI_QPL_FORMAT) - return 0; - - return priv->tx_cfg.num_queues; + return priv->queue_format == GVE_GQI_QPL_FORMAT || + priv->queue_format == GVE_DQO_QPL_FORMAT; } -/* Returns the number of rx queue page lists - */ -static inline u32 gve_num_rx_qpls(struct gve_priv *priv) +/* Returns the number of tx queue page lists */ +static inline u32 gve_num_tx_qpls(const struct gve_tx_queue_config *tx_cfg, + bool is_qpl) { - if (priv->queue_format != GVE_GQI_QPL_FORMAT) + if (!is_qpl) return 0; - - return priv->rx_cfg.num_queues; + return tx_cfg->num_queues + tx_cfg->num_xdp_queues; } -/* Returns a pointer to the next available tx qpl in the list of qpls - */ -static inline -struct gve_queue_page_list *gve_assign_tx_qpl(struct gve_priv *priv) +/* Returns the number of rx queue page lists */ +static inline u32 gve_num_rx_qpls(const struct gve_rx_queue_config *rx_cfg, + bool is_qpl) { - int id = find_first_zero_bit(priv->qpl_cfg.qpl_id_map, - priv->qpl_cfg.qpl_map_size); + if (!is_qpl) + return 0; + return rx_cfg->num_queues; +} - /* we are out of tx qpls */ - if (id >= gve_num_tx_qpls(priv)) - return NULL; +static inline u32 gve_tx_qpl_id(struct gve_priv *priv, int tx_qid) +{ + return tx_qid; +} - set_bit(id, priv->qpl_cfg.qpl_id_map); - return &priv->qpls[id]; +static inline u32 gve_rx_qpl_id(struct gve_priv *priv, int rx_qid) +{ + return priv->tx_cfg.max_queues + rx_qid; } -/* Returns a pointer to the next available rx qpl in the list of qpls - */ -static inline -struct gve_queue_page_list *gve_assign_rx_qpl(struct gve_priv *priv) +static inline u32 gve_get_rx_qpl_id(const struct gve_tx_queue_config *tx_cfg, + int rx_qid) { - int id = find_next_zero_bit(priv->qpl_cfg.qpl_id_map, - priv->qpl_cfg.qpl_map_size, - gve_num_tx_qpls(priv)); + return tx_cfg->max_queues + rx_qid; +} - /* we are out of rx qpls */ - if (id == priv->qpl_cfg.qpl_map_size) - return NULL; +static inline u32 gve_tx_start_qpl_id(struct gve_priv *priv) +{ + return gve_tx_qpl_id(priv, 0); +} - set_bit(id, priv->qpl_cfg.qpl_id_map); - return &priv->qpls[id]; +static inline u32 gve_rx_start_qpl_id(const struct gve_tx_queue_config *tx_cfg) +{ + return gve_get_rx_qpl_id(tx_cfg, 0); } -/* Unassigns the qpl with the given id - */ -static inline void gve_unassign_qpl(struct gve_priv *priv, int id) +static inline u32 gve_get_rx_pages_per_qpl_dqo(u32 rx_desc_cnt) { - clear_bit(id, priv->qpl_cfg.qpl_id_map); + /* For DQO, page count should be more than ring size for + * out-of-order completions. Set it to two times of ring size. + */ + return 2 * rx_desc_cnt; } -/* Returns the correct dma direction for tx and rx qpls - */ +/* Returns the correct dma direction for tx and rx qpls */ static inline enum dma_data_direction gve_qpl_dma_dir(struct gve_priv *priv, int id) { - if (id < gve_num_tx_qpls(priv)) + if (id < gve_rx_start_qpl_id(&priv->tx_cfg)) return DMA_TO_DEVICE; else return DMA_FROM_DEVICE; @@ -811,36 +1174,173 @@ static inline bool gve_is_gqi(struct gve_priv *priv) priv->queue_format == GVE_GQI_QPL_FORMAT; } +static inline bool gve_is_dqo(struct gve_priv *priv) +{ + return priv->queue_format == GVE_DQO_RDA_FORMAT || + priv->queue_format == GVE_DQO_QPL_FORMAT; +} + +static inline u32 gve_num_tx_queues(struct gve_priv *priv) +{ + return priv->tx_cfg.num_queues + priv->tx_cfg.num_xdp_queues; +} + +static inline u32 gve_xdp_tx_queue_id(struct gve_priv *priv, u32 queue_id) +{ + return priv->tx_cfg.num_queues + queue_id; +} + +static inline u32 gve_xdp_tx_start_queue_id(struct gve_priv *priv) +{ + return gve_xdp_tx_queue_id(priv, 0); +} + +static inline bool gve_supports_xdp_xmit(struct gve_priv *priv) +{ + switch (priv->queue_format) { + case GVE_GQI_QPL_FORMAT: + case GVE_DQO_RDA_FORMAT: + return true; + default: + return false; + } +} + +/* gqi napi handler defined in gve_main.c */ +int gve_napi_poll(struct napi_struct *napi, int budget); + /* buffers */ int gve_alloc_page(struct gve_priv *priv, struct device *dev, struct page **page, dma_addr_t *dma, - enum dma_data_direction); + enum dma_data_direction, gfp_t gfp_flags); void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, enum dma_data_direction); +/* qpls */ +struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv, + u32 id, int pages); +void gve_free_queue_page_list(struct gve_priv *priv, + struct gve_queue_page_list *qpl, + u32 id); /* tx handling */ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev); +int gve_xdp_xmit_gqi(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags); +int gve_xdp_xmit_dqo(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags); +int gve_xdp_xmit_one(struct gve_priv *priv, struct gve_tx_ring *tx, + void *data, int len, void *frame_p); +void gve_xdp_tx_flush(struct gve_priv *priv, u32 xdp_qid); +int gve_xdp_xmit_one_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, + struct xdp_frame *xdpf); bool gve_tx_poll(struct gve_notify_block *block, int budget); -int gve_tx_alloc_rings(struct gve_priv *priv); -void gve_tx_free_rings_gqi(struct gve_priv *priv); -__be32 gve_tx_load_event_counter(struct gve_priv *priv, - struct gve_tx_ring *tx); +bool gve_xdp_poll(struct gve_notify_block *block, int budget); +int gve_xsk_tx_poll(struct gve_notify_block *block, int budget); +int gve_tx_alloc_rings_gqi(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *cfg); +void gve_tx_free_rings_gqi(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *cfg); +void gve_tx_start_ring_gqi(struct gve_priv *priv, int idx); +void gve_tx_stop_ring_gqi(struct gve_priv *priv, int idx); +u32 gve_tx_load_event_counter(struct gve_priv *priv, + struct gve_tx_ring *tx); +bool gve_tx_clean_pending(struct gve_priv *priv, struct gve_tx_ring *tx); /* rx handling */ void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx); -bool gve_rx_poll(struct gve_notify_block *block, int budget); -int gve_rx_alloc_rings(struct gve_priv *priv); -void gve_rx_free_rings_gqi(struct gve_priv *priv); -bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget, - netdev_features_t feat); +int gve_rx_poll(struct gve_notify_block *block, int budget); +bool gve_rx_work_pending(struct gve_rx_ring *rx); +int gve_rx_alloc_ring_gqi(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg, + struct gve_rx_ring *rx, + int idx); +void gve_rx_free_ring_gqi(struct gve_priv *priv, struct gve_rx_ring *rx, + struct gve_rx_alloc_rings_cfg *cfg); +int gve_rx_alloc_rings_gqi(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg); +void gve_rx_free_rings_gqi(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg); +void gve_rx_start_ring_gqi(struct gve_priv *priv, int idx); +void gve_rx_stop_ring_gqi(struct gve_priv *priv, int idx); +bool gve_header_split_supported(const struct gve_priv *priv); +int gve_set_rx_buf_len_config(struct gve_priv *priv, u32 rx_buf_len, + struct netlink_ext_ack *extack, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg); +int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg); +/* rx buffer handling */ +int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs); +void gve_free_page_dqo(struct gve_priv *priv, struct gve_rx_buf_state_dqo *bs, + bool free_page); +struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx); +bool gve_buf_state_is_allocated(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state); +void gve_free_buf_state(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state); +struct gve_rx_buf_state_dqo *gve_dequeue_buf_state(struct gve_rx_ring *rx, + struct gve_index_list *list); +void gve_enqueue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list, + struct gve_rx_buf_state_dqo *buf_state); +struct gve_rx_buf_state_dqo *gve_get_recycled_buf_state(struct gve_rx_ring *rx); +void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state); +void gve_free_to_page_pool(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state, + bool allow_direct); +int gve_alloc_qpl_page_dqo(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state); +void gve_free_qpl_page_dqo(struct gve_rx_buf_state_dqo *buf_state); +void gve_reuse_buffer(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state); +void gve_free_buffer(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state); +int gve_alloc_buffer(struct gve_rx_ring *rx, struct gve_rx_desc_dqo *desc); +struct page_pool *gve_rx_create_page_pool(struct gve_priv *priv, + struct gve_rx_ring *rx, + bool xdp); + /* Reset */ void gve_schedule_reset(struct gve_priv *priv); int gve_reset(struct gve_priv *priv, bool attempt_teardown); +void gve_get_curr_alloc_cfgs(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg); +int gve_adjust_config(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg); int gve_adjust_queues(struct gve_priv *priv, - struct gve_queue_config new_rx_config, - struct gve_queue_config new_tx_config); + struct gve_rx_queue_config new_rx_config, + struct gve_tx_queue_config new_tx_config, + bool reset_rss); +/* flow steering rule */ +int gve_get_flow_rule_entry(struct gve_priv *priv, struct ethtool_rxnfc *cmd); +int gve_get_flow_rule_ids(struct gve_priv *priv, struct ethtool_rxnfc *cmd, u32 *rule_locs); +int gve_add_flow_rule(struct gve_priv *priv, struct ethtool_rxnfc *cmd); +int gve_del_flow_rule(struct gve_priv *priv, struct ethtool_rxnfc *cmd); +int gve_flow_rules_reset(struct gve_priv *priv); +/* RSS config */ +int gve_init_rss_config(struct gve_priv *priv, u16 num_queues); +/* PTP and timestamping */ +#if IS_ENABLED(CONFIG_PTP_1588_CLOCK) +int gve_clock_nic_ts_read(struct gve_priv *priv); +int gve_init_clock(struct gve_priv *priv); +void gve_teardown_clock(struct gve_priv *priv); +#else /* CONFIG_PTP_1588_CLOCK */ +static inline int gve_clock_nic_ts_read(struct gve_priv *priv) +{ + return -EOPNOTSUPP; +} + +static inline int gve_init_clock(struct gve_priv *priv) +{ + return 0; +} + +static inline void gve_teardown_clock(struct gve_priv *priv) { } +#endif /* CONFIG_PTP_1588_CLOCK */ /* report stats handling */ void gve_handle_report_stats(struct gve_priv *priv); /* exported by ethtool.c */ extern const struct ethtool_ops gve_ethtool_ops; /* needed by ethtool */ +extern char gve_driver_name[]; extern const char gve_version_str[]; #endif /* _GVE_H_ */ |
