diff options
Diffstat (limited to 'drivers/net/ethernet/google')
-rw-r--r-- | drivers/net/ethernet/google/Kconfig | 1 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/Makefile | 3 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve.h | 361 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_adminq.c | 715 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_adminq.h | 225 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c | 316 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_dqo.h | 24 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_ethtool.c | 424 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_flow_rule.c | 298 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_main.c | 1506 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_rx.c | 255 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_rx_dqo.c | 603 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_tx.c | 193 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_tx_dqo.c | 165 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_utils.c | 49 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_utils.h | 8 |
16 files changed, 3776 insertions, 1370 deletions
diff --git a/drivers/net/ethernet/google/Kconfig b/drivers/net/ethernet/google/Kconfig index 8641a00f8e63..564862a57124 100644 --- a/drivers/net/ethernet/google/Kconfig +++ b/drivers/net/ethernet/google/Kconfig @@ -18,6 +18,7 @@ if NET_VENDOR_GOOGLE config GVE tristate "Google Virtual NIC (gVNIC) support" depends on (PCI_MSI && (X86 || CPU_LITTLE_ENDIAN)) + select PAGE_POOL help This driver supports Google Virtual NIC (gVNIC)" diff --git a/drivers/net/ethernet/google/gve/Makefile b/drivers/net/ethernet/google/gve/Makefile index b9a6be76531b..4520f1c07a63 100644 --- a/drivers/net/ethernet/google/gve/Makefile +++ b/drivers/net/ethernet/google/gve/Makefile @@ -1,4 +1,5 @@ # Makefile for the Google virtual Ethernet (gve) driver obj-$(CONFIG_GVE) += gve.o -gve-objs := gve_main.o gve_tx.o gve_tx_dqo.o gve_rx.o gve_rx_dqo.o gve_ethtool.o gve_adminq.o gve_utils.o +gve-objs := gve_main.o gve_tx.o gve_tx_dqo.o gve_rx.o gve_rx_dqo.o gve_ethtool.o gve_adminq.o gve_utils.o gve_flow_rule.o \ + gve_buffer_mgmt_dqo.o diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index b80349154604..2fab38c8ee78 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: (GPL-2.0 OR MIT) * Google virtual Ethernet (gve) driver * - * Copyright (C) 2015-2021 Google, Inc. + * Copyright (C) 2015-2024 Google LLC */ #ifndef _GVE_H_ @@ -9,9 +9,11 @@ #include <linux/dma-mapping.h> #include <linux/dmapool.h> +#include <linux/ethtool_netlink.h> #include <linux/netdevice.h> #include <linux/pci.h> #include <linux/u64_stats_sync.h> +#include <net/page_pool/helpers.h> #include <net/xdp.h> #include "gve_desc.h" @@ -49,16 +51,35 @@ /* PTYPEs are always 10 bits. */ #define GVE_NUM_PTYPES 1024 +/* Default minimum ring size */ +#define GVE_DEFAULT_MIN_TX_RING_SIZE 256 +#define GVE_DEFAULT_MIN_RX_RING_SIZE 512 + #define GVE_DEFAULT_RX_BUFFER_SIZE 2048 +#define GVE_MAX_RX_BUFFER_SIZE 4096 + +#define GVE_XDP_RX_BUFFER_SIZE_DQO 4096 + #define GVE_DEFAULT_RX_BUFFER_OFFSET 2048 +#define GVE_PAGE_POOL_SIZE_MULTIPLIER 4 + +#define GVE_FLOW_RULES_CACHE_SIZE \ + (GVE_ADMINQ_BUFFER_SIZE / sizeof(struct gve_adminq_queried_flow_rule)) +#define GVE_FLOW_RULE_IDS_CACHE_SIZE \ + (GVE_ADMINQ_BUFFER_SIZE / sizeof(((struct gve_adminq_queried_flow_rule *)0)->location)) + +#define GVE_RSS_KEY_SIZE 40 +#define GVE_RSS_INDIR_SIZE 128 + #define GVE_XDP_ACTIONS 5 #define GVE_GQ_TX_MIN_PKT_DESC_BYTES 182 +#define GVE_DEFAULT_HEADER_BUFFER_SIZE 128 + #define DQO_QPL_DEFAULT_TX_PAGES 512 -#define DQO_QPL_DEFAULT_RX_PAGES 2048 /* Maximum TSO size supported on DQO */ #define GVE_DQO_TX_MAX 0x3FFFF @@ -86,9 +107,16 @@ struct gve_rx_desc_queue { /* The page info for a single slot in the RX data queue */ struct gve_rx_slot_page_info { - struct page *page; + /* netmem is used for DQO RDA mode + * page is used in all other modes + */ + union { + struct page *page; + netmem_ref netmem; + }; void *page_address; u32 page_offset; /* offset to write to in page */ + unsigned int buf_size; int pagecnt_bias; /* expected pagecnt if only the driver has a ref */ u16 pad; /* adjustment for rx padding */ u8 can_flip; /* tracks if the networking stack is using the page */ @@ -150,6 +178,11 @@ struct gve_rx_compl_queue_dqo { u32 mask; /* Mask for indices to the size of the ring */ }; +struct gve_header_buf { + u8 *data; + dma_addr_t addr; +}; + /* Stores state for tracking buffers posted to HW */ struct gve_rx_buf_state_dqo { /* The page posted to HW. */ @@ -196,6 +229,11 @@ struct gve_rx_cnts { /* Contains datapath state used to represent an RX queue. */ struct gve_rx_ring { struct gve_priv *gve; + + u16 packet_buffer_size; /* Size of buffer posted to NIC */ + u16 packet_buffer_truesize; /* Total size of RX buffer */ + u16 rx_headroom; + union { /* GQI fields */ struct { @@ -204,7 +242,6 @@ struct gve_rx_ring { /* threshold for posting new buffs and descs */ u32 db_threshold; - u16 packet_buffer_size; u32 qpl_copy_pool_mask; u32 qpl_copy_pool_head; @@ -252,19 +289,28 @@ struct gve_rx_ring { /* track number of used buffers */ u16 used_buf_states_cnt; + + /* Address info of the buffers for header-split */ + struct gve_header_buf hdr_bufs; + + struct page_pool *page_pool; } dqo; }; u64 rbytes; /* free-running bytes received */ + u64 rx_hsplit_bytes; /* free-running header bytes received */ u64 rpackets; /* free-running packets received */ u32 cnt; /* free-running total number of completed packets */ u32 fill_cnt; /* free-running total number of descs and buffs posted */ u32 mask; /* masks the cnt and fill_cnt to the size of the ring */ + u64 rx_hsplit_pkt; /* free-running packets with headers split */ u64 rx_copybreak_pkt; /* free-running count of copybreak packets */ u64 rx_copied_pkt; /* free-running total number of copied packets */ u64 rx_skb_alloc_fail; /* free-running count of skb alloc fails */ u64 rx_buf_alloc_fail; /* free-running count of buffer alloc fails */ u64 rx_desc_err_dropped_pkt; /* free-running count of packets dropped by descriptor error */ + /* free-running count of unsplit packets due to header buffer overflow or hdr_len is 0 */ + u64 rx_hsplit_unsplit_pkt; u64 rx_cont_packet_cnt; /* free-running multi-fragment packets received */ u64 rx_frag_flip_cnt; /* free-running count of rx segments where page_flip was used */ u64 rx_frag_copy_cnt; /* free-running count of rx segments copied */ @@ -573,8 +619,6 @@ struct gve_tx_ring { dma_addr_t complq_bus_dqo; /* dma address of the dqo.compl_ring */ struct u64_stats_sync statss; /* sync stats for 32bit archs */ struct xsk_buff_pool *xsk_pool; - u32 xdp_xsk_wakeup; - u32 xdp_xsk_done; u64 xdp_xsk_sent; u64 xdp_xmit; u64 xdp_xmit_errors; @@ -590,12 +634,21 @@ struct gve_notify_block { struct gve_priv *priv; struct gve_tx_ring *tx; /* tx rings on this block */ struct gve_rx_ring *rx; /* rx rings on this block */ + u32 irq; +}; + +/* Tracks allowed and current rx queue settings */ +struct gve_rx_queue_config { + u16 max_queues; + u16 num_queues; + u16 packet_buffer_size; }; -/* Tracks allowed and current queue settings */ -struct gve_queue_config { +/* Tracks allowed and current tx queue settings */ +struct gve_tx_queue_config { u16 max_queues; - u16 num_queues; /* current */ + u16 num_queues; /* number of TX queues, excluding XDP queues */ + u16 num_xdp_queues; }; /* Tracks the available and used qpl IDs */ @@ -604,11 +657,6 @@ struct gve_qpl_config { unsigned long *qpl_id_map; /* bitmap of used qpl ids */ }; -struct gve_options_dqo_rda { - u16 tx_comp_ring_entries; /* number of tx_comp descriptors */ - u16 rx_buff_ring_entries; /* number of rx_buff descriptors */ -}; - struct gve_irq_db { __be32 index; } ____cacheline_aligned; @@ -622,6 +670,36 @@ struct gve_ptype_lut { struct gve_ptype ptypes[GVE_NUM_PTYPES]; }; +/* Parameters for allocating resources for tx queues */ +struct gve_tx_alloc_rings_cfg { + struct gve_tx_queue_config *qcfg; + + u16 num_xdp_rings; + + u16 ring_size; + bool raw_addressing; + + /* Allocated resources are returned here */ + struct gve_tx_ring *tx; +}; + +/* Parameters for allocating resources for rx queues */ +struct gve_rx_alloc_rings_cfg { + /* tx config is also needed to determine QPL ids */ + struct gve_rx_queue_config *qcfg_rx; + struct gve_tx_queue_config *qcfg_tx; + + u16 ring_size; + u16 packet_buffer_size; + bool raw_addressing; + bool enable_header_split; + bool reset_rss; + bool xdp; + + /* Allocated resources are returned here */ + struct gve_rx_ring *rx; +}; + /* GVE_QUEUE_FORMAT_UNSPECIFIED must be zero since 0 is the default value * when the entire configure_device_resources command is zeroed out and the * queue_format is not specified. @@ -634,11 +712,48 @@ enum gve_queue_format { GVE_DQO_QPL_FORMAT = 0x4, }; +struct gve_flow_spec { + __be32 src_ip[4]; + __be32 dst_ip[4]; + union { + struct { + __be16 src_port; + __be16 dst_port; + }; + __be32 spi; + }; + union { + u8 tos; + u8 tclass; + }; +}; + +struct gve_flow_rule { + u32 location; + u16 flow_type; + u16 action; + struct gve_flow_spec key; + struct gve_flow_spec mask; +}; + +struct gve_flow_rules_cache { + bool rules_cache_synced; /* False if the driver's rules_cache is outdated */ + struct gve_adminq_queried_flow_rule *rules_cache; + __be32 *rule_ids_cache; + /* The total number of queried rules that stored in the caches */ + u32 rules_cache_num; + u32 rule_ids_cache_num; +}; + +struct gve_rss_config { + u8 *hash_key; + u32 *hash_lut; +}; + struct gve_priv { struct net_device *dev; struct gve_tx_ring *tx; /* array of tx_cfg.num_queues */ struct gve_rx_ring *rx; /* array of rx_cfg.num_queues */ - struct gve_queue_page_list *qpls; /* array of num qpls */ struct gve_notify_block *ntfy_blocks; /* array of num_ntfy_blks */ struct gve_irq_db *irq_db_indices; /* array of num_ntfy_blks */ dma_addr_t irq_db_indices_bus; @@ -651,19 +766,21 @@ struct gve_priv { u16 num_event_counters; u16 tx_desc_cnt; /* num desc per ring */ u16 rx_desc_cnt; /* num desc per ring */ + u16 max_tx_desc_cnt; + u16 max_rx_desc_cnt; + u16 min_tx_desc_cnt; + u16 min_rx_desc_cnt; + bool modify_ring_size_enabled; + bool default_min_ring_size; u16 tx_pages_per_qpl; /* Suggested number of pages per qpl for TX queues by NIC */ - u16 rx_pages_per_qpl; /* Suggested number of pages per qpl for RX queues by NIC */ - u16 rx_data_slot_cnt; /* rx buffer length */ u64 max_registered_pages; u64 num_registered_pages; /* num pages registered with NIC */ struct bpf_prog *xdp_prog; /* XDP BPF program */ u32 rx_copybreak; /* copy packets smaller than this */ u16 default_num_queues; /* default num queues to set up */ - u16 num_xdp_queues; - struct gve_queue_config tx_cfg; - struct gve_queue_config rx_cfg; - struct gve_qpl_config qpl_cfg; /* map used QPL ids */ + struct gve_tx_queue_config tx_cfg; + struct gve_rx_queue_config rx_cfg; u32 num_ntfy_blks; /* spilt between TX and RX so must be even */ struct gve_registers __iomem *reg_bar0; /* see gve_register.h */ @@ -678,6 +795,7 @@ struct gve_priv { union gve_adminq_command *adminq; dma_addr_t adminq_bus_addr; struct dma_pool *adminq_pool; + struct mutex adminq_lock; /* Protects adminq command execution */ u32 adminq_mask; /* masks prod_cnt to adminq size */ u32 adminq_prod_cnt; /* free-running count of AQ cmds executed */ u32 adminq_cmd_fail; /* free-running count of AQ cmds failed */ @@ -697,6 +815,10 @@ struct gve_priv { u32 adminq_report_link_speed_cnt; u32 adminq_get_ptype_map_cnt; u32 adminq_verify_driver_compatibility_cnt; + u32 adminq_query_flow_rules_cnt; + u32 adminq_cfg_flow_rule_cnt; + u32 adminq_cfg_rss_cnt; + u32 adminq_query_rss_cnt; /* Global stats */ u32 interface_up_cnt; /* count of times interface turned up since last reset */ @@ -725,17 +847,29 @@ struct gve_priv { u64 link_speed; bool up_before_suspend; /* True if dev was up before suspend */ - struct gve_options_dqo_rda options_dqo_rda; struct gve_ptype_lut *ptype_lut_dqo; /* Must be a power of two. */ - int data_buffer_size_dqo; + u16 max_rx_buffer_size; /* device limit */ enum gve_queue_format queue_format; /* Interrupt coalescing settings */ u32 tx_coalesce_usecs; u32 rx_coalesce_usecs; + + u16 header_buf_size; /* device configured, header-split supported if non-zero */ + bool header_split_enabled; /* True if the header split is enabled by the user */ + + u32 max_flow_rules; + u32 num_flow_rules; + + struct gve_flow_rules_cache flow_rules_cache; + + u16 rss_key_size; + u16 rss_lut_size; + bool cache_rss_config; + struct gve_rss_config rss_config; }; enum gve_service_task_flags_bit { @@ -917,34 +1051,22 @@ static inline bool gve_is_qpl(struct gve_priv *priv) priv->queue_format == GVE_DQO_QPL_FORMAT; } -/* Returns the number of tx queue page lists - */ -static inline u32 gve_num_tx_qpls(struct gve_priv *priv) -{ - if (!gve_is_qpl(priv)) - return 0; - - return priv->tx_cfg.num_queues + priv->num_xdp_queues; -} - -/* Returns the number of XDP tx queue page lists - */ -static inline u32 gve_num_xdp_qpls(struct gve_priv *priv) +/* Returns the number of tx queue page lists */ +static inline u32 gve_num_tx_qpls(const struct gve_tx_queue_config *tx_cfg, + bool is_qpl) { - if (priv->queue_format != GVE_GQI_QPL_FORMAT) + if (!is_qpl) return 0; - - return priv->num_xdp_queues; + return tx_cfg->num_queues + tx_cfg->num_xdp_queues; } -/* Returns the number of rx queue page lists - */ -static inline u32 gve_num_rx_qpls(struct gve_priv *priv) +/* Returns the number of rx queue page lists */ +static inline u32 gve_num_rx_qpls(const struct gve_rx_queue_config *rx_cfg, + bool is_qpl) { - if (!gve_is_qpl(priv)) + if (!is_qpl) return 0; - - return priv->rx_cfg.num_queues; + return rx_cfg->num_queues; } static inline u32 gve_tx_qpl_id(struct gve_priv *priv, int tx_qid) @@ -957,59 +1079,35 @@ static inline u32 gve_rx_qpl_id(struct gve_priv *priv, int rx_qid) return priv->tx_cfg.max_queues + rx_qid; } -static inline u32 gve_tx_start_qpl_id(struct gve_priv *priv) -{ - return gve_tx_qpl_id(priv, 0); -} - -static inline u32 gve_rx_start_qpl_id(struct gve_priv *priv) +static inline u32 gve_get_rx_qpl_id(const struct gve_tx_queue_config *tx_cfg, + int rx_qid) { - return gve_rx_qpl_id(priv, 0); + return tx_cfg->max_queues + rx_qid; } -/* Returns a pointer to the next available tx qpl in the list of qpls - */ -static inline -struct gve_queue_page_list *gve_assign_tx_qpl(struct gve_priv *priv, int tx_qid) +static inline u32 gve_tx_start_qpl_id(struct gve_priv *priv) { - int id = gve_tx_qpl_id(priv, tx_qid); - - /* QPL already in use */ - if (test_bit(id, priv->qpl_cfg.qpl_id_map)) - return NULL; - - set_bit(id, priv->qpl_cfg.qpl_id_map); - return &priv->qpls[id]; + return gve_tx_qpl_id(priv, 0); } -/* Returns a pointer to the next available rx qpl in the list of qpls - */ -static inline -struct gve_queue_page_list *gve_assign_rx_qpl(struct gve_priv *priv, int rx_qid) +static inline u32 gve_rx_start_qpl_id(const struct gve_tx_queue_config *tx_cfg) { - int id = gve_rx_qpl_id(priv, rx_qid); - - /* QPL already in use */ - if (test_bit(id, priv->qpl_cfg.qpl_id_map)) - return NULL; - - set_bit(id, priv->qpl_cfg.qpl_id_map); - return &priv->qpls[id]; + return gve_get_rx_qpl_id(tx_cfg, 0); } -/* Unassigns the qpl with the given id - */ -static inline void gve_unassign_qpl(struct gve_priv *priv, int id) +static inline u32 gve_get_rx_pages_per_qpl_dqo(u32 rx_desc_cnt) { - clear_bit(id, priv->qpl_cfg.qpl_id_map); + /* For DQO, page count should be more than ring size for + * out-of-order completions. Set it to two times of ring size. + */ + return 2 * rx_desc_cnt; } -/* Returns the correct dma direction for tx and rx qpls - */ +/* Returns the correct dma direction for tx and rx qpls */ static inline enum dma_data_direction gve_qpl_dma_dir(struct gve_priv *priv, int id) { - if (id < gve_rx_start_qpl_id(priv)) + if (id < gve_rx_start_qpl_id(&priv->tx_cfg)) return DMA_TO_DEVICE; else return DMA_FROM_DEVICE; @@ -1023,7 +1121,7 @@ static inline bool gve_is_gqi(struct gve_priv *priv) static inline u32 gve_num_tx_queues(struct gve_priv *priv) { - return priv->tx_cfg.num_queues + priv->num_xdp_queues; + return priv->tx_cfg.num_queues + priv->tx_cfg.num_xdp_queues; } static inline u32 gve_xdp_tx_queue_id(struct gve_priv *priv, u32 queue_id) @@ -1036,12 +1134,31 @@ static inline u32 gve_xdp_tx_start_queue_id(struct gve_priv *priv) return gve_xdp_tx_queue_id(priv, 0); } +static inline bool gve_supports_xdp_xmit(struct gve_priv *priv) +{ + switch (priv->queue_format) { + case GVE_GQI_QPL_FORMAT: + return true; + default: + return false; + } +} + +/* gqi napi handler defined in gve_main.c */ +int gve_napi_poll(struct napi_struct *napi, int budget); + /* buffers */ int gve_alloc_page(struct gve_priv *priv, struct device *dev, struct page **page, dma_addr_t *dma, enum dma_data_direction, gfp_t gfp_flags); void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, enum dma_data_direction); +/* qpls */ +struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv, + u32 id, int pages); +void gve_free_queue_page_list(struct gve_priv *priv, + struct gve_queue_page_list *qpl, + u32 id); /* tx handling */ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev); int gve_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, @@ -1051,8 +1168,13 @@ int gve_xdp_xmit_one(struct gve_priv *priv, struct gve_tx_ring *tx, void gve_xdp_tx_flush(struct gve_priv *priv, u32 xdp_qid); bool gve_tx_poll(struct gve_notify_block *block, int budget); bool gve_xdp_poll(struct gve_notify_block *block, int budget); -int gve_tx_alloc_rings(struct gve_priv *priv, int start_id, int num_rings); -void gve_tx_free_rings_gqi(struct gve_priv *priv, int start_id, int num_rings); +int gve_xsk_tx_poll(struct gve_notify_block *block, int budget); +int gve_tx_alloc_rings_gqi(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *cfg); +void gve_tx_free_rings_gqi(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *cfg); +void gve_tx_start_ring_gqi(struct gve_priv *priv, int idx); +void gve_tx_stop_ring_gqi(struct gve_priv *priv, int idx); u32 gve_tx_load_event_counter(struct gve_priv *priv, struct gve_tx_ring *tx); bool gve_tx_clean_pending(struct gve_priv *priv, struct gve_tx_ring *tx); @@ -1060,14 +1182,73 @@ bool gve_tx_clean_pending(struct gve_priv *priv, struct gve_tx_ring *tx); void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx); int gve_rx_poll(struct gve_notify_block *block, int budget); bool gve_rx_work_pending(struct gve_rx_ring *rx); -int gve_rx_alloc_rings(struct gve_priv *priv); -void gve_rx_free_rings_gqi(struct gve_priv *priv); +int gve_rx_alloc_ring_gqi(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg, + struct gve_rx_ring *rx, + int idx); +void gve_rx_free_ring_gqi(struct gve_priv *priv, struct gve_rx_ring *rx, + struct gve_rx_alloc_rings_cfg *cfg); +int gve_rx_alloc_rings_gqi(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg); +void gve_rx_free_rings_gqi(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg); +void gve_rx_start_ring_gqi(struct gve_priv *priv, int idx); +void gve_rx_stop_ring_gqi(struct gve_priv *priv, int idx); +u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hplit); +bool gve_header_split_supported(const struct gve_priv *priv); +int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split); +/* rx buffer handling */ +int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs); +void gve_free_page_dqo(struct gve_priv *priv, struct gve_rx_buf_state_dqo *bs, + bool free_page); +struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx); +bool gve_buf_state_is_allocated(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state); +void gve_free_buf_state(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state); +struct gve_rx_buf_state_dqo *gve_dequeue_buf_state(struct gve_rx_ring *rx, + struct gve_index_list *list); +void gve_enqueue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list, + struct gve_rx_buf_state_dqo *buf_state); +struct gve_rx_buf_state_dqo *gve_get_recycled_buf_state(struct gve_rx_ring *rx); +void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state); +void gve_free_to_page_pool(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state, + bool allow_direct); +int gve_alloc_qpl_page_dqo(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state); +void gve_free_qpl_page_dqo(struct gve_rx_buf_state_dqo *buf_state); +void gve_reuse_buffer(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state); +void gve_free_buffer(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state); +int gve_alloc_buffer(struct gve_rx_ring *rx, struct gve_rx_desc_dqo *desc); +struct page_pool *gve_rx_create_page_pool(struct gve_priv *priv, + struct gve_rx_ring *rx, + bool xdp); + /* Reset */ void gve_schedule_reset(struct gve_priv *priv); int gve_reset(struct gve_priv *priv, bool attempt_teardown); +void gve_get_curr_alloc_cfgs(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg); +int gve_adjust_config(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg); int gve_adjust_queues(struct gve_priv *priv, - struct gve_queue_config new_rx_config, - struct gve_queue_config new_tx_config); + struct gve_rx_queue_config new_rx_config, + struct gve_tx_queue_config new_tx_config, + bool reset_rss); +/* flow steering rule */ +int gve_get_flow_rule_entry(struct gve_priv *priv, struct ethtool_rxnfc *cmd); +int gve_get_flow_rule_ids(struct gve_priv *priv, struct ethtool_rxnfc *cmd, u32 *rule_locs); +int gve_add_flow_rule(struct gve_priv *priv, struct ethtool_rxnfc *cmd); +int gve_del_flow_rule(struct gve_priv *priv, struct ethtool_rxnfc *cmd); +int gve_flow_rules_reset(struct gve_priv *priv); +/* RSS config */ +int gve_init_rss_config(struct gve_priv *priv, u16 num_queues); /* report stats handling */ void gve_handle_report_stats(struct gve_priv *priv); /* exported by ethtool.c */ diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index 12fbd723ecc6..3e8fc33cc11f 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -32,6 +32,8 @@ struct gve_device_option *gve_get_next_option(struct gve_device_descriptor *desc return option_end > descriptor_end ? NULL : (struct gve_device_option *)option_end; } +#define GVE_DEVICE_OPTION_NO_MIN_RING_SIZE 8 + static void gve_parse_device_option(struct gve_priv *priv, struct gve_device_descriptor *device_descriptor, @@ -40,7 +42,11 @@ void gve_parse_device_option(struct gve_priv *priv, struct gve_device_option_gqi_qpl **dev_op_gqi_qpl, struct gve_device_option_dqo_rda **dev_op_dqo_rda, struct gve_device_option_jumbo_frames **dev_op_jumbo_frames, - struct gve_device_option_dqo_qpl **dev_op_dqo_qpl) + struct gve_device_option_dqo_qpl **dev_op_dqo_qpl, + struct gve_device_option_buffer_sizes **dev_op_buffer_sizes, + struct gve_device_option_flow_steering **dev_op_flow_steering, + struct gve_device_option_rss_config **dev_op_rss_config, + struct gve_device_option_modify_ring **dev_op_modify_ring) { u32 req_feat_mask = be32_to_cpu(option->required_features_mask); u16 option_length = be16_to_cpu(option->option_length); @@ -147,6 +153,78 @@ void gve_parse_device_option(struct gve_priv *priv, } *dev_op_jumbo_frames = (void *)(option + 1); break; + case GVE_DEV_OPT_ID_BUFFER_SIZES: + if (option_length < sizeof(**dev_op_buffer_sizes) || + req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_BUFFER_SIZES) { + dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT, + "Buffer Sizes", + (int)sizeof(**dev_op_buffer_sizes), + GVE_DEV_OPT_REQ_FEAT_MASK_BUFFER_SIZES, + option_length, req_feat_mask); + break; + } + + if (option_length > sizeof(**dev_op_buffer_sizes)) + dev_warn(&priv->pdev->dev, + GVE_DEVICE_OPTION_TOO_BIG_FMT, + "Buffer Sizes"); + *dev_op_buffer_sizes = (void *)(option + 1); + break; + case GVE_DEV_OPT_ID_MODIFY_RING: + if (option_length < GVE_DEVICE_OPTION_NO_MIN_RING_SIZE || + req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING) { + dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT, + "Modify Ring", (int)sizeof(**dev_op_modify_ring), + GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING, + option_length, req_feat_mask); + break; + } + + if (option_length > sizeof(**dev_op_modify_ring)) { + dev_warn(&priv->pdev->dev, + GVE_DEVICE_OPTION_TOO_BIG_FMT, "Modify Ring"); + } + + *dev_op_modify_ring = (void *)(option + 1); + + /* device has not provided min ring size */ + if (option_length == GVE_DEVICE_OPTION_NO_MIN_RING_SIZE) + priv->default_min_ring_size = true; + break; + case GVE_DEV_OPT_ID_FLOW_STEERING: + if (option_length < sizeof(**dev_op_flow_steering) || + req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_FLOW_STEERING) { + dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT, + "Flow Steering", + (int)sizeof(**dev_op_flow_steering), + GVE_DEV_OPT_REQ_FEAT_MASK_FLOW_STEERING, + option_length, req_feat_mask); + break; + } + + if (option_length > sizeof(**dev_op_flow_steering)) + dev_warn(&priv->pdev->dev, + GVE_DEVICE_OPTION_TOO_BIG_FMT, + "Flow Steering"); + *dev_op_flow_steering = (void *)(option + 1); + break; + case GVE_DEV_OPT_ID_RSS_CONFIG: + if (option_length < sizeof(**dev_op_rss_config) || + req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_RSS_CONFIG) { + dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT, + "RSS config", + (int)sizeof(**dev_op_rss_config), + GVE_DEV_OPT_REQ_FEAT_MASK_RSS_CONFIG, + option_length, req_feat_mask); + break; + } + + if (option_length > sizeof(**dev_op_rss_config)) + dev_warn(&priv->pdev->dev, + GVE_DEVICE_OPTION_TOO_BIG_FMT, + "RSS config"); + *dev_op_rss_config = (void *)(option + 1); + break; default: /* If we don't recognize the option just continue * without doing anything. @@ -164,7 +242,11 @@ gve_process_device_options(struct gve_priv *priv, struct gve_device_option_gqi_qpl **dev_op_gqi_qpl, struct gve_device_option_dqo_rda **dev_op_dqo_rda, struct gve_device_option_jumbo_frames **dev_op_jumbo_frames, - struct gve_device_option_dqo_qpl **dev_op_dqo_qpl) + struct gve_device_option_dqo_qpl **dev_op_dqo_qpl, + struct gve_device_option_buffer_sizes **dev_op_buffer_sizes, + struct gve_device_option_flow_steering **dev_op_flow_steering, + struct gve_device_option_rss_config **dev_op_rss_config, + struct gve_device_option_modify_ring **dev_op_modify_ring) { const int num_options = be16_to_cpu(descriptor->num_device_options); struct gve_device_option *dev_opt; @@ -185,7 +267,9 @@ gve_process_device_options(struct gve_priv *priv, gve_parse_device_option(priv, descriptor, dev_opt, dev_op_gqi_rda, dev_op_gqi_qpl, dev_op_dqo_rda, dev_op_jumbo_frames, - dev_op_dqo_qpl); + dev_op_dqo_qpl, dev_op_buffer_sizes, + dev_op_flow_steering, dev_op_rss_config, + dev_op_modify_ring); dev_opt = next_opt; } @@ -223,6 +307,10 @@ int gve_adminq_alloc(struct device *dev, struct gve_priv *priv) priv->adminq_report_stats_cnt = 0; priv->adminq_report_link_speed_cnt = 0; priv->adminq_get_ptype_map_cnt = 0; + priv->adminq_query_flow_rules_cnt = 0; + priv->adminq_cfg_flow_rule_cnt = 0; + priv->adminq_cfg_rss_cnt = 0; + priv->adminq_query_rss_cnt = 0; /* Setup Admin queue with the device */ if (priv->pdev->revision < 0x1) { @@ -239,6 +327,7 @@ int gve_adminq_alloc(struct device *dev, struct gve_priv *priv) &priv->reg_bar0->adminq_base_address_lo); iowrite32be(GVE_DRIVER_STATUS_RUN_MASK, &priv->reg_bar0->driver_status); } + mutex_init(&priv->adminq_lock); gve_set_admin_queue_ok(priv); return 0; } @@ -415,6 +504,8 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv, memcpy(cmd, cmd_orig, sizeof(*cmd_orig)); opcode = be32_to_cpu(READ_ONCE(cmd->opcode)); + if (opcode == GVE_ADMINQ_EXTENDED_COMMAND) + opcode = be32_to_cpu(cmd->extended_command.inner_opcode); switch (opcode) { case GVE_ADMINQ_DESCRIBE_DEVICE: @@ -459,6 +550,18 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv, case GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY: priv->adminq_verify_driver_compatibility_cnt++; break; + case GVE_ADMINQ_QUERY_FLOW_RULES: + priv->adminq_query_flow_rules_cnt++; + break; + case GVE_ADMINQ_CONFIGURE_FLOW_RULE: + priv->adminq_cfg_flow_rule_cnt++; + break; + case GVE_ADMINQ_CONFIGURE_RSS: + priv->adminq_cfg_rss_cnt++; + break; + case GVE_ADMINQ_QUERY_RSS: + priv->adminq_query_rss_cnt++; + break; default: dev_err(&priv->pdev->dev, "unknown AQ command opcode %d\n", opcode); } @@ -466,28 +569,58 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv, return 0; } -/* This function is not threadsafe - the caller is responsible for any - * necessary locks. - * The caller is also responsible for making sure there are no commands - * waiting to be executed. - */ static int gve_adminq_execute_cmd(struct gve_priv *priv, union gve_adminq_command *cmd_orig) { u32 tail, head; int err; + mutex_lock(&priv->adminq_lock); tail = ioread32be(&priv->reg_bar0->adminq_event_counter); head = priv->adminq_prod_cnt; - if (tail != head) - // This is not a valid path - return -EINVAL; + if (tail != head) { + err = -EINVAL; + goto out; + } err = gve_adminq_issue_cmd(priv, cmd_orig); if (err) - return err; + goto out; - return gve_adminq_kick_and_wait(priv); + err = gve_adminq_kick_and_wait(priv); + +out: + mutex_unlock(&priv->adminq_lock); + return err; +} + +static int gve_adminq_execute_extended_cmd(struct gve_priv *priv, u32 opcode, + size_t cmd_size, void *cmd_orig) +{ + union gve_adminq_command cmd; + dma_addr_t inner_cmd_bus; + void *inner_cmd; + int err; + + inner_cmd = dma_alloc_coherent(&priv->pdev->dev, cmd_size, + &inner_cmd_bus, GFP_KERNEL); + if (!inner_cmd) + return -ENOMEM; + + memcpy(inner_cmd, cmd_orig, cmd_size); + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = cpu_to_be32(GVE_ADMINQ_EXTENDED_COMMAND); + cmd.extended_command = (struct gve_adminq_extended_command) { + .inner_opcode = cpu_to_be32(opcode), + .inner_length = cpu_to_be32(cmd_size), + .inner_command_addr = cpu_to_be64(inner_cmd_bus), + }; + + err = gve_adminq_execute_cmd(priv, &cmd); + + dma_free_coherent(&priv->pdev->dev, cmd_size, inner_cmd, inner_cmd_bus); + return err; } /* The device specifies that the management vector can either be the first irq @@ -546,6 +679,7 @@ static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index) cpu_to_be64(tx->q_resources_bus), .tx_ring_addr = cpu_to_be64(tx->bus), .ntfy_id = cpu_to_be32(tx->ntfy_id), + .tx_ring_size = cpu_to_be16(priv->tx_desc_cnt), }; if (gve_is_gqi(priv)) { @@ -554,24 +688,17 @@ static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index) cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id); } else { - u16 comp_ring_size; u32 qpl_id = 0; - if (priv->queue_format == GVE_DQO_RDA_FORMAT) { + if (priv->queue_format == GVE_DQO_RDA_FORMAT) qpl_id = GVE_RAW_ADDRESSING_QPL_ID; - comp_ring_size = - priv->options_dqo_rda.tx_comp_ring_entries; - } else { + else qpl_id = tx->dqo.qpl->id; - comp_ring_size = priv->tx_desc_cnt; - } cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id); - cmd.create_tx_queue.tx_ring_size = - cpu_to_be16(priv->tx_desc_cnt); cmd.create_tx_queue.tx_comp_ring_addr = cpu_to_be64(tx->complq_bus_dqo); cmd.create_tx_queue.tx_comp_ring_size = - cpu_to_be16(comp_ring_size); + cpu_to_be16(priv->tx_desc_cnt); } return gve_adminq_issue_cmd(priv, &cmd); @@ -591,60 +718,71 @@ int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_que return gve_adminq_kick_and_wait(priv); } -static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index) +static void gve_adminq_get_create_rx_queue_cmd(struct gve_priv *priv, + union gve_adminq_command *cmd, + u32 queue_index) { struct gve_rx_ring *rx = &priv->rx[queue_index]; - union gve_adminq_command cmd; - memset(&cmd, 0, sizeof(cmd)); - cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_RX_QUEUE); - cmd.create_rx_queue = (struct gve_adminq_create_rx_queue) { + memset(cmd, 0, sizeof(*cmd)); + cmd->opcode = cpu_to_be32(GVE_ADMINQ_CREATE_RX_QUEUE); + cmd->create_rx_queue = (struct gve_adminq_create_rx_queue) { .queue_id = cpu_to_be32(queue_index), .ntfy_id = cpu_to_be32(rx->ntfy_id), .queue_resources_addr = cpu_to_be64(rx->q_resources_bus), + .rx_ring_size = cpu_to_be16(priv->rx_desc_cnt), + .packet_buffer_size = cpu_to_be16(rx->packet_buffer_size), }; if (gve_is_gqi(priv)) { u32 qpl_id = priv->queue_format == GVE_GQI_RDA_FORMAT ? GVE_RAW_ADDRESSING_QPL_ID : rx->data.qpl->id; - cmd.create_rx_queue.rx_desc_ring_addr = - cpu_to_be64(rx->desc.bus), - cmd.create_rx_queue.rx_data_ring_addr = - cpu_to_be64(rx->data.data_bus), - cmd.create_rx_queue.index = cpu_to_be32(queue_index); - cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id); - cmd.create_rx_queue.packet_buffer_size = cpu_to_be16(rx->packet_buffer_size); + cmd->create_rx_queue.rx_desc_ring_addr = + cpu_to_be64(rx->desc.bus); + cmd->create_rx_queue.rx_data_ring_addr = + cpu_to_be64(rx->data.data_bus); + cmd->create_rx_queue.index = cpu_to_be32(queue_index); + cmd->create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id); } else { - u16 rx_buff_ring_entries; u32 qpl_id = 0; - if (priv->queue_format == GVE_DQO_RDA_FORMAT) { + if (priv->queue_format == GVE_DQO_RDA_FORMAT) qpl_id = GVE_RAW_ADDRESSING_QPL_ID; - rx_buff_ring_entries = - priv->options_dqo_rda.rx_buff_ring_entries; - } else { + else qpl_id = rx->dqo.qpl->id; - rx_buff_ring_entries = priv->rx_desc_cnt; - } - cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id); - cmd.create_rx_queue.rx_ring_size = - cpu_to_be16(priv->rx_desc_cnt); - cmd.create_rx_queue.rx_desc_ring_addr = + cmd->create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id); + cmd->create_rx_queue.rx_desc_ring_addr = cpu_to_be64(rx->dqo.complq.bus); - cmd.create_rx_queue.rx_data_ring_addr = + cmd->create_rx_queue.rx_data_ring_addr = cpu_to_be64(rx->dqo.bufq.bus); - cmd.create_rx_queue.packet_buffer_size = - cpu_to_be16(priv->data_buffer_size_dqo); - cmd.create_rx_queue.rx_buff_ring_size = - cpu_to_be16(rx_buff_ring_entries); - cmd.create_rx_queue.enable_rsc = + cmd->create_rx_queue.rx_buff_ring_size = + cpu_to_be16(priv->rx_desc_cnt); + cmd->create_rx_queue.enable_rsc = !!(priv->dev->features & NETIF_F_LRO); + if (priv->header_split_enabled) + cmd->create_rx_queue.header_buffer_size = + cpu_to_be16(priv->header_buf_size); } +} +static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index) +{ + union gve_adminq_command cmd; + + gve_adminq_get_create_rx_queue_cmd(priv, &cmd, queue_index); return gve_adminq_issue_cmd(priv, &cmd); } +/* Unlike gve_adminq_create_rx_queue, this actually rings the doorbell */ +int gve_adminq_create_single_rx_queue(struct gve_priv *priv, u32 queue_index) +{ + union gve_adminq_command cmd; + + gve_adminq_get_create_rx_queue_cmd(priv, &cmd, queue_index); + return gve_adminq_execute_cmd(priv, &cmd); +} + int gve_adminq_create_rx_queues(struct gve_priv *priv, u32 num_queues) { int err; @@ -691,22 +829,31 @@ int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_qu return gve_adminq_kick_and_wait(priv); } +static void gve_adminq_make_destroy_rx_queue_cmd(union gve_adminq_command *cmd, + u32 queue_index) +{ + memset(cmd, 0, sizeof(*cmd)); + cmd->opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_RX_QUEUE); + cmd->destroy_rx_queue = (struct gve_adminq_destroy_rx_queue) { + .queue_id = cpu_to_be32(queue_index), + }; +} + static int gve_adminq_destroy_rx_queue(struct gve_priv *priv, u32 queue_index) { union gve_adminq_command cmd; - int err; - memset(&cmd, 0, sizeof(cmd)); - cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_RX_QUEUE); - cmd.destroy_rx_queue = (struct gve_adminq_destroy_rx_queue) { - .queue_id = cpu_to_be32(queue_index), - }; + gve_adminq_make_destroy_rx_queue_cmd(&cmd, queue_index); + return gve_adminq_issue_cmd(priv, &cmd); +} - err = gve_adminq_issue_cmd(priv, &cmd); - if (err) - return err; +/* Unlike gve_adminq_destroy_rx_queue, this actually rings the doorbell */ +int gve_adminq_destroy_single_rx_queue(struct gve_priv *priv, u32 queue_index) +{ + union gve_adminq_command cmd; - return 0; + gve_adminq_make_destroy_rx_queue_cmd(&cmd, queue_index); + return gve_adminq_execute_cmd(priv, &cmd); } int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 num_queues) @@ -723,31 +870,26 @@ int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 num_queues) return gve_adminq_kick_and_wait(priv); } -static int gve_set_desc_cnt(struct gve_priv *priv, - struct gve_device_descriptor *descriptor) +static void gve_set_default_desc_cnt(struct gve_priv *priv, + const struct gve_device_descriptor *descriptor) { priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries); priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries); - return 0; + + /* set default ranges */ + priv->max_tx_desc_cnt = priv->tx_desc_cnt; + priv->max_rx_desc_cnt = priv->rx_desc_cnt; + priv->min_tx_desc_cnt = priv->tx_desc_cnt; + priv->min_rx_desc_cnt = priv->rx_desc_cnt; } -static int -gve_set_desc_cnt_dqo(struct gve_priv *priv, - const struct gve_device_descriptor *descriptor, - const struct gve_device_option_dqo_rda *dev_op_dqo_rda) +static void gve_set_default_rss_sizes(struct gve_priv *priv) { - priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries); - priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries); - - if (priv->queue_format == GVE_DQO_QPL_FORMAT) - return 0; - - priv->options_dqo_rda.tx_comp_ring_entries = - be16_to_cpu(dev_op_dqo_rda->tx_comp_ring_entries); - priv->options_dqo_rda.rx_buff_ring_entries = - be16_to_cpu(dev_op_dqo_rda->rx_buff_ring_entries); - - return 0; + if (!gve_is_gqi(priv)) { + priv->rss_key_size = GVE_RSS_KEY_SIZE; + priv->rss_lut_size = GVE_RSS_INDIR_SIZE; + priv->cache_rss_config = true; + } } static void gve_enable_supported_features(struct gve_priv *priv, @@ -755,7 +897,15 @@ static void gve_enable_supported_features(struct gve_priv *priv, const struct gve_device_option_jumbo_frames *dev_op_jumbo_frames, const struct gve_device_option_dqo_qpl - *dev_op_dqo_qpl) + *dev_op_dqo_qpl, + const struct gve_device_option_buffer_sizes + *dev_op_buffer_sizes, + const struct gve_device_option_flow_steering + *dev_op_flow_steering, + const struct gve_device_option_rss_config + *dev_op_rss_config, + const struct gve_device_option_modify_ring + *dev_op_modify_ring) { /* Before control reaches this point, the page-size-capped max MTU from * the gve_device_descriptor field has already been stored in @@ -772,18 +922,73 @@ static void gve_enable_supported_features(struct gve_priv *priv, if (dev_op_dqo_qpl) { priv->tx_pages_per_qpl = be16_to_cpu(dev_op_dqo_qpl->tx_pages_per_qpl); - priv->rx_pages_per_qpl = - be16_to_cpu(dev_op_dqo_qpl->rx_pages_per_qpl); if (priv->tx_pages_per_qpl == 0) priv->tx_pages_per_qpl = DQO_QPL_DEFAULT_TX_PAGES; - if (priv->rx_pages_per_qpl == 0) - priv->rx_pages_per_qpl = DQO_QPL_DEFAULT_RX_PAGES; + } + + if (dev_op_buffer_sizes && + (supported_features_mask & GVE_SUP_BUFFER_SIZES_MASK)) { + priv->max_rx_buffer_size = + be16_to_cpu(dev_op_buffer_sizes->packet_buffer_size); + priv->header_buf_size = + be16_to_cpu(dev_op_buffer_sizes->header_buffer_size); + dev_info(&priv->pdev->dev, + "BUFFER SIZES device option enabled with max_rx_buffer_size of %u, header_buf_size of %u.\n", + priv->max_rx_buffer_size, priv->header_buf_size); + } + + /* Read and store ring size ranges given by device */ + if (dev_op_modify_ring && + (supported_features_mask & GVE_SUP_MODIFY_RING_MASK)) { + priv->modify_ring_size_enabled = true; + + /* max ring size for DQO QPL should not be overwritten because of device limit */ + if (priv->queue_format != GVE_DQO_QPL_FORMAT) { + priv->max_rx_desc_cnt = be16_to_cpu(dev_op_modify_ring->max_rx_ring_size); + priv->max_tx_desc_cnt = be16_to_cpu(dev_op_modify_ring->max_tx_ring_size); + } + if (priv->default_min_ring_size) { + /* If device hasn't provided minimums, use default minimums */ + priv->min_tx_desc_cnt = GVE_DEFAULT_MIN_TX_RING_SIZE; + priv->min_rx_desc_cnt = GVE_DEFAULT_MIN_RX_RING_SIZE; + } else { + priv->min_rx_desc_cnt = be16_to_cpu(dev_op_modify_ring->min_rx_ring_size); + priv->min_tx_desc_cnt = be16_to_cpu(dev_op_modify_ring->min_tx_ring_size); + } + } + + if (dev_op_flow_steering && + (supported_features_mask & GVE_SUP_FLOW_STEERING_MASK)) { + if (dev_op_flow_steering->max_flow_rules) { + priv->max_flow_rules = + be32_to_cpu(dev_op_flow_steering->max_flow_rules); + priv->dev->hw_features |= NETIF_F_NTUPLE; + dev_info(&priv->pdev->dev, + "FLOW STEERING device option enabled with max rule limit of %u.\n", + priv->max_flow_rules); + } + } + + if (dev_op_rss_config && + (supported_features_mask & GVE_SUP_RSS_CONFIG_MASK)) { + priv->rss_key_size = + be16_to_cpu(dev_op_rss_config->hash_key_size); + priv->rss_lut_size = + be16_to_cpu(dev_op_rss_config->hash_lut_size); + priv->cache_rss_config = false; + dev_dbg(&priv->pdev->dev, + "RSS device option enabled with key size of %u, lut size of %u.\n", + priv->rss_key_size, priv->rss_lut_size); } } int gve_adminq_describe_device(struct gve_priv *priv) { + struct gve_device_option_flow_steering *dev_op_flow_steering = NULL; + struct gve_device_option_buffer_sizes *dev_op_buffer_sizes = NULL; struct gve_device_option_jumbo_frames *dev_op_jumbo_frames = NULL; + struct gve_device_option_modify_ring *dev_op_modify_ring = NULL; + struct gve_device_option_rss_config *dev_op_rss_config = NULL; struct gve_device_option_gqi_rda *dev_op_gqi_rda = NULL; struct gve_device_option_gqi_qpl *dev_op_gqi_qpl = NULL; struct gve_device_option_dqo_rda *dev_op_dqo_rda = NULL; @@ -815,8 +1020,11 @@ int gve_adminq_describe_device(struct gve_priv *priv) err = gve_process_device_options(priv, descriptor, &dev_op_gqi_rda, &dev_op_gqi_qpl, &dev_op_dqo_rda, - &dev_op_jumbo_frames, - &dev_op_dqo_qpl); + &dev_op_jumbo_frames, &dev_op_dqo_qpl, + &dev_op_buffer_sizes, + &dev_op_flow_steering, + &dev_op_rss_config, + &dev_op_modify_ring); if (err) goto free_device_descriptor; @@ -851,15 +1059,15 @@ int gve_adminq_describe_device(struct gve_priv *priv) dev_info(&priv->pdev->dev, "Driver is running with GQI QPL queue format.\n"); } - if (gve_is_gqi(priv)) { - err = gve_set_desc_cnt(priv, descriptor); - } else { - /* DQO supports LRO. */ + + /* set default descriptor counts */ + gve_set_default_desc_cnt(priv, descriptor); + + gve_set_default_rss_sizes(priv); + + /* DQO supports LRO. */ + if (!gve_is_gqi(priv)) priv->dev->hw_features |= NETIF_F_LRO; - err = gve_set_desc_cnt_dqo(priv, descriptor, dev_op_dqo_rda); - } - if (err) - goto free_device_descriptor; priv->max_registered_pages = be64_to_cpu(descriptor->max_registered_pages); @@ -875,17 +1083,12 @@ int gve_adminq_describe_device(struct gve_priv *priv) mac = descriptor->mac; dev_info(&priv->pdev->dev, "MAC addr: %pM\n", mac); priv->tx_pages_per_qpl = be16_to_cpu(descriptor->tx_pages_per_qpl); - priv->rx_data_slot_cnt = be16_to_cpu(descriptor->rx_pages_per_qpl); - - if (gve_is_gqi(priv) && priv->rx_data_slot_cnt < priv->rx_desc_cnt) { - dev_err(&priv->pdev->dev, "rx_data_slot_cnt cannot be smaller than rx_desc_cnt, setting rx_desc_cnt down to %d.\n", - priv->rx_data_slot_cnt); - priv->rx_desc_cnt = priv->rx_data_slot_cnt; - } priv->default_num_queues = be16_to_cpu(descriptor->default_num_queues); gve_enable_supported_features(priv, supported_features_mask, - dev_op_jumbo_frames, dev_op_dqo_qpl); + dev_op_jumbo_frames, dev_op_dqo_qpl, + dev_op_buffer_sizes, dev_op_flow_steering, + dev_op_rss_config, dev_op_modify_ring); free_device_descriptor: dma_pool_free(priv->adminq_pool, descriptor, descriptor_bus); @@ -938,20 +1141,6 @@ int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id) return gve_adminq_execute_cmd(priv, &cmd); } -int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu) -{ - union gve_adminq_command cmd; - - memset(&cmd, 0, sizeof(cmd)); - cmd.opcode = cpu_to_be32(GVE_ADMINQ_SET_DRIVER_PARAMETER); - cmd.set_driver_param = (struct gve_adminq_set_driver_parameter) { - .parameter_type = cpu_to_be32(GVE_SET_PARAM_MTU), - .parameter_value = cpu_to_be64(mtu), - }; - - return gve_adminq_execute_cmd(priv, &cmd); -} - int gve_adminq_report_stats(struct gve_priv *priv, u64 stats_report_len, dma_addr_t stats_report_addr, u64 interval) { @@ -1048,3 +1237,293 @@ err: ptype_map_bus); return err; } + +static int +gve_adminq_configure_flow_rule(struct gve_priv *priv, + struct gve_adminq_configure_flow_rule *flow_rule_cmd) +{ + int err = gve_adminq_execute_extended_cmd(priv, + GVE_ADMINQ_CONFIGURE_FLOW_RULE, + sizeof(struct gve_adminq_configure_flow_rule), + flow_rule_cmd); + + if (err == -ETIME) { + dev_err(&priv->pdev->dev, "Timeout to configure the flow rule, trigger reset"); + gve_reset(priv, true); + } else if (!err) { + priv->flow_rules_cache.rules_cache_synced = false; + } + + return err; +} + +int gve_adminq_add_flow_rule(struct gve_priv *priv, struct gve_adminq_flow_rule *rule, u32 loc) +{ + struct gve_adminq_configure_flow_rule flow_rule_cmd = { + .opcode = cpu_to_be16(GVE_FLOW_RULE_CFG_ADD), + .location = cpu_to_be32(loc), + .rule = *rule, + }; + + return gve_adminq_configure_flow_rule(priv, &flow_rule_cmd); +} + +int gve_adminq_del_flow_rule(struct gve_priv *priv, u32 loc) +{ + struct gve_adminq_configure_flow_rule flow_rule_cmd = { + .opcode = cpu_to_be16(GVE_FLOW_RULE_CFG_DEL), + .location = cpu_to_be32(loc), + }; + + return gve_adminq_configure_flow_rule(priv, &flow_rule_cmd); +} + +int gve_adminq_reset_flow_rules(struct gve_priv *priv) +{ + struct gve_adminq_configure_flow_rule flow_rule_cmd = { + .opcode = cpu_to_be16(GVE_FLOW_RULE_CFG_RESET), + }; + + return gve_adminq_configure_flow_rule(priv, &flow_rule_cmd); +} + +int gve_adminq_configure_rss(struct gve_priv *priv, struct ethtool_rxfh_param *rxfh) +{ + const u32 *hash_lut_to_config = NULL; + const u8 *hash_key_to_config = NULL; + dma_addr_t lut_bus = 0, key_bus = 0; + union gve_adminq_command cmd; + __be32 *lut = NULL; + u8 hash_alg = 0; + u8 *key = NULL; + int err = 0; + u16 i; + + switch (rxfh->hfunc) { + case ETH_RSS_HASH_NO_CHANGE: + fallthrough; + case ETH_RSS_HASH_TOP: + hash_alg = ETH_RSS_HASH_TOP; + break; + default: + return -EOPNOTSUPP; + } + + if (rxfh->indir) { + if (rxfh->indir_size != priv->rss_lut_size) + return -EINVAL; + + hash_lut_to_config = rxfh->indir; + } else if (priv->cache_rss_config) { + hash_lut_to_config = priv->rss_config.hash_lut; + } + + if (hash_lut_to_config) { + lut = dma_alloc_coherent(&priv->pdev->dev, + priv->rss_lut_size * sizeof(*lut), + &lut_bus, GFP_KERNEL); + if (!lut) + return -ENOMEM; + + for (i = 0; i < priv->rss_lut_size; i++) + lut[i] = cpu_to_be32(hash_lut_to_config[i]); + } + + if (rxfh->key) { + if (rxfh->key_size != priv->rss_key_size) { + err = -EINVAL; + goto out; + } + + hash_key_to_config = rxfh->key; + } else if (priv->cache_rss_config) { + hash_key_to_config = priv->rss_config.hash_key; + } + + if (hash_key_to_config) { + key = dma_alloc_coherent(&priv->pdev->dev, + priv->rss_key_size, + &key_bus, GFP_KERNEL); + if (!key) { + err = -ENOMEM; + goto out; + } + + memcpy(key, hash_key_to_config, priv->rss_key_size); + } + + /* Zero-valued fields in the cmd.configure_rss instruct the device to + * not update those fields. + */ + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = cpu_to_be32(GVE_ADMINQ_CONFIGURE_RSS); + cmd.configure_rss = (struct gve_adminq_configure_rss) { + .hash_types = cpu_to_be16(BIT(GVE_RSS_HASH_TCPV4) | + BIT(GVE_RSS_HASH_UDPV4) | + BIT(GVE_RSS_HASH_TCPV6) | + BIT(GVE_RSS_HASH_UDPV6)), + .hash_alg = hash_alg, + .hash_key_size = + cpu_to_be16((key_bus) ? priv->rss_key_size : 0), + .hash_lut_size = + cpu_to_be16((lut_bus) ? priv->rss_lut_size : 0), + .hash_key_addr = cpu_to_be64(key_bus), + .hash_lut_addr = cpu_to_be64(lut_bus), + }; + + err = gve_adminq_execute_cmd(priv, &cmd); + +out: + if (lut) + dma_free_coherent(&priv->pdev->dev, + priv->rss_lut_size * sizeof(*lut), + lut, lut_bus); + if (key) + dma_free_coherent(&priv->pdev->dev, + priv->rss_key_size, key, key_bus); + return err; +} + +/* In the dma memory that the driver allocated for the device to query the flow rules, the device + * will first write it with a struct of gve_query_flow_rules_descriptor. Next to it, the device + * will write an array of rules or rule ids with the count that specified in the descriptor. + * For GVE_FLOW_RULE_QUERY_STATS, the device will only write the descriptor. + */ +static int gve_adminq_process_flow_rules_query(struct gve_priv *priv, u16 query_opcode, + struct gve_query_flow_rules_descriptor *descriptor) +{ + struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache; + u32 num_queried_rules, total_memory_len, rule_info_len; + void *rule_info; + + total_memory_len = be32_to_cpu(descriptor->total_length); + num_queried_rules = be32_to_cpu(descriptor->num_queried_rules); + rule_info = (void *)(descriptor + 1); + + switch (query_opcode) { + case GVE_FLOW_RULE_QUERY_RULES: + rule_info_len = num_queried_rules * sizeof(*flow_rules_cache->rules_cache); + if (sizeof(*descriptor) + rule_info_len != total_memory_len) { + dev_err(&priv->dev->dev, "flow rules query is out of memory.\n"); + return -ENOMEM; + } + + memcpy(flow_rules_cache->rules_cache, rule_info, rule_info_len); + flow_rules_cache->rules_cache_num = num_queried_rules; + break; + case GVE_FLOW_RULE_QUERY_IDS: + rule_info_len = num_queried_rules * sizeof(*flow_rules_cache->rule_ids_cache); + if (sizeof(*descriptor) + rule_info_len != total_memory_len) { + dev_err(&priv->dev->dev, "flow rule ids query is out of memory.\n"); + return -ENOMEM; + } + + memcpy(flow_rules_cache->rule_ids_cache, rule_info, rule_info_len); + flow_rules_cache->rule_ids_cache_num = num_queried_rules; + break; + case GVE_FLOW_RULE_QUERY_STATS: + priv->num_flow_rules = be32_to_cpu(descriptor->num_flow_rules); + priv->max_flow_rules = be32_to_cpu(descriptor->max_flow_rules); + return 0; + default: + return -EINVAL; + } + + return 0; +} + +int gve_adminq_query_flow_rules(struct gve_priv *priv, u16 query_opcode, u32 starting_loc) +{ + struct gve_query_flow_rules_descriptor *descriptor; + union gve_adminq_command cmd; + dma_addr_t descriptor_bus; + int err = 0; + + memset(&cmd, 0, sizeof(cmd)); + descriptor = dma_pool_alloc(priv->adminq_pool, GFP_KERNEL, &descriptor_bus); + if (!descriptor) + return -ENOMEM; + + cmd.opcode = cpu_to_be32(GVE_ADMINQ_QUERY_FLOW_RULES); + cmd.query_flow_rules = (struct gve_adminq_query_flow_rules) { + .opcode = cpu_to_be16(query_opcode), + .starting_rule_id = cpu_to_be32(starting_loc), + .available_length = cpu_to_be64(GVE_ADMINQ_BUFFER_SIZE), + .rule_descriptor_addr = cpu_to_be64(descriptor_bus), + }; + err = gve_adminq_execute_cmd(priv, &cmd); + if (err) + goto out; + + err = gve_adminq_process_flow_rules_query(priv, query_opcode, descriptor); + +out: + dma_pool_free(priv->adminq_pool, descriptor, descriptor_bus); + return err; +} + +static int gve_adminq_process_rss_query(struct gve_priv *priv, + struct gve_query_rss_descriptor *descriptor, + struct ethtool_rxfh_param *rxfh) +{ + u32 total_memory_length; + u16 hash_lut_length; + void *rss_info_addr; + __be32 *lut; + u16 i; + + total_memory_length = be32_to_cpu(descriptor->total_length); + hash_lut_length = priv->rss_lut_size * sizeof(*rxfh->indir); + + if (sizeof(*descriptor) + priv->rss_key_size + hash_lut_length != total_memory_length) { + dev_err(&priv->dev->dev, + "rss query desc from device has invalid length parameter.\n"); + return -EINVAL; + } + + rxfh->hfunc = descriptor->hash_alg; + + rss_info_addr = (void *)(descriptor + 1); + if (rxfh->key) { + rxfh->key_size = priv->rss_key_size; + memcpy(rxfh->key, rss_info_addr, priv->rss_key_size); + } + + rss_info_addr += priv->rss_key_size; + lut = (__be32 *)rss_info_addr; + if (rxfh->indir) { + rxfh->indir_size = priv->rss_lut_size; + for (i = 0; i < priv->rss_lut_size; i++) + rxfh->indir[i] = be32_to_cpu(lut[i]); + } + + return 0; +} + +int gve_adminq_query_rss_config(struct gve_priv *priv, struct ethtool_rxfh_param *rxfh) +{ + struct gve_query_rss_descriptor *descriptor; + union gve_adminq_command cmd; + dma_addr_t descriptor_bus; + int err = 0; + + descriptor = dma_pool_alloc(priv->adminq_pool, GFP_KERNEL, &descriptor_bus); + if (!descriptor) + return -ENOMEM; + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = cpu_to_be32(GVE_ADMINQ_QUERY_RSS); + cmd.query_rss = (struct gve_adminq_query_rss) { + .available_length = cpu_to_be64(GVE_ADMINQ_BUFFER_SIZE), + .rss_descriptor_addr = cpu_to_be64(descriptor_bus), + }; + err = gve_adminq_execute_cmd(priv, &cmd); + if (err) + goto out; + + err = gve_adminq_process_rss_query(priv, descriptor, rxfh); + +out: + dma_pool_free(priv->adminq_pool, descriptor, descriptor_bus); + return err; +} diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h index 5865ccdccbd0..228217458275 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.h +++ b/drivers/net/ethernet/google/gve/gve_adminq.h @@ -20,11 +20,26 @@ enum gve_adminq_opcodes { GVE_ADMINQ_DESTROY_TX_QUEUE = 0x7, GVE_ADMINQ_DESTROY_RX_QUEUE = 0x8, GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES = 0x9, + GVE_ADMINQ_CONFIGURE_RSS = 0xA, GVE_ADMINQ_SET_DRIVER_PARAMETER = 0xB, GVE_ADMINQ_REPORT_STATS = 0xC, GVE_ADMINQ_REPORT_LINK_SPEED = 0xD, GVE_ADMINQ_GET_PTYPE_MAP = 0xE, GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY = 0xF, + GVE_ADMINQ_QUERY_FLOW_RULES = 0x10, + GVE_ADMINQ_QUERY_RSS = 0x12, + + /* For commands that are larger than 56 bytes */ + GVE_ADMINQ_EXTENDED_COMMAND = 0xFF, +}; + +/* The normal adminq command is restricted to be 56 bytes at maximum. For the + * longer adminq command, it is wrapped by GVE_ADMINQ_EXTENDED_COMMAND with + * inner opcode of gve_adminq_extended_cmd_opcodes specified. The inner command + * is written in the dma memory allocated by GVE_ADMINQ_EXTENDED_COMMAND. + */ +enum gve_adminq_extended_cmd_opcodes { + GVE_ADMINQ_CONFIGURE_FLOW_RULE = 0x101, }; /* Admin queue status codes */ @@ -103,8 +118,7 @@ static_assert(sizeof(struct gve_device_option_gqi_qpl) == 4); struct gve_device_option_dqo_rda { __be32 supported_features_mask; - __be16 tx_comp_ring_entries; - __be16 rx_buff_ring_entries; + __be32 reserved; }; static_assert(sizeof(struct gve_device_option_dqo_rda) == 8); @@ -125,6 +139,41 @@ struct gve_device_option_jumbo_frames { static_assert(sizeof(struct gve_device_option_jumbo_frames) == 8); +struct gve_device_option_buffer_sizes { + /* GVE_SUP_BUFFER_SIZES_MASK bit should be set */ + __be32 supported_features_mask; + __be16 packet_buffer_size; + __be16 header_buffer_size; +}; + +static_assert(sizeof(struct gve_device_option_buffer_sizes) == 8); + +struct gve_device_option_modify_ring { + __be32 supported_featured_mask; + __be16 max_rx_ring_size; + __be16 max_tx_ring_size; + __be16 min_rx_ring_size; + __be16 min_tx_ring_size; +}; + +static_assert(sizeof(struct gve_device_option_modify_ring) == 12); + +struct gve_device_option_flow_steering { + __be32 supported_features_mask; + __be32 reserved; + __be32 max_flow_rules; +}; + +static_assert(sizeof(struct gve_device_option_flow_steering) == 12); + +struct gve_device_option_rss_config { + __be32 supported_features_mask; + __be16 hash_key_size; + __be16 hash_lut_size; +}; + +static_assert(sizeof(struct gve_device_option_rss_config) == 8); + /* Terminology: * * RDA - Raw DMA Addressing - Buffers associated with SKBs are directly DMA @@ -134,25 +183,37 @@ static_assert(sizeof(struct gve_device_option_jumbo_frames) == 8); * the device for read/write and data is copied from/to SKBs. */ enum gve_dev_opt_id { - GVE_DEV_OPT_ID_GQI_RAW_ADDRESSING = 0x1, - GVE_DEV_OPT_ID_GQI_RDA = 0x2, - GVE_DEV_OPT_ID_GQI_QPL = 0x3, - GVE_DEV_OPT_ID_DQO_RDA = 0x4, - GVE_DEV_OPT_ID_DQO_QPL = 0x7, - GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8, + GVE_DEV_OPT_ID_GQI_RAW_ADDRESSING = 0x1, + GVE_DEV_OPT_ID_GQI_RDA = 0x2, + GVE_DEV_OPT_ID_GQI_QPL = 0x3, + GVE_DEV_OPT_ID_DQO_RDA = 0x4, + GVE_DEV_OPT_ID_MODIFY_RING = 0x6, + GVE_DEV_OPT_ID_DQO_QPL = 0x7, + GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8, + GVE_DEV_OPT_ID_BUFFER_SIZES = 0xa, + GVE_DEV_OPT_ID_FLOW_STEERING = 0xb, + GVE_DEV_OPT_ID_RSS_CONFIG = 0xe, }; enum gve_dev_opt_req_feat_mask { - GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RAW_ADDRESSING = 0x0, - GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RDA = 0x0, - GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL = 0x0, - GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA = 0x0, - GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES = 0x0, - GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RAW_ADDRESSING = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RDA = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_BUFFER_SIZES = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_FLOW_STEERING = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_RSS_CONFIG = 0x0, }; enum gve_sup_feature_mask { - GVE_SUP_JUMBO_FRAMES_MASK = 1 << 2, + GVE_SUP_MODIFY_RING_MASK = 1 << 0, + GVE_SUP_JUMBO_FRAMES_MASK = 1 << 2, + GVE_SUP_BUFFER_SIZES_MASK = 1 << 4, + GVE_SUP_FLOW_STEERING_MASK = 1 << 5, + GVE_SUP_RSS_CONFIG_MASK = 1 << 7, }; #define GVE_DEV_OPT_LEN_GQI_RAW_ADDRESSING 0x0 @@ -165,6 +226,8 @@ enum gve_driver_capbility { gve_driver_capability_dqo_qpl = 2, /* reserved for future use */ gve_driver_capability_dqo_rda = 3, gve_driver_capability_alt_miss_compl = 4, + gve_driver_capability_flexible_buffer_size = 5, + gve_driver_capability_flexible_rss_size = 6, }; #define GVE_CAP1(a) BIT((int)a) @@ -176,12 +239,22 @@ enum gve_driver_capbility { (GVE_CAP1(gve_driver_capability_gqi_qpl) | \ GVE_CAP1(gve_driver_capability_gqi_rda) | \ GVE_CAP1(gve_driver_capability_dqo_rda) | \ - GVE_CAP1(gve_driver_capability_alt_miss_compl)) + GVE_CAP1(gve_driver_capability_alt_miss_compl) | \ + GVE_CAP1(gve_driver_capability_flexible_buffer_size) | \ + GVE_CAP1(gve_driver_capability_flexible_rss_size)) #define GVE_DRIVER_CAPABILITY_FLAGS2 0x0 #define GVE_DRIVER_CAPABILITY_FLAGS3 0x0 #define GVE_DRIVER_CAPABILITY_FLAGS4 0x0 +struct gve_adminq_extended_command { + __be32 inner_opcode; + __be32 inner_length; + __be64 inner_command_addr; +}; + +static_assert(sizeof(struct gve_adminq_extended_command) == 16); + struct gve_driver_info { u8 os_type; /* 0x01 = Linux */ u8 driver_major; @@ -260,7 +333,9 @@ struct gve_adminq_create_rx_queue { __be16 packet_buffer_size; __be16 rx_buff_ring_size; u8 enable_rsc; - u8 padding[5]; + u8 padding1; + __be16 header_buffer_size; + u8 padding2[2]; }; static_assert(sizeof(struct gve_adminq_create_rx_queue) == 56); @@ -384,6 +459,109 @@ struct gve_adminq_get_ptype_map { __be64 ptype_map_addr; }; +/* Flow-steering related definitions */ +enum gve_adminq_flow_rule_cfg_opcode { + GVE_FLOW_RULE_CFG_ADD = 0, + GVE_FLOW_RULE_CFG_DEL = 1, + GVE_FLOW_RULE_CFG_RESET = 2, +}; + +enum gve_adminq_flow_rule_query_opcode { + GVE_FLOW_RULE_QUERY_RULES = 0, + GVE_FLOW_RULE_QUERY_IDS = 1, + GVE_FLOW_RULE_QUERY_STATS = 2, +}; + +enum gve_adminq_flow_type { + GVE_FLOW_TYPE_TCPV4, + GVE_FLOW_TYPE_UDPV4, + GVE_FLOW_TYPE_SCTPV4, + GVE_FLOW_TYPE_AHV4, + GVE_FLOW_TYPE_ESPV4, + GVE_FLOW_TYPE_TCPV6, + GVE_FLOW_TYPE_UDPV6, + GVE_FLOW_TYPE_SCTPV6, + GVE_FLOW_TYPE_AHV6, + GVE_FLOW_TYPE_ESPV6, +}; + +/* Flow-steering command */ +struct gve_adminq_flow_rule { + __be16 flow_type; + __be16 action; /* RX queue id */ + struct gve_flow_spec key; + struct gve_flow_spec mask; +}; + +struct gve_adminq_configure_flow_rule { + __be16 opcode; + u8 padding[2]; + struct gve_adminq_flow_rule rule; + __be32 location; +}; + +static_assert(sizeof(struct gve_adminq_configure_flow_rule) == 92); + +struct gve_query_flow_rules_descriptor { + __be32 num_flow_rules; + __be32 max_flow_rules; + __be32 num_queried_rules; + __be32 total_length; +}; + +struct gve_adminq_queried_flow_rule { + __be32 location; + struct gve_adminq_flow_rule flow_rule; +}; + +struct gve_adminq_query_flow_rules { + __be16 opcode; + u8 padding[2]; + __be32 starting_rule_id; + __be64 available_length; /* The dma memory length that the driver allocated */ + __be64 rule_descriptor_addr; /* The dma memory address */ +}; + +static_assert(sizeof(struct gve_adminq_query_flow_rules) == 24); + +enum gve_rss_hash_type { + GVE_RSS_HASH_IPV4, + GVE_RSS_HASH_TCPV4, + GVE_RSS_HASH_IPV6, + GVE_RSS_HASH_IPV6_EX, + GVE_RSS_HASH_TCPV6, + GVE_RSS_HASH_TCPV6_EX, + GVE_RSS_HASH_UDPV4, + GVE_RSS_HASH_UDPV6, + GVE_RSS_HASH_UDPV6_EX, +}; + +struct gve_adminq_configure_rss { + __be16 hash_types; + u8 hash_alg; + u8 reserved; + __be16 hash_key_size; + __be16 hash_lut_size; + __be64 hash_key_addr; + __be64 hash_lut_addr; +}; + +static_assert(sizeof(struct gve_adminq_configure_rss) == 24); + +struct gve_query_rss_descriptor { + __be32 total_length; + __be16 hash_types; + u8 hash_alg; + u8 reserved; +}; + +struct gve_adminq_query_rss { + __be64 available_length; + __be64 rss_descriptor_addr; +}; + +static_assert(sizeof(struct gve_adminq_query_rss) == 16); + union gve_adminq_command { struct { __be32 opcode; @@ -404,6 +582,10 @@ union gve_adminq_command { struct gve_adminq_get_ptype_map get_ptype_map; struct gve_adminq_verify_driver_compatibility verify_driver_compatibility; + struct gve_adminq_query_flow_rules query_flow_rules; + struct gve_adminq_configure_rss configure_rss; + struct gve_adminq_query_rss query_rss; + struct gve_adminq_extended_command extended_command; }; }; u8 reserved[64]; @@ -423,18 +605,25 @@ int gve_adminq_configure_device_resources(struct gve_priv *priv, int gve_adminq_deconfigure_device_resources(struct gve_priv *priv); int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_queues); int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_queues); +int gve_adminq_create_single_rx_queue(struct gve_priv *priv, u32 queue_index); int gve_adminq_create_rx_queues(struct gve_priv *priv, u32 num_queues); +int gve_adminq_destroy_single_rx_queue(struct gve_priv *priv, u32 queue_index); int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 queue_id); int gve_adminq_register_page_list(struct gve_priv *priv, struct gve_queue_page_list *qpl); int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id); -int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu); int gve_adminq_report_stats(struct gve_priv *priv, u64 stats_report_len, dma_addr_t stats_report_addr, u64 interval); int gve_adminq_verify_driver_compatibility(struct gve_priv *priv, u64 driver_info_len, dma_addr_t driver_info_addr); int gve_adminq_report_link_speed(struct gve_priv *priv); +int gve_adminq_add_flow_rule(struct gve_priv *priv, struct gve_adminq_flow_rule *rule, u32 loc); +int gve_adminq_del_flow_rule(struct gve_priv *priv, u32 loc); +int gve_adminq_reset_flow_rules(struct gve_priv *priv); +int gve_adminq_query_flow_rules(struct gve_priv *priv, u16 query_opcode, u32 starting_loc); +int gve_adminq_configure_rss(struct gve_priv *priv, struct ethtool_rxfh_param *rxfh); +int gve_adminq_query_rss_config(struct gve_priv *priv, struct ethtool_rxfh_param *rxfh); struct gve_ptype_lut; int gve_adminq_get_ptype_map_dqo(struct gve_priv *priv, diff --git a/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c new file mode 100644 index 000000000000..a71883e1d920 --- /dev/null +++ b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c @@ -0,0 +1,316 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Google virtual Ethernet (gve) driver + * + * Copyright (C) 2015-2024 Google, Inc. + */ + +#include "gve.h" +#include "gve_utils.h" + +int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs) +{ + return page_count(bs->page_info.page) - bs->page_info.pagecnt_bias; +} + +struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx) +{ + struct gve_rx_buf_state_dqo *buf_state; + s16 buffer_id; + + buffer_id = rx->dqo.free_buf_states; + if (unlikely(buffer_id == -1)) + return NULL; + + buf_state = &rx->dqo.buf_states[buffer_id]; + + /* Remove buf_state from free list */ + rx->dqo.free_buf_states = buf_state->next; + + /* Point buf_state to itself to mark it as allocated */ + buf_state->next = buffer_id; + + return buf_state; +} + +bool gve_buf_state_is_allocated(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state) +{ + s16 buffer_id = buf_state - rx->dqo.buf_states; + + return buf_state->next == buffer_id; +} + +void gve_free_buf_state(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state) +{ + s16 buffer_id = buf_state - rx->dqo.buf_states; + + buf_state->next = rx->dqo.free_buf_states; + rx->dqo.free_buf_states = buffer_id; +} + +struct gve_rx_buf_state_dqo *gve_dequeue_buf_state(struct gve_rx_ring *rx, + struct gve_index_list *list) +{ + struct gve_rx_buf_state_dqo *buf_state; + s16 buffer_id; + + buffer_id = list->head; + if (unlikely(buffer_id == -1)) + return NULL; + + buf_state = &rx->dqo.buf_states[buffer_id]; + + /* Remove buf_state from list */ + list->head = buf_state->next; + if (buf_state->next == -1) + list->tail = -1; + + /* Point buf_state to itself to mark it as allocated */ + buf_state->next = buffer_id; + + return buf_state; +} + +void gve_enqueue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list, + struct gve_rx_buf_state_dqo *buf_state) +{ + s16 buffer_id = buf_state - rx->dqo.buf_states; + + buf_state->next = -1; + + if (list->head == -1) { + list->head = buffer_id; + list->tail = buffer_id; + } else { + int tail = list->tail; + + rx->dqo.buf_states[tail].next = buffer_id; + list->tail = buffer_id; + } +} + +struct gve_rx_buf_state_dqo *gve_get_recycled_buf_state(struct gve_rx_ring *rx) +{ + struct gve_rx_buf_state_dqo *buf_state; + int i; + + /* Recycled buf states are immediately usable. */ + buf_state = gve_dequeue_buf_state(rx, &rx->dqo.recycled_buf_states); + if (likely(buf_state)) + return buf_state; + + if (unlikely(rx->dqo.used_buf_states.head == -1)) + return NULL; + + /* Used buf states are only usable when ref count reaches 0, which means + * no SKBs refer to them. + * + * Search a limited number before giving up. + */ + for (i = 0; i < 5; i++) { + buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states); + if (gve_buf_ref_cnt(buf_state) == 0) { + rx->dqo.used_buf_states_cnt--; + return buf_state; + } + + gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state); + } + + return NULL; +} + +int gve_alloc_qpl_page_dqo(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state) +{ + struct gve_priv *priv = rx->gve; + u32 idx; + + idx = rx->dqo.next_qpl_page_idx; + if (idx >= gve_get_rx_pages_per_qpl_dqo(priv->rx_desc_cnt)) { + net_err_ratelimited("%s: Out of QPL pages\n", + priv->dev->name); + return -ENOMEM; + } + buf_state->page_info.page = rx->dqo.qpl->pages[idx]; + buf_state->addr = rx->dqo.qpl->page_buses[idx]; + rx->dqo.next_qpl_page_idx++; + buf_state->page_info.page_offset = 0; + buf_state->page_info.page_address = + page_address(buf_state->page_info.page); + buf_state->page_info.buf_size = rx->packet_buffer_truesize; + buf_state->page_info.pad = rx->rx_headroom; + buf_state->last_single_ref_offset = 0; + + /* The page already has 1 ref. */ + page_ref_add(buf_state->page_info.page, INT_MAX - 1); + buf_state->page_info.pagecnt_bias = INT_MAX; + + return 0; +} + +void gve_free_qpl_page_dqo(struct gve_rx_buf_state_dqo *buf_state) +{ + if (!buf_state->page_info.page) + return; + + page_ref_sub(buf_state->page_info.page, + buf_state->page_info.pagecnt_bias - 1); + buf_state->page_info.page = NULL; +} + +void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state) +{ + const u16 data_buffer_size = rx->packet_buffer_truesize; + int pagecount; + + /* Can't reuse if we only fit one buffer per page */ + if (data_buffer_size * 2 > PAGE_SIZE) + goto mark_used; + + pagecount = gve_buf_ref_cnt(buf_state); + + /* Record the offset when we have a single remaining reference. + * + * When this happens, we know all of the other offsets of the page are + * usable. + */ + if (pagecount == 1) { + buf_state->last_single_ref_offset = + buf_state->page_info.page_offset; + } + + /* Use the next buffer sized chunk in the page. */ + buf_state->page_info.page_offset += data_buffer_size; + buf_state->page_info.page_offset &= (PAGE_SIZE - 1); + + /* If we wrap around to the same offset without ever dropping to 1 + * reference, then we don't know if this offset was ever freed. + */ + if (buf_state->page_info.page_offset == + buf_state->last_single_ref_offset) { + goto mark_used; + } + + gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state); + return; + +mark_used: + gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state); + rx->dqo.used_buf_states_cnt++; +} + +void gve_free_to_page_pool(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state, + bool allow_direct) +{ + netmem_ref netmem = buf_state->page_info.netmem; + + if (!netmem) + return; + + page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, allow_direct); + buf_state->page_info.netmem = 0; +} + +static int gve_alloc_from_page_pool(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state) +{ + netmem_ref netmem; + + buf_state->page_info.buf_size = rx->packet_buffer_truesize; + netmem = page_pool_alloc_netmem(rx->dqo.page_pool, + &buf_state->page_info.page_offset, + &buf_state->page_info.buf_size, + GFP_ATOMIC); + + if (!netmem) + return -ENOMEM; + + buf_state->page_info.netmem = netmem; + buf_state->page_info.page_address = netmem_address(netmem); + buf_state->addr = page_pool_get_dma_addr_netmem(netmem); + buf_state->page_info.pad = rx->dqo.page_pool->p.offset; + + return 0; +} + +struct page_pool *gve_rx_create_page_pool(struct gve_priv *priv, + struct gve_rx_ring *rx, + bool xdp) +{ + u32 ntfy_id = gve_rx_idx_to_ntfy(priv, rx->q_num); + struct page_pool_params pp = { + .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, + .order = 0, + .pool_size = GVE_PAGE_POOL_SIZE_MULTIPLIER * priv->rx_desc_cnt, + .dev = &priv->pdev->dev, + .netdev = priv->dev, + .napi = &priv->ntfy_blocks[ntfy_id].napi, + .max_len = PAGE_SIZE, + .dma_dir = xdp ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE, + .offset = xdp ? XDP_PACKET_HEADROOM : 0, + }; + + return page_pool_create(&pp); +} + +void gve_free_buffer(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state) +{ + if (rx->dqo.page_pool) { + gve_free_to_page_pool(rx, buf_state, true); + gve_free_buf_state(rx, buf_state); + } else { + gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, + buf_state); + } +} + +void gve_reuse_buffer(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state) +{ + if (rx->dqo.page_pool) { + buf_state->page_info.netmem = 0; + gve_free_buf_state(rx, buf_state); + } else { + gve_dec_pagecnt_bias(&buf_state->page_info); + gve_try_recycle_buf(rx->gve, rx, buf_state); + } +} + +int gve_alloc_buffer(struct gve_rx_ring *rx, struct gve_rx_desc_dqo *desc) +{ + struct gve_rx_buf_state_dqo *buf_state; + + if (rx->dqo.page_pool) { + buf_state = gve_alloc_buf_state(rx); + if (WARN_ON_ONCE(!buf_state)) + return -ENOMEM; + + if (gve_alloc_from_page_pool(rx, buf_state)) + goto free_buf_state; + } else { + buf_state = gve_get_recycled_buf_state(rx); + if (unlikely(!buf_state)) { + buf_state = gve_alloc_buf_state(rx); + if (unlikely(!buf_state)) + return -ENOMEM; + + if (unlikely(gve_alloc_qpl_page_dqo(rx, buf_state))) + goto free_buf_state; + } + } + desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states); + desc->buf_addr = cpu_to_le64(buf_state->addr + + buf_state->page_info.page_offset + + buf_state->page_info.pad); + + return 0; + +free_buf_state: + gve_free_buf_state(rx, buf_state); + return -ENOMEM; +} diff --git a/drivers/net/ethernet/google/gve/gve_dqo.h b/drivers/net/ethernet/google/gve/gve_dqo.h index c36b93f0de15..e83773fb891f 100644 --- a/drivers/net/ethernet/google/gve/gve_dqo.h +++ b/drivers/net/ethernet/google/gve/gve_dqo.h @@ -38,10 +38,24 @@ netdev_features_t gve_features_check_dqo(struct sk_buff *skb, netdev_features_t features); bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean); int gve_rx_poll_dqo(struct gve_notify_block *block, int budget); -int gve_tx_alloc_rings_dqo(struct gve_priv *priv); -void gve_tx_free_rings_dqo(struct gve_priv *priv); -int gve_rx_alloc_rings_dqo(struct gve_priv *priv); -void gve_rx_free_rings_dqo(struct gve_priv *priv); +int gve_tx_alloc_rings_dqo(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *cfg); +void gve_tx_free_rings_dqo(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *cfg); +void gve_tx_start_ring_dqo(struct gve_priv *priv, int idx); +void gve_tx_stop_ring_dqo(struct gve_priv *priv, int idx); +int gve_rx_alloc_ring_dqo(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg, + struct gve_rx_ring *rx, + int idx); +void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, + struct gve_rx_alloc_rings_cfg *cfg); +int gve_rx_alloc_rings_dqo(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg); +void gve_rx_free_rings_dqo(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg); +void gve_rx_start_ring_dqo(struct gve_priv *priv, int idx); +void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx); int gve_clean_tx_done_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, struct napi_struct *napi); void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx); @@ -93,4 +107,6 @@ gve_set_itr_coalesce_usecs_dqo(struct gve_priv *priv, gve_write_irq_doorbell_dqo(priv, block, gve_setup_itr_interval_dqo(usecs)); } + +int gve_napi_poll_dqo(struct napi_struct *napi, int budget); #endif /* _GVE_DQO_H_ */ diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index e5397aa1e48f..3c1da0cf3f61 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -1,14 +1,14 @@ // SPDX-License-Identifier: (GPL-2.0 OR MIT) /* Google virtual Ethernet (gve) driver * - * Copyright (C) 2015-2021 Google, Inc. + * Copyright (C) 2015-2024 Google LLC */ -#include <linux/ethtool.h> #include <linux/rtnetlink.h> #include "gve.h" #include "gve_adminq.h" #include "gve_dqo.h" +#include "gve_utils.h" static void gve_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info) @@ -40,17 +40,18 @@ static u32 gve_get_msglevel(struct net_device *netdev) * as declared in enum xdp_action inside file uapi/linux/bpf.h . */ static const char gve_gstrings_main_stats[][ETH_GSTRING_LEN] = { - "rx_packets", "tx_packets", "rx_bytes", "tx_bytes", - "rx_dropped", "tx_dropped", "tx_timeouts", + "rx_packets", "rx_hsplit_pkt", "tx_packets", "rx_bytes", + "tx_bytes", "rx_dropped", "tx_dropped", "tx_timeouts", "rx_skb_alloc_fail", "rx_buf_alloc_fail", "rx_desc_err_dropped_pkt", + "rx_hsplit_unsplit_pkt", "interface_up_cnt", "interface_down_cnt", "reset_cnt", "page_alloc_fail", "dma_mapping_error", "stats_report_trigger_cnt", }; static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = { - "rx_posted_desc[%u]", "rx_completed_desc[%u]", "rx_consumed_desc[%u]", "rx_bytes[%u]", - "rx_cont_packet_cnt[%u]", "rx_frag_flip_cnt[%u]", "rx_frag_copy_cnt[%u]", - "rx_frag_alloc_cnt[%u]", + "rx_posted_desc[%u]", "rx_completed_desc[%u]", "rx_consumed_desc[%u]", + "rx_bytes[%u]", "rx_hsplit_bytes[%u]", "rx_cont_packet_cnt[%u]", + "rx_frag_flip_cnt[%u]", "rx_frag_copy_cnt[%u]", "rx_frag_alloc_cnt[%u]", "rx_dropped_pkt[%u]", "rx_copybreak_pkt[%u]", "rx_copied_pkt[%u]", "rx_queue_drop_cnt[%u]", "rx_no_buffers_posted[%u]", "rx_drops_packet_over_mru[%u]", "rx_drops_invalid_checksum[%u]", @@ -62,18 +63,20 @@ static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = { static const char gve_gstrings_tx_stats[][ETH_GSTRING_LEN] = { "tx_posted_desc[%u]", "tx_completed_desc[%u]", "tx_consumed_desc[%u]", "tx_bytes[%u]", "tx_wake[%u]", "tx_stop[%u]", "tx_event_counter[%u]", - "tx_dma_mapping_error[%u]", "tx_xsk_wakeup[%u]", - "tx_xsk_done[%u]", "tx_xsk_sent[%u]", "tx_xdp_xmit[%u]", "tx_xdp_xmit_errors[%u]" + "tx_dma_mapping_error[%u]", + "tx_xsk_sent[%u]", "tx_xdp_xmit[%u]", "tx_xdp_xmit_errors[%u]" }; -static const char gve_gstrings_adminq_stats[][ETH_GSTRING_LEN] = { +static const char gve_gstrings_adminq_stats[][ETH_GSTRING_LEN] __nonstring_array = { "adminq_prod_cnt", "adminq_cmd_fail", "adminq_timeouts", "adminq_describe_device_cnt", "adminq_cfg_device_resources_cnt", "adminq_register_page_list_cnt", "adminq_unregister_page_list_cnt", "adminq_create_tx_queue_cnt", "adminq_create_rx_queue_cnt", "adminq_destroy_tx_queue_cnt", "adminq_destroy_rx_queue_cnt", "adminq_dcfg_device_resources_cnt", "adminq_set_driver_parameter_cnt", - "adminq_report_stats_cnt", "adminq_report_link_speed_cnt" + "adminq_report_stats_cnt", "adminq_report_link_speed_cnt", "adminq_get_ptype_map_cnt", + "adminq_query_flow_rules", "adminq_cfg_flow_rule", "adminq_cfg_rss_cnt", + "adminq_query_rss_cnt", }; static const char gve_gstrings_priv_flags[][ETH_GSTRING_LEN] = { @@ -89,42 +92,34 @@ static const char gve_gstrings_priv_flags[][ETH_GSTRING_LEN] = { static void gve_get_strings(struct net_device *netdev, u32 stringset, u8 *data) { struct gve_priv *priv = netdev_priv(netdev); - char *s = (char *)data; + u8 *s = (char *)data; int num_tx_queues; int i, j; num_tx_queues = gve_num_tx_queues(priv); switch (stringset) { case ETH_SS_STATS: - memcpy(s, *gve_gstrings_main_stats, - sizeof(gve_gstrings_main_stats)); - s += sizeof(gve_gstrings_main_stats); - - for (i = 0; i < priv->rx_cfg.num_queues; i++) { - for (j = 0; j < NUM_GVE_RX_CNTS; j++) { - snprintf(s, ETH_GSTRING_LEN, - gve_gstrings_rx_stats[j], i); - s += ETH_GSTRING_LEN; - } - } + for (i = 0; i < ARRAY_SIZE(gve_gstrings_main_stats); i++) + ethtool_puts(&s, gve_gstrings_main_stats[i]); - for (i = 0; i < num_tx_queues; i++) { - for (j = 0; j < NUM_GVE_TX_CNTS; j++) { - snprintf(s, ETH_GSTRING_LEN, - gve_gstrings_tx_stats[j], i); - s += ETH_GSTRING_LEN; - } - } + for (i = 0; i < priv->rx_cfg.num_queues; i++) + for (j = 0; j < NUM_GVE_RX_CNTS; j++) + ethtool_sprintf(&s, gve_gstrings_rx_stats[j], + i); + + for (i = 0; i < num_tx_queues; i++) + for (j = 0; j < NUM_GVE_TX_CNTS; j++) + ethtool_sprintf(&s, gve_gstrings_tx_stats[j], + i); + + for (i = 0; i < ARRAY_SIZE(gve_gstrings_adminq_stats); i++) + ethtool_cpy(&s, gve_gstrings_adminq_stats[i]); - memcpy(s, *gve_gstrings_adminq_stats, - sizeof(gve_gstrings_adminq_stats)); - s += sizeof(gve_gstrings_adminq_stats); break; case ETH_SS_PRIV_FLAGS: - memcpy(s, *gve_gstrings_priv_flags, - sizeof(gve_gstrings_priv_flags)); - s += sizeof(gve_gstrings_priv_flags); + for (i = 0; i < ARRAY_SIZE(gve_gstrings_priv_flags); i++) + ethtool_puts(&s, gve_gstrings_priv_flags[i]); break; default: @@ -154,15 +149,19 @@ static void gve_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats, u64 *data) { - u64 tmp_rx_pkts, tmp_rx_bytes, tmp_rx_skb_alloc_fail, - tmp_rx_buf_alloc_fail, tmp_rx_desc_err_dropped_pkt, + u64 tmp_rx_pkts, tmp_rx_hsplit_pkt, tmp_rx_bytes, tmp_rx_hsplit_bytes, + tmp_rx_skb_alloc_fail, tmp_rx_buf_alloc_fail, + tmp_rx_desc_err_dropped_pkt, tmp_rx_hsplit_unsplit_pkt, tmp_tx_pkts, tmp_tx_bytes; - u64 rx_buf_alloc_fail, rx_desc_err_dropped_pkt, rx_pkts, - rx_skb_alloc_fail, rx_bytes, tx_pkts, tx_bytes, tx_dropped; + u64 rx_buf_alloc_fail, rx_desc_err_dropped_pkt, rx_hsplit_unsplit_pkt, + rx_pkts, rx_hsplit_pkt, rx_skb_alloc_fail, rx_bytes, tx_pkts, tx_bytes, + tx_dropped; int stats_idx, base_stats_idx, max_stats_idx; struct stats *report_stats; int *rx_qid_to_stats_idx; int *tx_qid_to_stats_idx; + int num_stopped_rxqs = 0; + int num_stopped_txqs = 0; struct gve_priv *priv; bool skip_nic_stats; unsigned int start; @@ -179,14 +178,27 @@ gve_get_ethtool_stats(struct net_device *netdev, sizeof(int), GFP_KERNEL); if (!rx_qid_to_stats_idx) return; + for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) { + rx_qid_to_stats_idx[ring] = -1; + if (!gve_rx_was_added_to_block(priv, ring)) + num_stopped_rxqs++; + } tx_qid_to_stats_idx = kmalloc_array(num_tx_queues, sizeof(int), GFP_KERNEL); if (!tx_qid_to_stats_idx) { kfree(rx_qid_to_stats_idx); return; } - for (rx_pkts = 0, rx_bytes = 0, rx_skb_alloc_fail = 0, - rx_buf_alloc_fail = 0, rx_desc_err_dropped_pkt = 0, ring = 0; + for (ring = 0; ring < num_tx_queues; ring++) { + tx_qid_to_stats_idx[ring] = -1; + if (!gve_tx_was_added_to_block(priv, ring)) + num_stopped_txqs++; + } + + for (rx_pkts = 0, rx_bytes = 0, rx_hsplit_pkt = 0, + rx_skb_alloc_fail = 0, rx_buf_alloc_fail = 0, + rx_desc_err_dropped_pkt = 0, rx_hsplit_unsplit_pkt = 0, + ring = 0; ring < priv->rx_cfg.num_queues; ring++) { if (priv->rx) { do { @@ -195,18 +207,23 @@ gve_get_ethtool_stats(struct net_device *netdev, start = u64_stats_fetch_begin(&priv->rx[ring].statss); tmp_rx_pkts = rx->rpackets; + tmp_rx_hsplit_pkt = rx->rx_hsplit_pkt; tmp_rx_bytes = rx->rbytes; tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail; tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail; tmp_rx_desc_err_dropped_pkt = rx->rx_desc_err_dropped_pkt; + tmp_rx_hsplit_unsplit_pkt = + rx->rx_hsplit_unsplit_pkt; } while (u64_stats_fetch_retry(&priv->rx[ring].statss, start)); rx_pkts += tmp_rx_pkts; + rx_hsplit_pkt += tmp_rx_hsplit_pkt; rx_bytes += tmp_rx_bytes; rx_skb_alloc_fail += tmp_rx_skb_alloc_fail; rx_buf_alloc_fail += tmp_rx_buf_alloc_fail; rx_desc_err_dropped_pkt += tmp_rx_desc_err_dropped_pkt; + rx_hsplit_unsplit_pkt += tmp_rx_hsplit_unsplit_pkt; } } for (tx_pkts = 0, tx_bytes = 0, tx_dropped = 0, ring = 0; @@ -227,6 +244,7 @@ gve_get_ethtool_stats(struct net_device *netdev, i = 0; data[i++] = rx_pkts; + data[i++] = rx_hsplit_pkt; data[i++] = tx_pkts; data[i++] = rx_bytes; data[i++] = tx_bytes; @@ -238,6 +256,7 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = rx_skb_alloc_fail; data[i++] = rx_buf_alloc_fail; data[i++] = rx_desc_err_dropped_pkt; + data[i++] = rx_hsplit_unsplit_pkt; data[i++] = priv->interface_up_cnt; data[i++] = priv->interface_down_cnt; data[i++] = priv->reset_cnt; @@ -249,7 +268,13 @@ gve_get_ethtool_stats(struct net_device *netdev, /* For rx cross-reporting stats, start from nic rx stats in report */ base_stats_idx = GVE_TX_STATS_REPORT_NUM * num_tx_queues + GVE_RX_STATS_REPORT_NUM * priv->rx_cfg.num_queues; - max_stats_idx = NIC_RX_STATS_REPORT_NUM * priv->rx_cfg.num_queues + + /* The boundary between driver stats and NIC stats shifts if there are + * stopped queues. + */ + base_stats_idx += NIC_RX_STATS_REPORT_NUM * num_stopped_rxqs + + NIC_TX_STATS_REPORT_NUM * num_stopped_txqs; + max_stats_idx = NIC_RX_STATS_REPORT_NUM * + (priv->rx_cfg.num_queues - num_stopped_rxqs) + base_stats_idx; /* Preprocess the stats report for rx, map queue id to start index */ skip_nic_stats = false; @@ -263,6 +288,10 @@ gve_get_ethtool_stats(struct net_device *netdev, skip_nic_stats = true; break; } + if (queue_id < 0 || queue_id >= priv->rx_cfg.num_queues) { + net_err_ratelimited("Invalid rxq id in NIC stats\n"); + continue; + } rx_qid_to_stats_idx[queue_id] = stats_idx; } /* walk RX rings */ @@ -277,6 +306,7 @@ gve_get_ethtool_stats(struct net_device *netdev, start = u64_stats_fetch_begin(&priv->rx[ring].statss); tmp_rx_bytes = rx->rbytes; + tmp_rx_hsplit_bytes = rx->rx_hsplit_bytes; tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail; tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail; tmp_rx_desc_err_dropped_pkt = @@ -284,6 +314,7 @@ gve_get_ethtool_stats(struct net_device *netdev, } while (u64_stats_fetch_retry(&priv->rx[ring].statss, start)); data[i++] = tmp_rx_bytes; + data[i++] = tmp_rx_hsplit_bytes; data[i++] = rx->rx_cont_packet_cnt; data[i++] = rx->rx_frag_flip_cnt; data[i++] = rx->rx_frag_copy_cnt; @@ -295,11 +326,11 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = rx->rx_copybreak_pkt; data[i++] = rx->rx_copied_pkt; /* stats from NIC */ - if (skip_nic_stats) { + stats_idx = rx_qid_to_stats_idx[ring]; + if (skip_nic_stats || stats_idx < 0) { /* skip NIC rx stats */ i += NIC_RX_STATS_REPORT_NUM; } else { - stats_idx = rx_qid_to_stats_idx[ring]; for (j = 0; j < NIC_RX_STATS_REPORT_NUM; j++) { u64 value = be64_to_cpu(report_stats[stats_idx + j].value); @@ -325,7 +356,8 @@ gve_get_ethtool_stats(struct net_device *netdev, /* For tx cross-reporting stats, start from nic tx stats in report */ base_stats_idx = max_stats_idx; - max_stats_idx = NIC_TX_STATS_REPORT_NUM * num_tx_queues + + max_stats_idx = NIC_TX_STATS_REPORT_NUM * + (num_tx_queues - num_stopped_txqs) + max_stats_idx; /* Preprocess the stats report for tx, map queue id to start index */ skip_nic_stats = false; @@ -339,6 +371,10 @@ gve_get_ethtool_stats(struct net_device *netdev, skip_nic_stats = true; break; } + if (queue_id < 0 || queue_id >= num_tx_queues) { + net_err_ratelimited("Invalid txq id in NIC stats\n"); + continue; + } tx_qid_to_stats_idx[queue_id] = stats_idx; } /* walk TX rings */ @@ -356,7 +392,9 @@ gve_get_ethtool_stats(struct net_device *netdev, */ data[i++] = 0; data[i++] = 0; - data[i++] = tx->dqo_tx.tail - tx->dqo_tx.head; + data[i++] = + (tx->dqo_tx.tail - tx->dqo_tx.head) & + tx->mask; } do { start = @@ -370,20 +408,18 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = gve_tx_load_event_counter(priv, tx); data[i++] = tx->dma_mapping_error; /* stats from NIC */ - if (skip_nic_stats) { + stats_idx = tx_qid_to_stats_idx[ring]; + if (skip_nic_stats || stats_idx < 0) { /* skip NIC tx stats */ i += NIC_TX_STATS_REPORT_NUM; } else { - stats_idx = tx_qid_to_stats_idx[ring]; for (j = 0; j < NIC_TX_STATS_REPORT_NUM; j++) { u64 value = be64_to_cpu(report_stats[stats_idx + j].value); data[i++] = value; } } - /* XDP xsk counters */ - data[i++] = tx->xdp_xsk_wakeup; - data[i++] = tx->xdp_xsk_done; + /* XDP counters */ do { start = u64_stats_fetch_begin(&priv->tx[ring].statss); data[i] = tx->xdp_xsk_sent; @@ -415,6 +451,11 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = priv->adminq_set_driver_parameter_cnt; data[i++] = priv->adminq_report_stats_cnt; data[i++] = priv->adminq_report_link_speed_cnt; + data[i++] = priv->adminq_get_ptype_map_cnt; + data[i++] = priv->adminq_query_flow_rules_cnt; + data[i++] = priv->adminq_cfg_flow_rule_cnt; + data[i++] = priv->adminq_cfg_rss_cnt; + data[i++] = priv->adminq_query_rss_cnt; } static void gve_get_channels(struct net_device *netdev, @@ -436,11 +477,12 @@ static int gve_set_channels(struct net_device *netdev, struct ethtool_channels *cmd) { struct gve_priv *priv = netdev_priv(netdev); - struct gve_queue_config new_tx_cfg = priv->tx_cfg; - struct gve_queue_config new_rx_cfg = priv->rx_cfg; + struct gve_tx_queue_config new_tx_cfg = priv->tx_cfg; + struct gve_rx_queue_config new_rx_cfg = priv->rx_cfg; struct ethtool_channels old_settings; int new_tx = cmd->tx_count; int new_rx = cmd->rx_count; + bool reset_rss = false; gve_get_channels(netdev, &old_settings); @@ -451,22 +493,27 @@ static int gve_set_channels(struct net_device *netdev, if (!new_rx || !new_tx) return -EINVAL; - if (priv->num_xdp_queues && - (new_tx != new_rx || (2 * new_tx > priv->tx_cfg.max_queues))) { - dev_err(&priv->pdev->dev, "XDP load failed: The number of configured RX queues should be equal to the number of configured TX queues and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues"); - return -EINVAL; - } + if (priv->xdp_prog) { + if (new_tx != new_rx || + (2 * new_tx > priv->tx_cfg.max_queues)) { + dev_err(&priv->pdev->dev, "The number of configured RX queues should be equal to the number of configured TX queues and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues when XDP program is installed"); + return -EINVAL; + } - if (!netif_carrier_ok(netdev)) { - priv->tx_cfg.num_queues = new_tx; - priv->rx_cfg.num_queues = new_rx; - return 0; + /* One XDP TX queue per RX queue. */ + new_tx_cfg.num_xdp_queues = new_rx; + } else { + new_tx_cfg.num_xdp_queues = 0; } + if (new_rx != priv->rx_cfg.num_queues && + priv->cache_rss_config && !netif_is_rxfh_configured(netdev)) + reset_rss = true; + new_tx_cfg.num_queues = new_tx; new_rx_cfg.num_queues = new_rx; - return gve_adjust_queues(priv, new_rx_cfg, new_tx_cfg); + return gve_adjust_queues(priv, new_rx_cfg, new_tx_cfg, reset_rss); } static void gve_get_ringparam(struct net_device *netdev, @@ -476,10 +523,94 @@ static void gve_get_ringparam(struct net_device *netdev, { struct gve_priv *priv = netdev_priv(netdev); - cmd->rx_max_pending = priv->rx_desc_cnt; - cmd->tx_max_pending = priv->tx_desc_cnt; + cmd->rx_max_pending = priv->max_rx_desc_cnt; + cmd->tx_max_pending = priv->max_tx_desc_cnt; cmd->rx_pending = priv->rx_desc_cnt; cmd->tx_pending = priv->tx_desc_cnt; + + if (!gve_header_split_supported(priv)) + kernel_cmd->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_UNKNOWN; + else if (priv->header_split_enabled) + kernel_cmd->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_ENABLED; + else + kernel_cmd->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_DISABLED; +} + +static int gve_adjust_ring_sizes(struct gve_priv *priv, + u16 new_tx_desc_cnt, + u16 new_rx_desc_cnt) +{ + struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; + struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; + int err; + + /* get current queue configuration */ + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); + + /* copy over the new ring_size from ethtool */ + tx_alloc_cfg.ring_size = new_tx_desc_cnt; + rx_alloc_cfg.ring_size = new_rx_desc_cnt; + + if (netif_running(priv->dev)) { + err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); + if (err) + return err; + } + + /* Set new ring_size for the next up */ + priv->tx_desc_cnt = new_tx_desc_cnt; + priv->rx_desc_cnt = new_rx_desc_cnt; + + return 0; +} + +static int gve_validate_req_ring_size(struct gve_priv *priv, u16 new_tx_desc_cnt, + u16 new_rx_desc_cnt) +{ + /* check for valid range */ + if (new_tx_desc_cnt < priv->min_tx_desc_cnt || + new_tx_desc_cnt > priv->max_tx_desc_cnt || + new_rx_desc_cnt < priv->min_rx_desc_cnt || + new_rx_desc_cnt > priv->max_rx_desc_cnt) { + dev_err(&priv->pdev->dev, "Requested descriptor count out of range\n"); + return -EINVAL; + } + + if (!is_power_of_2(new_tx_desc_cnt) || !is_power_of_2(new_rx_desc_cnt)) { + dev_err(&priv->pdev->dev, "Requested descriptor count has to be a power of 2\n"); + return -EINVAL; + } + return 0; +} + +static int gve_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *cmd, + struct kernel_ethtool_ringparam *kernel_cmd, + struct netlink_ext_ack *extack) +{ + struct gve_priv *priv = netdev_priv(netdev); + u16 new_tx_cnt, new_rx_cnt; + int err; + + err = gve_set_hsplit_config(priv, kernel_cmd->tcp_data_split); + if (err) + return err; + + if (cmd->tx_pending == priv->tx_desc_cnt && cmd->rx_pending == priv->rx_desc_cnt) + return 0; + + if (!priv->modify_ring_size_enabled) { + dev_err(&priv->pdev->dev, "Modify ring size is not supported.\n"); + return -EOPNOTSUPP; + } + + new_tx_cnt = cmd->tx_pending; + new_rx_cnt = cmd->rx_pending; + + if (gve_validate_req_ring_size(priv, new_tx_cnt, new_rx_cnt)) + return -EINVAL; + + return gve_adjust_ring_sizes(priv, new_tx_cnt, new_rx_cnt); } static int gve_user_reset(struct net_device *netdev, u32 *flags) @@ -518,8 +649,7 @@ static int gve_set_tunable(struct net_device *netdev, switch (etuna->id) { case ETHTOOL_RX_COPYBREAK: { - u32 max_copybreak = gve_is_gqi(priv) ? - GVE_DEFAULT_RX_BUFFER_SIZE : priv->data_buffer_size_dqo; + u32 max_copybreak = priv->rx_cfg.packet_buffer_size; len = *(u32 *)value; if (len > max_copybreak) @@ -575,7 +705,7 @@ static int gve_set_priv_flags(struct net_device *netdev, u32 flags) memset(priv->stats_report->stats, 0, (tx_stats_num + rx_stats_num) * sizeof(struct stats)); - del_timer_sync(&priv->stats_report_timer); + timer_delete_sync(&priv->stats_report_timer); } return 0; } @@ -653,8 +783,154 @@ static int gve_set_coalesce(struct net_device *netdev, return 0; } +static int gve_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd) +{ + struct gve_priv *priv = netdev_priv(netdev); + int err = 0; + + if (!(netdev->features & NETIF_F_NTUPLE)) + return -EOPNOTSUPP; + + switch (cmd->cmd) { + case ETHTOOL_SRXCLSRLINS: + err = gve_add_flow_rule(priv, cmd); + break; + case ETHTOOL_SRXCLSRLDEL: + err = gve_del_flow_rule(priv, cmd); + break; + case ETHTOOL_SRXFH: + err = -EOPNOTSUPP; + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static int gve_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, u32 *rule_locs) +{ + struct gve_priv *priv = netdev_priv(netdev); + int err = 0; + + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + cmd->data = priv->rx_cfg.num_queues; + break; + case ETHTOOL_GRXCLSRLCNT: + if (!priv->max_flow_rules) + return -EOPNOTSUPP; + + err = gve_adminq_query_flow_rules(priv, GVE_FLOW_RULE_QUERY_STATS, 0); + if (err) + return err; + + cmd->rule_cnt = priv->num_flow_rules; + cmd->data = priv->max_flow_rules; + break; + case ETHTOOL_GRXCLSRULE: + err = gve_get_flow_rule_entry(priv, cmd); + break; + case ETHTOOL_GRXCLSRLALL: + err = gve_get_flow_rule_ids(priv, cmd, (u32 *)rule_locs); + break; + case ETHTOOL_GRXFH: + err = -EOPNOTSUPP; + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static u32 gve_get_rxfh_key_size(struct net_device *netdev) +{ + struct gve_priv *priv = netdev_priv(netdev); + + return priv->rss_key_size; +} + +static u32 gve_get_rxfh_indir_size(struct net_device *netdev) +{ + struct gve_priv *priv = netdev_priv(netdev); + + return priv->rss_lut_size; +} + +static void gve_get_rss_config_cache(struct gve_priv *priv, + struct ethtool_rxfh_param *rxfh) +{ + struct gve_rss_config *rss_config = &priv->rss_config; + + rxfh->hfunc = ETH_RSS_HASH_TOP; + + if (rxfh->key) { + rxfh->key_size = priv->rss_key_size; + memcpy(rxfh->key, rss_config->hash_key, priv->rss_key_size); + } + + if (rxfh->indir) { + rxfh->indir_size = priv->rss_lut_size; + memcpy(rxfh->indir, rss_config->hash_lut, + priv->rss_lut_size * sizeof(*rxfh->indir)); + } +} + +static int gve_get_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh) +{ + struct gve_priv *priv = netdev_priv(netdev); + + if (!priv->rss_key_size || !priv->rss_lut_size) + return -EOPNOTSUPP; + + if (priv->cache_rss_config) { + gve_get_rss_config_cache(priv, rxfh); + return 0; + } + + return gve_adminq_query_rss_config(priv, rxfh); +} + +static void gve_set_rss_config_cache(struct gve_priv *priv, + struct ethtool_rxfh_param *rxfh) +{ + struct gve_rss_config *rss_config = &priv->rss_config; + + if (rxfh->key) + memcpy(rss_config->hash_key, rxfh->key, priv->rss_key_size); + + if (rxfh->indir) + memcpy(rss_config->hash_lut, rxfh->indir, + priv->rss_lut_size * sizeof(*rxfh->indir)); +} + +static int gve_set_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) +{ + struct gve_priv *priv = netdev_priv(netdev); + int err; + + if (!priv->rss_key_size || !priv->rss_lut_size) + return -EOPNOTSUPP; + + err = gve_adminq_configure_rss(priv, rxfh); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Fail to configure RSS config"); + return err; + } + + if (priv->cache_rss_config) + gve_set_rss_config_cache(priv, rxfh); + + return 0; +} + const struct ethtool_ops gve_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS, + .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT, .get_drvinfo = gve_get_drvinfo, .get_strings = gve_get_strings, .get_sset_count = gve_get_sset_count, @@ -663,14 +939,22 @@ const struct ethtool_ops gve_ethtool_ops = { .get_msglevel = gve_get_msglevel, .set_channels = gve_set_channels, .get_channels = gve_get_channels, + .set_rxnfc = gve_set_rxnfc, + .get_rxnfc = gve_get_rxnfc, + .get_rxfh_indir_size = gve_get_rxfh_indir_size, + .get_rxfh_key_size = gve_get_rxfh_key_size, + .get_rxfh = gve_get_rxfh, + .set_rxfh = gve_set_rxfh, .get_link = ethtool_op_get_link, .get_coalesce = gve_get_coalesce, .set_coalesce = gve_set_coalesce, .get_ringparam = gve_get_ringparam, + .set_ringparam = gve_set_ringparam, .reset = gve_user_reset, .get_tunable = gve_get_tunable, .set_tunable = gve_set_tunable, .get_priv_flags = gve_get_priv_flags, .set_priv_flags = gve_set_priv_flags, - .get_link_ksettings = gve_get_link_ksettings + .get_link_ksettings = gve_get_link_ksettings, + .get_ts_info = ethtool_op_get_ts_info, }; diff --git a/drivers/net/ethernet/google/gve/gve_flow_rule.c b/drivers/net/ethernet/google/gve/gve_flow_rule.c new file mode 100644 index 000000000000..0bb8cd1876a3 --- /dev/null +++ b/drivers/net/ethernet/google/gve/gve_flow_rule.c @@ -0,0 +1,298 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Google virtual Ethernet (gve) driver + * + * Copyright (C) 2015-2024 Google LLC + */ + +#include "gve.h" +#include "gve_adminq.h" + +static +int gve_fill_ethtool_flow_spec(struct ethtool_rx_flow_spec *fsp, + struct gve_adminq_queried_flow_rule *rule) +{ + struct gve_adminq_flow_rule *flow_rule = &rule->flow_rule; + static const u16 flow_type_lut[] = { + [GVE_FLOW_TYPE_TCPV4] = TCP_V4_FLOW, + [GVE_FLOW_TYPE_UDPV4] = UDP_V4_FLOW, + [GVE_FLOW_TYPE_SCTPV4] = SCTP_V4_FLOW, + [GVE_FLOW_TYPE_AHV4] = AH_V4_FLOW, + [GVE_FLOW_TYPE_ESPV4] = ESP_V4_FLOW, + [GVE_FLOW_TYPE_TCPV6] = TCP_V6_FLOW, + [GVE_FLOW_TYPE_UDPV6] = UDP_V6_FLOW, + [GVE_FLOW_TYPE_SCTPV6] = SCTP_V6_FLOW, + [GVE_FLOW_TYPE_AHV6] = AH_V6_FLOW, + [GVE_FLOW_TYPE_ESPV6] = ESP_V6_FLOW, + }; + + if (be16_to_cpu(flow_rule->flow_type) >= ARRAY_SIZE(flow_type_lut)) + return -EINVAL; + + fsp->flow_type = flow_type_lut[be16_to_cpu(flow_rule->flow_type)]; + + memset(&fsp->h_u, 0, sizeof(fsp->h_u)); + memset(&fsp->h_ext, 0, sizeof(fsp->h_ext)); + memset(&fsp->m_u, 0, sizeof(fsp->m_u)); + memset(&fsp->m_ext, 0, sizeof(fsp->m_ext)); + + switch (fsp->flow_type) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + case SCTP_V4_FLOW: + fsp->h_u.tcp_ip4_spec.ip4src = flow_rule->key.src_ip[0]; + fsp->h_u.tcp_ip4_spec.ip4dst = flow_rule->key.dst_ip[0]; + fsp->h_u.tcp_ip4_spec.psrc = flow_rule->key.src_port; + fsp->h_u.tcp_ip4_spec.pdst = flow_rule->key.dst_port; + fsp->h_u.tcp_ip4_spec.tos = flow_rule->key.tos; + fsp->m_u.tcp_ip4_spec.ip4src = flow_rule->mask.src_ip[0]; + fsp->m_u.tcp_ip4_spec.ip4dst = flow_rule->mask.dst_ip[0]; + fsp->m_u.tcp_ip4_spec.psrc = flow_rule->mask.src_port; + fsp->m_u.tcp_ip4_spec.pdst = flow_rule->mask.dst_port; + fsp->m_u.tcp_ip4_spec.tos = flow_rule->mask.tos; + break; + case AH_V4_FLOW: + case ESP_V4_FLOW: + fsp->h_u.ah_ip4_spec.ip4src = flow_rule->key.src_ip[0]; + fsp->h_u.ah_ip4_spec.ip4dst = flow_rule->key.dst_ip[0]; + fsp->h_u.ah_ip4_spec.spi = flow_rule->key.spi; + fsp->h_u.ah_ip4_spec.tos = flow_rule->key.tos; + fsp->m_u.ah_ip4_spec.ip4src = flow_rule->mask.src_ip[0]; + fsp->m_u.ah_ip4_spec.ip4dst = flow_rule->mask.dst_ip[0]; + fsp->m_u.ah_ip4_spec.spi = flow_rule->mask.spi; + fsp->m_u.ah_ip4_spec.tos = flow_rule->mask.tos; + break; + case TCP_V6_FLOW: + case UDP_V6_FLOW: + case SCTP_V6_FLOW: + memcpy(fsp->h_u.tcp_ip6_spec.ip6src, &flow_rule->key.src_ip, + sizeof(struct in6_addr)); + memcpy(fsp->h_u.tcp_ip6_spec.ip6dst, &flow_rule->key.dst_ip, + sizeof(struct in6_addr)); + fsp->h_u.tcp_ip6_spec.psrc = flow_rule->key.src_port; + fsp->h_u.tcp_ip6_spec.pdst = flow_rule->key.dst_port; + fsp->h_u.tcp_ip6_spec.tclass = flow_rule->key.tclass; + memcpy(fsp->m_u.tcp_ip6_spec.ip6src, &flow_rule->mask.src_ip, + sizeof(struct in6_addr)); + memcpy(fsp->m_u.tcp_ip6_spec.ip6dst, &flow_rule->mask.dst_ip, + sizeof(struct in6_addr)); + fsp->m_u.tcp_ip6_spec.psrc = flow_rule->mask.src_port; + fsp->m_u.tcp_ip6_spec.pdst = flow_rule->mask.dst_port; + fsp->m_u.tcp_ip6_spec.tclass = flow_rule->mask.tclass; + break; + case AH_V6_FLOW: + case ESP_V6_FLOW: + memcpy(fsp->h_u.ah_ip6_spec.ip6src, &flow_rule->key.src_ip, + sizeof(struct in6_addr)); + memcpy(fsp->h_u.ah_ip6_spec.ip6dst, &flow_rule->key.dst_ip, + sizeof(struct in6_addr)); + fsp->h_u.ah_ip6_spec.spi = flow_rule->key.spi; + fsp->h_u.ah_ip6_spec.tclass = flow_rule->key.tclass; + memcpy(fsp->m_u.ah_ip6_spec.ip6src, &flow_rule->mask.src_ip, + sizeof(struct in6_addr)); + memcpy(fsp->m_u.ah_ip6_spec.ip6dst, &flow_rule->mask.dst_ip, + sizeof(struct in6_addr)); + fsp->m_u.ah_ip6_spec.spi = flow_rule->mask.spi; + fsp->m_u.ah_ip6_spec.tclass = flow_rule->mask.tclass; + break; + default: + return -EINVAL; + } + + fsp->ring_cookie = be16_to_cpu(flow_rule->action); + + return 0; +} + +static int gve_generate_flow_rule(struct gve_priv *priv, struct ethtool_rx_flow_spec *fsp, + struct gve_adminq_flow_rule *rule) +{ + static const u16 flow_type_lut[] = { + [TCP_V4_FLOW] = GVE_FLOW_TYPE_TCPV4, + [UDP_V4_FLOW] = GVE_FLOW_TYPE_UDPV4, + [SCTP_V4_FLOW] = GVE_FLOW_TYPE_SCTPV4, + [AH_V4_FLOW] = GVE_FLOW_TYPE_AHV4, + [ESP_V4_FLOW] = GVE_FLOW_TYPE_ESPV4, + [TCP_V6_FLOW] = GVE_FLOW_TYPE_TCPV6, + [UDP_V6_FLOW] = GVE_FLOW_TYPE_UDPV6, + [SCTP_V6_FLOW] = GVE_FLOW_TYPE_SCTPV6, + [AH_V6_FLOW] = GVE_FLOW_TYPE_AHV6, + [ESP_V6_FLOW] = GVE_FLOW_TYPE_ESPV6, + }; + u32 flow_type; + + if (fsp->ring_cookie == RX_CLS_FLOW_DISC) + return -EOPNOTSUPP; + + if (fsp->ring_cookie >= priv->rx_cfg.num_queues) + return -EINVAL; + + rule->action = cpu_to_be16(fsp->ring_cookie); + + flow_type = fsp->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS); + if (!flow_type || flow_type >= ARRAY_SIZE(flow_type_lut)) + return -EINVAL; + + rule->flow_type = cpu_to_be16(flow_type_lut[flow_type]); + + switch (flow_type) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + case SCTP_V4_FLOW: + rule->key.src_ip[0] = fsp->h_u.tcp_ip4_spec.ip4src; + rule->key.dst_ip[0] = fsp->h_u.tcp_ip4_spec.ip4dst; + rule->key.src_port = fsp->h_u.tcp_ip4_spec.psrc; + rule->key.dst_port = fsp->h_u.tcp_ip4_spec.pdst; + rule->mask.src_ip[0] = fsp->m_u.tcp_ip4_spec.ip4src; + rule->mask.dst_ip[0] = fsp->m_u.tcp_ip4_spec.ip4dst; + rule->mask.src_port = fsp->m_u.tcp_ip4_spec.psrc; + rule->mask.dst_port = fsp->m_u.tcp_ip4_spec.pdst; + break; + case AH_V4_FLOW: + case ESP_V4_FLOW: + rule->key.src_ip[0] = fsp->h_u.tcp_ip4_spec.ip4src; + rule->key.dst_ip[0] = fsp->h_u.tcp_ip4_spec.ip4dst; + rule->key.spi = fsp->h_u.ah_ip4_spec.spi; + rule->mask.src_ip[0] = fsp->m_u.tcp_ip4_spec.ip4src; + rule->mask.dst_ip[0] = fsp->m_u.tcp_ip4_spec.ip4dst; + rule->mask.spi = fsp->m_u.ah_ip4_spec.spi; + break; + case TCP_V6_FLOW: + case UDP_V6_FLOW: + case SCTP_V6_FLOW: + memcpy(&rule->key.src_ip, fsp->h_u.tcp_ip6_spec.ip6src, + sizeof(struct in6_addr)); + memcpy(&rule->key.dst_ip, fsp->h_u.tcp_ip6_spec.ip6dst, + sizeof(struct in6_addr)); + rule->key.src_port = fsp->h_u.tcp_ip6_spec.psrc; + rule->key.dst_port = fsp->h_u.tcp_ip6_spec.pdst; + memcpy(&rule->mask.src_ip, fsp->m_u.tcp_ip6_spec.ip6src, + sizeof(struct in6_addr)); + memcpy(&rule->mask.dst_ip, fsp->m_u.tcp_ip6_spec.ip6dst, + sizeof(struct in6_addr)); + rule->mask.src_port = fsp->m_u.tcp_ip6_spec.psrc; + rule->mask.dst_port = fsp->m_u.tcp_ip6_spec.pdst; + break; + case AH_V6_FLOW: + case ESP_V6_FLOW: + memcpy(&rule->key.src_ip, fsp->h_u.usr_ip6_spec.ip6src, + sizeof(struct in6_addr)); + memcpy(&rule->key.dst_ip, fsp->h_u.usr_ip6_spec.ip6dst, + sizeof(struct in6_addr)); + rule->key.spi = fsp->h_u.ah_ip6_spec.spi; + memcpy(&rule->mask.src_ip, fsp->m_u.usr_ip6_spec.ip6src, + sizeof(struct in6_addr)); + memcpy(&rule->mask.dst_ip, fsp->m_u.usr_ip6_spec.ip6dst, + sizeof(struct in6_addr)); + rule->key.spi = fsp->h_u.ah_ip6_spec.spi; + break; + default: + /* not doing un-parsed flow types */ + return -EINVAL; + } + + return 0; +} + +int gve_get_flow_rule_entry(struct gve_priv *priv, struct ethtool_rxnfc *cmd) +{ + struct gve_adminq_queried_flow_rule *rules_cache = priv->flow_rules_cache.rules_cache; + struct ethtool_rx_flow_spec *fsp = (struct ethtool_rx_flow_spec *)&cmd->fs; + u32 *cache_num = &priv->flow_rules_cache.rules_cache_num; + struct gve_adminq_queried_flow_rule *rule = NULL; + int err = 0; + u32 i; + + if (!priv->max_flow_rules) + return -EOPNOTSUPP; + + if (!priv->flow_rules_cache.rules_cache_synced || + fsp->location < be32_to_cpu(rules_cache[0].location) || + fsp->location > be32_to_cpu(rules_cache[*cache_num - 1].location)) { + err = gve_adminq_query_flow_rules(priv, GVE_FLOW_RULE_QUERY_RULES, fsp->location); + if (err) + return err; + + priv->flow_rules_cache.rules_cache_synced = true; + } + + for (i = 0; i < *cache_num; i++) { + if (fsp->location == be32_to_cpu(rules_cache[i].location)) { + rule = &rules_cache[i]; + break; + } + } + + if (!rule) + return -EINVAL; + + err = gve_fill_ethtool_flow_spec(fsp, rule); + + return err; +} + +int gve_get_flow_rule_ids(struct gve_priv *priv, struct ethtool_rxnfc *cmd, u32 *rule_locs) +{ + __be32 *rule_ids_cache = priv->flow_rules_cache.rule_ids_cache; + u32 *cache_num = &priv->flow_rules_cache.rule_ids_cache_num; + u32 starting_rule_id = 0; + u32 i = 0, j = 0; + int err = 0; + + if (!priv->max_flow_rules) + return -EOPNOTSUPP; + + do { + err = gve_adminq_query_flow_rules(priv, GVE_FLOW_RULE_QUERY_IDS, + starting_rule_id); + if (err) + return err; + + for (i = 0; i < *cache_num; i++) { + if (j >= cmd->rule_cnt) + return -EMSGSIZE; + + rule_locs[j++] = be32_to_cpu(rule_ids_cache[i]); + starting_rule_id = be32_to_cpu(rule_ids_cache[i]) + 1; + } + } while (*cache_num != 0); + cmd->data = priv->max_flow_rules; + + return err; +} + +int gve_add_flow_rule(struct gve_priv *priv, struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fsp = &cmd->fs; + struct gve_adminq_flow_rule *rule = NULL; + int err; + + if (!priv->max_flow_rules) + return -EOPNOTSUPP; + + rule = kvzalloc(sizeof(*rule), GFP_KERNEL); + if (!rule) + return -ENOMEM; + + err = gve_generate_flow_rule(priv, fsp, rule); + if (err) + goto out; + + err = gve_adminq_add_flow_rule(priv, rule, fsp->location); + +out: + kvfree(rule); + if (err) + dev_err(&priv->pdev->dev, "Failed to add the flow rule: %u", fsp->location); + + return err; +} + +int gve_del_flow_rule(struct gve_priv *priv, struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fsp = (struct ethtool_rx_flow_spec *)&cmd->fs; + + if (!priv->max_flow_rules) + return -EOPNOTSUPP; + + return gve_adminq_del_flow_rule(priv, fsp->location); +} diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 619bf63ec935..e1ffbd561fac 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: (GPL-2.0 OR MIT) /* Google virtual Ethernet (gve) driver * - * Copyright (C) 2015-2021 Google, Inc. + * Copyright (C) 2015-2024 Google LLC */ #include <linux/bpf.h> @@ -9,6 +9,7 @@ #include <linux/etherdevice.h> #include <linux/filter.h> #include <linux/interrupt.h> +#include <linux/irq.h> #include <linux/module.h> #include <linux/pci.h> #include <linux/sched.h> @@ -16,12 +17,14 @@ #include <linux/workqueue.h> #include <linux/utsname.h> #include <linux/version.h> +#include <net/netdev_queues.h> #include <net/sch_generic.h> #include <net/xdp_sock_drv.h> #include "gve.h" #include "gve_dqo.h" #include "gve_adminq.h" #include "gve_register.h" +#include "gve_utils.h" #define GVE_DEFAULT_RX_COPYBREAK (256) @@ -138,6 +141,86 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) } } +static int gve_alloc_flow_rule_caches(struct gve_priv *priv) +{ + struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache; + int err = 0; + + if (!priv->max_flow_rules) + return 0; + + flow_rules_cache->rules_cache = + kvcalloc(GVE_FLOW_RULES_CACHE_SIZE, sizeof(*flow_rules_cache->rules_cache), + GFP_KERNEL); + if (!flow_rules_cache->rules_cache) { + dev_err(&priv->pdev->dev, "Cannot alloc flow rules cache\n"); + return -ENOMEM; + } + + flow_rules_cache->rule_ids_cache = + kvcalloc(GVE_FLOW_RULE_IDS_CACHE_SIZE, sizeof(*flow_rules_cache->rule_ids_cache), + GFP_KERNEL); + if (!flow_rules_cache->rule_ids_cache) { + dev_err(&priv->pdev->dev, "Cannot alloc flow rule ids cache\n"); + err = -ENOMEM; + goto free_rules_cache; + } + + return 0; + +free_rules_cache: + kvfree(flow_rules_cache->rules_cache); + flow_rules_cache->rules_cache = NULL; + return err; +} + +static void gve_free_flow_rule_caches(struct gve_priv *priv) +{ + struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache; + + kvfree(flow_rules_cache->rule_ids_cache); + flow_rules_cache->rule_ids_cache = NULL; + kvfree(flow_rules_cache->rules_cache); + flow_rules_cache->rules_cache = NULL; +} + +static int gve_alloc_rss_config_cache(struct gve_priv *priv) +{ + struct gve_rss_config *rss_config = &priv->rss_config; + + if (!priv->cache_rss_config) + return 0; + + rss_config->hash_key = kcalloc(priv->rss_key_size, + sizeof(rss_config->hash_key[0]), + GFP_KERNEL); + if (!rss_config->hash_key) + return -ENOMEM; + + rss_config->hash_lut = kcalloc(priv->rss_lut_size, + sizeof(rss_config->hash_lut[0]), + GFP_KERNEL); + if (!rss_config->hash_lut) + goto free_rss_key_cache; + + return 0; + +free_rss_key_cache: + kfree(rss_config->hash_key); + rss_config->hash_key = NULL; + return -ENOMEM; +} + +static void gve_free_rss_config_cache(struct gve_priv *priv) +{ + struct gve_rss_config *rss_config = &priv->rss_config; + + kfree(rss_config->hash_key); + kfree(rss_config->hash_lut); + + memset(rss_config, 0, sizeof(*rss_config)); +} + static int gve_alloc_counter_array(struct gve_priv *priv) { priv->counter_array = @@ -219,7 +302,7 @@ static void gve_free_stats_report(struct gve_priv *priv) if (!priv->stats_report) return; - del_timer_sync(&priv->stats_report_timer); + timer_delete_sync(&priv->stats_report_timer); dma_free_coherent(&priv->pdev->dev, priv->stats_report_len, priv->stats_report, priv->stats_report_bus); priv->stats_report = NULL; @@ -252,7 +335,19 @@ static irqreturn_t gve_intr_dqo(int irq, void *arg) return IRQ_HANDLED; } -static int gve_napi_poll(struct napi_struct *napi, int budget) +static int gve_is_napi_on_home_cpu(struct gve_priv *priv, u32 irq) +{ + int cpu_curr = smp_processor_id(); + const struct cpumask *aff_mask; + + aff_mask = irq_get_effective_affinity_mask(irq); + if (unlikely(!aff_mask)) + return 1; + + return cpumask_test_cpu(cpu_curr, aff_mask); +} + +int gve_napi_poll(struct napi_struct *napi, int budget) { struct gve_notify_block *block; __be32 __iomem *irq_doorbell; @@ -275,6 +370,14 @@ static int gve_napi_poll(struct napi_struct *napi, int budget) if (block->rx) { work_done = gve_rx_poll(block, budget); + + /* Poll XSK TX as part of RX NAPI. Setup re-poll based on max of + * TX and RX work done. + */ + if (priv->xdp_prog) + work_done = max_t(int, work_done, + gve_xsk_tx_poll(block, budget)); + reschedule |= work_done == budget; } @@ -302,7 +405,7 @@ static int gve_napi_poll(struct napi_struct *napi, int budget) return work_done; } -static int gve_napi_poll_dqo(struct napi_struct *napi, int budget) +int gve_napi_poll_dqo(struct napi_struct *napi, int budget) { struct gve_notify_block *block = container_of(napi, struct gve_notify_block, napi); @@ -321,8 +424,21 @@ static int gve_napi_poll_dqo(struct napi_struct *napi, int budget) reschedule |= work_done == budget; } - if (reschedule) - return budget; + if (reschedule) { + /* Reschedule by returning budget only if already on the correct + * cpu. + */ + if (likely(gve_is_napi_on_home_cpu(priv, block->irq))) + return budget; + + /* If not on the cpu with which this queue's irq has affinity + * with, we avoid rescheduling napi and arm the irq instead so + * that napi gets rescheduled back eventually onto the right + * cpu. + */ + if (work_done == budget) + work_done--; + } if (likely(napi_complete_done(napi, work_done))) { /* Enable interrupts again. @@ -427,6 +543,7 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv) "Failed to receive msix vector %d\n", i); goto abort_with_some_ntfy_blocks; } + block->irq = priv->msix_vectors[msix_idx].vector; irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, get_cpu_mask(i % active_cpus)); block->irq_db_index = &priv->irq_db_indices[i].index; @@ -440,6 +557,7 @@ abort_with_some_ntfy_blocks: irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, NULL); free_irq(priv->msix_vectors[msix_idx].vector, block); + block->irq = 0; } kvfree(priv->ntfy_blocks); priv->ntfy_blocks = NULL; @@ -473,6 +591,7 @@ static void gve_free_notify_blocks(struct gve_priv *priv) irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, NULL); free_irq(priv->msix_vectors[msix_idx].vector, block); + block->irq = 0; } free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); kvfree(priv->ntfy_blocks); @@ -490,9 +609,15 @@ static int gve_setup_device_resources(struct gve_priv *priv) { int err; - err = gve_alloc_counter_array(priv); + err = gve_alloc_flow_rule_caches(priv); if (err) return err; + err = gve_alloc_rss_config_cache(priv); + if (err) + goto abort_with_flow_rule_caches; + err = gve_alloc_counter_array(priv); + if (err) + goto abort_with_rss_config_cache; err = gve_alloc_notify_blocks(priv); if (err) goto abort_with_counter; @@ -526,6 +651,12 @@ static int gve_setup_device_resources(struct gve_priv *priv) } } + err = gve_init_rss_config(priv, priv->rx_cfg.num_queues); + if (err) { + dev_err(&priv->pdev->dev, "Failed to init RSS config"); + goto abort_with_ptype_lut; + } + err = gve_adminq_report_stats(priv, priv->stats_report_len, priv->stats_report_bus, GVE_STATS_REPORT_TIMER_PERIOD); @@ -544,6 +675,10 @@ abort_with_ntfy_blocks: gve_free_notify_blocks(priv); abort_with_counter: gve_free_counter_array(priv); +abort_with_rss_config_cache: + gve_free_rss_config_cache(priv); +abort_with_flow_rule_caches: + gve_free_flow_rule_caches(priv); return err; } @@ -556,6 +691,12 @@ static void gve_teardown_device_resources(struct gve_priv *priv) /* Tell device its resources are being freed */ if (gve_get_device_resources_ok(priv)) { + err = gve_flow_rules_reset(priv); + if (err) { + dev_err(&priv->pdev->dev, + "Failed to reset flow rules: err=%d\n", err); + gve_trigger_reset(priv); + } /* detach the stats report */ err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD); if (err) { @@ -575,155 +716,131 @@ static void gve_teardown_device_resources(struct gve_priv *priv) kvfree(priv->ptype_lut_dqo); priv->ptype_lut_dqo = NULL; + gve_free_flow_rule_caches(priv); + gve_free_rss_config_cache(priv); gve_free_counter_array(priv); gve_free_notify_blocks(priv); gve_free_stats_report(priv); gve_clear_device_resources_ok(priv); } -static void gve_add_napi(struct gve_priv *priv, int ntfy_idx, - int (*gve_poll)(struct napi_struct *, int)) +static int gve_unregister_qpl(struct gve_priv *priv, + struct gve_queue_page_list *qpl) { - struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + int err; - netif_napi_add(priv->dev, &block->napi, gve_poll); -} + if (!qpl) + return 0; -static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx) -{ - struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + err = gve_adminq_unregister_page_list(priv, qpl->id); + if (err) { + netif_err(priv, drv, priv->dev, + "Failed to unregister queue page list %d\n", + qpl->id); + return err; + } - netif_napi_del(&block->napi); + priv->num_registered_pages -= qpl->num_entries; + return 0; } -static int gve_register_xdp_qpls(struct gve_priv *priv) +static int gve_register_qpl(struct gve_priv *priv, + struct gve_queue_page_list *qpl) { - int start_id; + int pages; int err; - int i; - start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); - for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { - err = gve_adminq_register_page_list(priv, &priv->qpls[i]); - if (err) { - netif_err(priv, drv, priv->dev, - "failed to register queue page list %d\n", - priv->qpls[i].id); - /* This failure will trigger a reset - no need to clean - * up - */ - return err; - } - } - return 0; -} + if (!qpl) + return 0; -static int gve_register_qpls(struct gve_priv *priv) -{ - int start_id; - int err; - int i; + pages = qpl->num_entries; - start_id = gve_tx_start_qpl_id(priv); - for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) { - err = gve_adminq_register_page_list(priv, &priv->qpls[i]); - if (err) { - netif_err(priv, drv, priv->dev, - "failed to register queue page list %d\n", - priv->qpls[i].id); - /* This failure will trigger a reset - no need to clean - * up - */ - return err; - } + if (pages + priv->num_registered_pages > priv->max_registered_pages) { + netif_err(priv, drv, priv->dev, + "Reached max number of registered pages %llu > %llu\n", + pages + priv->num_registered_pages, + priv->max_registered_pages); + return -EINVAL; } - start_id = gve_rx_start_qpl_id(priv); - for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) { - err = gve_adminq_register_page_list(priv, &priv->qpls[i]); - if (err) { - netif_err(priv, drv, priv->dev, - "failed to register queue page list %d\n", - priv->qpls[i].id); - /* This failure will trigger a reset - no need to clean - * up - */ - return err; - } + err = gve_adminq_register_page_list(priv, qpl); + if (err) { + netif_err(priv, drv, priv->dev, + "failed to register queue page list %d\n", + qpl->id); + return err; } + + priv->num_registered_pages += pages; return 0; } -static int gve_unregister_xdp_qpls(struct gve_priv *priv) +static struct gve_queue_page_list *gve_tx_get_qpl(struct gve_priv *priv, int idx) { - int start_id; - int err; - int i; + struct gve_tx_ring *tx = &priv->tx[idx]; - start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); - for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { - err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id); - /* This failure will trigger a reset - no need to clean up */ - if (err) { - netif_err(priv, drv, priv->dev, - "Failed to unregister queue page list %d\n", - priv->qpls[i].id); - return err; - } - } - return 0; + if (gve_is_gqi(priv)) + return tx->tx_fifo.qpl; + else + return tx->dqo.qpl; } -static int gve_unregister_qpls(struct gve_priv *priv) +static struct gve_queue_page_list *gve_rx_get_qpl(struct gve_priv *priv, int idx) +{ + struct gve_rx_ring *rx = &priv->rx[idx]; + + if (gve_is_gqi(priv)) + return rx->data.qpl; + else + return rx->dqo.qpl; +} + +static int gve_register_qpls(struct gve_priv *priv) { - int start_id; + int num_tx_qpls, num_rx_qpls; int err; int i; - start_id = gve_tx_start_qpl_id(priv); - for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) { - err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id); - /* This failure will trigger a reset - no need to clean up */ - if (err) { - netif_err(priv, drv, priv->dev, - "Failed to unregister queue page list %d\n", - priv->qpls[i].id); + num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv)); + num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); + + for (i = 0; i < num_tx_qpls; i++) { + err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i)); + if (err) return err; - } } - start_id = gve_rx_start_qpl_id(priv); - for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) { - err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id); - /* This failure will trigger a reset - no need to clean up */ - if (err) { - netif_err(priv, drv, priv->dev, - "Failed to unregister queue page list %d\n", - priv->qpls[i].id); + for (i = 0; i < num_rx_qpls; i++) { + err = gve_register_qpl(priv, gve_rx_get_qpl(priv, i)); + if (err) return err; - } } + return 0; } -static int gve_create_xdp_rings(struct gve_priv *priv) +static int gve_unregister_qpls(struct gve_priv *priv) { + int num_tx_qpls, num_rx_qpls; int err; + int i; - err = gve_adminq_create_tx_queues(priv, - gve_xdp_tx_start_queue_id(priv), - priv->num_xdp_queues); - if (err) { - netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n", - priv->num_xdp_queues); - /* This failure will trigger a reset - no need to clean - * up - */ - return err; + num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv)); + num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); + + for (i = 0; i < num_tx_qpls; i++) { + err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i)); + /* This failure will trigger a reset - no need to clean */ + if (err) + return err; } - netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n", - priv->num_xdp_queues); + for (i = 0; i < num_rx_qpls; i++) { + err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, i)); + /* This failure will trigger a reset - no need to clean */ + if (err) + return err; + } return 0; } @@ -776,143 +893,101 @@ static int gve_create_rings(struct gve_priv *priv) return 0; } -static void add_napi_init_xdp_sync_stats(struct gve_priv *priv, - int (*napi_poll)(struct napi_struct *napi, - int budget)) +static void init_xdp_sync_stats(struct gve_priv *priv) { int start_id = gve_xdp_tx_start_queue_id(priv); int i; - /* Add xdp tx napi & init sync stats*/ - for (i = start_id; i < start_id + priv->num_xdp_queues; i++) { + /* Init stats */ + for (i = start_id; i < start_id + priv->tx_cfg.num_xdp_queues; i++) { int ntfy_idx = gve_tx_idx_to_ntfy(priv, i); u64_stats_init(&priv->tx[i].statss); priv->tx[i].ntfy_id = ntfy_idx; - gve_add_napi(priv, ntfy_idx, napi_poll); } } -static void add_napi_init_sync_stats(struct gve_priv *priv, - int (*napi_poll)(struct napi_struct *napi, - int budget)) +static void gve_init_sync_stats(struct gve_priv *priv) { int i; - /* Add tx napi & init sync stats*/ - for (i = 0; i < gve_num_tx_queues(priv); i++) { - int ntfy_idx = gve_tx_idx_to_ntfy(priv, i); - + for (i = 0; i < priv->tx_cfg.num_queues; i++) u64_stats_init(&priv->tx[i].statss); - priv->tx[i].ntfy_id = ntfy_idx; - gve_add_napi(priv, ntfy_idx, napi_poll); - } - /* Add rx napi & init sync stats*/ - for (i = 0; i < priv->rx_cfg.num_queues; i++) { - int ntfy_idx = gve_rx_idx_to_ntfy(priv, i); + /* Init stats for XDP TX queues */ + init_xdp_sync_stats(priv); + + for (i = 0; i < priv->rx_cfg.num_queues; i++) u64_stats_init(&priv->rx[i].statss); - priv->rx[i].ntfy_id = ntfy_idx; - gve_add_napi(priv, ntfy_idx, napi_poll); - } } -static void gve_tx_free_rings(struct gve_priv *priv, int start_id, int num_rings) +static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *cfg) { - if (gve_is_gqi(priv)) { - gve_tx_free_rings_gqi(priv, start_id, num_rings); - } else { - gve_tx_free_rings_dqo(priv); - } + cfg->qcfg = &priv->tx_cfg; + cfg->raw_addressing = !gve_is_qpl(priv); + cfg->ring_size = priv->tx_desc_cnt; + cfg->num_xdp_rings = cfg->qcfg->num_xdp_queues; + cfg->tx = priv->tx; } -static int gve_alloc_xdp_rings(struct gve_priv *priv) +static void gve_tx_stop_rings(struct gve_priv *priv, int num_rings) { - int start_id; - int err = 0; + int i; - if (!priv->num_xdp_queues) - return 0; + if (!priv->tx) + return; - start_id = gve_xdp_tx_start_queue_id(priv); - err = gve_tx_alloc_rings(priv, start_id, priv->num_xdp_queues); - if (err) - return err; - add_napi_init_xdp_sync_stats(priv, gve_napi_poll); + for (i = 0; i < num_rings; i++) { + if (gve_is_gqi(priv)) + gve_tx_stop_ring_gqi(priv, i); + else + gve_tx_stop_ring_dqo(priv, i); + } +} - return 0; +static void gve_tx_start_rings(struct gve_priv *priv, int num_rings) +{ + int i; + + for (i = 0; i < num_rings; i++) { + if (gve_is_gqi(priv)) + gve_tx_start_ring_gqi(priv, i); + else + gve_tx_start_ring_dqo(priv, i); + } } -static int gve_alloc_rings(struct gve_priv *priv) +static int gve_queues_mem_alloc(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) { int err; - /* Setup tx rings */ - priv->tx = kvcalloc(priv->tx_cfg.max_queues, sizeof(*priv->tx), - GFP_KERNEL); - if (!priv->tx) - return -ENOMEM; - if (gve_is_gqi(priv)) - err = gve_tx_alloc_rings(priv, 0, gve_num_tx_queues(priv)); + err = gve_tx_alloc_rings_gqi(priv, tx_alloc_cfg); else - err = gve_tx_alloc_rings_dqo(priv); + err = gve_tx_alloc_rings_dqo(priv, tx_alloc_cfg); if (err) - goto free_tx; - - /* Setup rx rings */ - priv->rx = kvcalloc(priv->rx_cfg.max_queues, sizeof(*priv->rx), - GFP_KERNEL); - if (!priv->rx) { - err = -ENOMEM; - goto free_tx_queue; - } + return err; if (gve_is_gqi(priv)) - err = gve_rx_alloc_rings(priv); + err = gve_rx_alloc_rings_gqi(priv, rx_alloc_cfg); else - err = gve_rx_alloc_rings_dqo(priv); + err = gve_rx_alloc_rings_dqo(priv, rx_alloc_cfg); if (err) - goto free_rx; - - if (gve_is_gqi(priv)) - add_napi_init_sync_stats(priv, gve_napi_poll); - else - add_napi_init_sync_stats(priv, gve_napi_poll_dqo); + goto free_tx; return 0; -free_rx: - kvfree(priv->rx); - priv->rx = NULL; -free_tx_queue: - gve_tx_free_rings(priv, 0, gve_num_tx_queues(priv)); free_tx: - kvfree(priv->tx); - priv->tx = NULL; + if (gve_is_gqi(priv)) + gve_tx_free_rings_gqi(priv, tx_alloc_cfg); + else + gve_tx_free_rings_dqo(priv, tx_alloc_cfg); return err; } -static int gve_destroy_xdp_rings(struct gve_priv *priv) -{ - int start_id; - int err; - - start_id = gve_xdp_tx_start_queue_id(priv); - err = gve_adminq_destroy_tx_queues(priv, - start_id, - priv->num_xdp_queues); - if (err) { - netif_err(priv, drv, priv->dev, - "failed to destroy XDP queues\n"); - /* This failure will trigger a reset - no need to clean up */ - return err; - } - netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n"); - - return 0; -} - static int gve_destroy_rings(struct gve_priv *priv) { int num_tx_queues = gve_num_tx_queues(priv); @@ -937,52 +1012,16 @@ static int gve_destroy_rings(struct gve_priv *priv) return 0; } -static void gve_rx_free_rings(struct gve_priv *priv) +static void gve_queues_mem_free(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_cfg, + struct gve_rx_alloc_rings_cfg *rx_cfg) { - if (gve_is_gqi(priv)) - gve_rx_free_rings_gqi(priv); - else - gve_rx_free_rings_dqo(priv); -} - -static void gve_free_xdp_rings(struct gve_priv *priv) -{ - int ntfy_idx, start_id; - int i; - - start_id = gve_xdp_tx_start_queue_id(priv); - if (priv->tx) { - for (i = start_id; i < start_id + priv->num_xdp_queues; i++) { - ntfy_idx = gve_tx_idx_to_ntfy(priv, i); - gve_remove_napi(priv, ntfy_idx); - } - gve_tx_free_rings(priv, start_id, priv->num_xdp_queues); - } -} - -static void gve_free_rings(struct gve_priv *priv) -{ - int num_tx_queues = gve_num_tx_queues(priv); - int ntfy_idx; - int i; - - if (priv->tx) { - for (i = 0; i < num_tx_queues; i++) { - ntfy_idx = gve_tx_idx_to_ntfy(priv, i); - gve_remove_napi(priv, ntfy_idx); - } - gve_tx_free_rings(priv, 0, num_tx_queues); - kvfree(priv->tx); - priv->tx = NULL; - } - if (priv->rx) { - for (i = 0; i < priv->rx_cfg.num_queues; i++) { - ntfy_idx = gve_rx_idx_to_ntfy(priv, i); - gve_remove_napi(priv, ntfy_idx); - } - gve_rx_free_rings(priv); - kvfree(priv->rx); - priv->rx = NULL; + if (gve_is_gqi(priv)) { + gve_tx_free_rings_gqi(priv, tx_cfg); + gve_rx_free_rings_gqi(priv, rx_cfg); + } else { + gve_tx_free_rings_dqo(priv, tx_cfg); + gve_rx_free_rings_dqo(priv, rx_cfg); } } @@ -1004,44 +1043,41 @@ int gve_alloc_page(struct gve_priv *priv, struct device *dev, return 0; } -static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id, - int pages) +struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv, + u32 id, int pages) { - struct gve_queue_page_list *qpl = &priv->qpls[id]; + struct gve_queue_page_list *qpl; int err; int i; - if (pages + priv->num_registered_pages > priv->max_registered_pages) { - netif_err(priv, drv, priv->dev, - "Reached max number of registered pages %llu > %llu\n", - pages + priv->num_registered_pages, - priv->max_registered_pages); - return -EINVAL; - } + qpl = kvzalloc(sizeof(*qpl), GFP_KERNEL); + if (!qpl) + return NULL; qpl->id = id; qpl->num_entries = 0; qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL); - /* caller handles clean up */ if (!qpl->pages) - return -ENOMEM; + goto abort; + qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL); - /* caller handles clean up */ if (!qpl->page_buses) - return -ENOMEM; + goto abort; for (i = 0; i < pages; i++) { err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i], &qpl->page_buses[i], gve_qpl_dma_dir(priv, id), GFP_KERNEL); - /* caller handles clean up */ if (err) - return -ENOMEM; + goto abort; qpl->num_entries++; } - priv->num_registered_pages += pages; - return 0; + return qpl; + +abort: + gve_free_queue_page_list(priv, qpl, id); + return NULL; } void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, @@ -1053,13 +1089,16 @@ void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, put_page(page); } -static void gve_free_queue_page_list(struct gve_priv *priv, u32 id) +void gve_free_queue_page_list(struct gve_priv *priv, + struct gve_queue_page_list *qpl, + u32 id) { - struct gve_queue_page_list *qpl = &priv->qpls[id]; int i; - if (!qpl->pages) + if (!qpl) return; + if (!qpl->pages) + goto free_qpl; if (!qpl->page_buses) goto free_pages; @@ -1072,115 +1111,8 @@ static void gve_free_queue_page_list(struct gve_priv *priv, u32 id) free_pages: kvfree(qpl->pages); qpl->pages = NULL; - priv->num_registered_pages -= qpl->num_entries; -} - -static int gve_alloc_xdp_qpls(struct gve_priv *priv) -{ - int start_id; - int i, j; - int err; - - start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); - for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { - err = gve_alloc_queue_page_list(priv, i, - priv->tx_pages_per_qpl); - if (err) - goto free_qpls; - } - - return 0; - -free_qpls: - for (j = start_id; j <= i; j++) - gve_free_queue_page_list(priv, j); - return err; -} - -static int gve_alloc_qpls(struct gve_priv *priv) -{ - int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues; - int page_count; - int start_id; - int i, j; - int err; - - if (!gve_is_qpl(priv)) - return 0; - - priv->qpls = kvcalloc(max_queues, sizeof(*priv->qpls), GFP_KERNEL); - if (!priv->qpls) - return -ENOMEM; - - start_id = gve_tx_start_qpl_id(priv); - page_count = priv->tx_pages_per_qpl; - for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) { - err = gve_alloc_queue_page_list(priv, i, - page_count); - if (err) - goto free_qpls; - } - - start_id = gve_rx_start_qpl_id(priv); - - /* For GQI_QPL number of pages allocated have 1:1 relationship with - * number of descriptors. For DQO, number of pages required are - * more than descriptors (because of out of order completions). - */ - page_count = priv->queue_format == GVE_GQI_QPL_FORMAT ? - priv->rx_data_slot_cnt : priv->rx_pages_per_qpl; - for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) { - err = gve_alloc_queue_page_list(priv, i, - page_count); - if (err) - goto free_qpls; - } - - priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(max_queues) * - sizeof(unsigned long) * BITS_PER_BYTE; - priv->qpl_cfg.qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues), - sizeof(unsigned long), GFP_KERNEL); - if (!priv->qpl_cfg.qpl_id_map) { - err = -ENOMEM; - goto free_qpls; - } - - return 0; - -free_qpls: - for (j = 0; j <= i; j++) - gve_free_queue_page_list(priv, j); - kvfree(priv->qpls); - priv->qpls = NULL; - return err; -} - -static void gve_free_xdp_qpls(struct gve_priv *priv) -{ - int start_id; - int i; - - start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); - for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) - gve_free_queue_page_list(priv, i); -} - -static void gve_free_qpls(struct gve_priv *priv) -{ - int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues; - int i; - - if (!priv->qpls) - return; - - kvfree(priv->qpl_cfg.qpl_id_map); - priv->qpl_cfg.qpl_id_map = NULL; - - for (i = 0; i < max_queues; i++) - gve_free_queue_page_list(priv, i); - - kvfree(priv->qpls); - priv->qpls = NULL; +free_qpl: + kvfree(qpl); } /* Use this to schedule a reset when the device is capable of continuing @@ -1206,7 +1138,7 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) int i, j; u32 tx_qid; - if (!priv->num_xdp_queues) + if (!priv->tx_cfg.num_xdp_queues) return 0; for (i = 0; i < priv->rx_cfg.num_queues; i++) { @@ -1217,8 +1149,14 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) napi->napi_id); if (err) goto err; - err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, - MEM_TYPE_PAGE_SHARED, NULL); + if (gve_is_qpl(priv)) + err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, + MEM_TYPE_PAGE_SHARED, + NULL); + else + err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, + MEM_TYPE_PAGE_POOL, + rx->dqo.page_pool); if (err) goto err; rx->xsk_pool = xsk_get_pool_from_qid(dev, i); @@ -1236,7 +1174,7 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) } } - for (i = 0; i < priv->num_xdp_queues; i++) { + for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) { tx_qid = gve_xdp_tx_queue_id(priv, i); priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i); } @@ -1257,7 +1195,7 @@ static void gve_unreg_xdp_info(struct gve_priv *priv) { int i, tx_qid; - if (!priv->num_xdp_queues) + if (!priv->tx_cfg.num_xdp_queues || !priv->rx || !priv->tx) return; for (i = 0; i < priv->rx_cfg.num_queues; i++) { @@ -1270,7 +1208,7 @@ static void gve_unreg_xdp_info(struct gve_priv *priv) } } - for (i = 0; i < priv->num_xdp_queues; i++) { + for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) { tx_qid = gve_xdp_tx_queue_id(priv, i); priv->tx[tx_qid].xsk_pool = NULL; } @@ -1278,58 +1216,128 @@ static void gve_unreg_xdp_info(struct gve_priv *priv) static void gve_drain_page_cache(struct gve_priv *priv) { - struct page_frag_cache *nc; int i; - for (i = 0; i < priv->rx_cfg.num_queues; i++) { - nc = &priv->rx[i].page_cache; - if (nc->va) { - __page_frag_cache_drain(virt_to_page(nc->va), - nc->pagecnt_bias); - nc->va = NULL; - } - } + for (i = 0; i < priv->rx_cfg.num_queues; i++) + page_frag_cache_drain(&priv->rx[i].page_cache); } -static int gve_open(struct net_device *dev) +static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg) { - struct gve_priv *priv = netdev_priv(dev); - int err; + cfg->qcfg_rx = &priv->rx_cfg; + cfg->qcfg_tx = &priv->tx_cfg; + cfg->raw_addressing = !gve_is_qpl(priv); + cfg->enable_header_split = priv->header_split_enabled; + cfg->ring_size = priv->rx_desc_cnt; + cfg->packet_buffer_size = priv->rx_cfg.packet_buffer_size; + cfg->rx = priv->rx; + cfg->xdp = !!cfg->qcfg_tx->num_xdp_queues; +} - if (priv->xdp_prog) - priv->num_xdp_queues = priv->rx_cfg.num_queues; +void gve_get_curr_alloc_cfgs(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) +{ + gve_tx_get_curr_alloc_cfg(priv, tx_alloc_cfg); + gve_rx_get_curr_alloc_cfg(priv, rx_alloc_cfg); +} + +static void gve_rx_start_ring(struct gve_priv *priv, int i) +{ + if (gve_is_gqi(priv)) + gve_rx_start_ring_gqi(priv, i); else - priv->num_xdp_queues = 0; + gve_rx_start_ring_dqo(priv, i); +} - err = gve_alloc_qpls(priv); - if (err) - return err; +static void gve_rx_start_rings(struct gve_priv *priv, int num_rings) +{ + int i; - err = gve_alloc_rings(priv); - if (err) - goto free_qpls; + for (i = 0; i < num_rings; i++) + gve_rx_start_ring(priv, i); +} + +static void gve_rx_stop_ring(struct gve_priv *priv, int i) +{ + if (gve_is_gqi(priv)) + gve_rx_stop_ring_gqi(priv, i); + else + gve_rx_stop_ring_dqo(priv, i); +} + +static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings) +{ + int i; + + if (!priv->rx) + return; + + for (i = 0; i < num_rings; i++) + gve_rx_stop_ring(priv, i); +} + +static void gve_queues_mem_remove(struct gve_priv *priv) +{ + struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; + struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; + + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); + gve_queues_mem_free(priv, &tx_alloc_cfg, &rx_alloc_cfg); + priv->tx = NULL; + priv->rx = NULL; +} + +/* The passed-in queue memory is stored into priv and the queues are made live. + * No memory is allocated. Passed-in memory is freed on errors. + */ +static int gve_queues_start(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) +{ + struct net_device *dev = priv->dev; + int err; + + /* Record new resources into priv */ + priv->tx = tx_alloc_cfg->tx; + priv->rx = rx_alloc_cfg->rx; + + /* Record new configs into priv */ + priv->tx_cfg = *tx_alloc_cfg->qcfg; + priv->tx_cfg.num_xdp_queues = tx_alloc_cfg->num_xdp_rings; + priv->rx_cfg = *rx_alloc_cfg->qcfg_rx; + priv->tx_desc_cnt = tx_alloc_cfg->ring_size; + priv->rx_desc_cnt = rx_alloc_cfg->ring_size; + + gve_tx_start_rings(priv, gve_num_tx_queues(priv)); + gve_rx_start_rings(priv, rx_alloc_cfg->qcfg_rx->num_queues); + gve_init_sync_stats(priv); err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues); if (err) - goto free_rings; + goto stop_and_free_rings; err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues); if (err) - goto free_rings; + goto stop_and_free_rings; err = gve_reg_xdp_info(priv, dev); if (err) - goto free_rings; + goto stop_and_free_rings; + + if (rx_alloc_cfg->reset_rss) { + err = gve_init_rss_config(priv, priv->rx_cfg.num_queues); + if (err) + goto reset; + } err = gve_register_qpls(priv); if (err) goto reset; - if (!gve_is_gqi(priv)) { - /* Hard code this for now. This may be tuned in the future for - * performance. - */ - priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE; - } + priv->header_split_enabled = rx_alloc_cfg->enable_header_split; + priv->rx_cfg.packet_buffer_size = rx_alloc_cfg->packet_buffer_size; + err = gve_create_rings(priv); if (err) goto reset; @@ -1346,32 +1354,49 @@ static int gve_open(struct net_device *dev) priv->interface_up_cnt++; return 0; -free_rings: - gve_free_rings(priv); -free_qpls: - gve_free_qpls(priv); - return err; - reset: - /* This must have been called from a reset due to the rtnl lock - * so just return at this point. - */ if (gve_get_reset_in_progress(priv)) - return err; - /* Otherwise reset before returning */ + goto stop_and_free_rings; gve_reset_and_teardown(priv, true); /* if this fails there is nothing we can do so just ignore the return */ gve_reset_recovery(priv, false); /* return the original error */ return err; +stop_and_free_rings: + gve_tx_stop_rings(priv, gve_num_tx_queues(priv)); + gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); + gve_queues_mem_remove(priv); + return err; } -static int gve_close(struct net_device *dev) +static int gve_open(struct net_device *dev) { + struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; + struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; struct gve_priv *priv = netdev_priv(dev); int err; - netif_carrier_off(dev); + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); + + err = gve_queues_mem_alloc(priv, &tx_alloc_cfg, &rx_alloc_cfg); + if (err) + return err; + + /* No need to free on error: ownership of resources is lost after + * calling gve_queues_start. + */ + err = gve_queues_start(priv, &tx_alloc_cfg, &rx_alloc_cfg); + if (err) + return err; + + return 0; +} + +static int gve_queues_stop(struct gve_priv *priv) +{ + int err; + + netif_carrier_off(priv->dev); if (gve_get_device_rings_ok(priv)) { gve_turndown(priv); gve_drain_page_cache(priv); @@ -1383,11 +1408,13 @@ static int gve_close(struct net_device *dev) goto err; gve_clear_device_rings_ok(priv); } - del_timer_sync(&priv->stats_report_timer); + timer_delete_sync(&priv->stats_report_timer); gve_unreg_xdp_info(priv); - gve_free_rings(priv); - gve_free_qpls(priv); + + gve_tx_stop_rings(priv, gve_num_tx_queues(priv)); + gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); + priv->interface_down_cnt++; return 0; @@ -1402,62 +1429,19 @@ err: return gve_reset_recovery(priv, false); } -static int gve_remove_xdp_queues(struct gve_priv *priv) +static int gve_close(struct net_device *dev) { + struct gve_priv *priv = netdev_priv(dev); int err; - err = gve_destroy_xdp_rings(priv); + err = gve_queues_stop(priv); if (err) return err; - err = gve_unregister_xdp_qpls(priv); - if (err) - return err; - - gve_unreg_xdp_info(priv); - gve_free_xdp_rings(priv); - gve_free_xdp_qpls(priv); - priv->num_xdp_queues = 0; + gve_queues_mem_remove(priv); return 0; } -static int gve_add_xdp_queues(struct gve_priv *priv) -{ - int err; - - priv->num_xdp_queues = priv->tx_cfg.num_queues; - - err = gve_alloc_xdp_qpls(priv); - if (err) - goto err; - - err = gve_alloc_xdp_rings(priv); - if (err) - goto free_xdp_qpls; - - err = gve_reg_xdp_info(priv, priv->dev); - if (err) - goto free_xdp_rings; - - err = gve_register_xdp_qpls(priv); - if (err) - goto free_xdp_rings; - - err = gve_create_xdp_rings(priv); - if (err) - goto free_xdp_rings; - - return 0; - -free_xdp_rings: - gve_free_xdp_rings(priv); -free_xdp_qpls: - gve_free_xdp_qpls(priv); -err: - priv->num_xdp_queues = 0; - return err; -} - static void gve_handle_link_status(struct gve_priv *priv, bool link_status) { if (!gve_get_napi_enabled(priv)) @@ -1475,6 +1459,19 @@ static void gve_handle_link_status(struct gve_priv *priv, bool link_status) } } +static int gve_configure_rings_xdp(struct gve_priv *priv, + u16 num_xdp_rings) +{ + struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; + struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; + + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); + tx_alloc_cfg.num_xdp_rings = num_xdp_rings; + + rx_alloc_cfg.xdp = !!num_xdp_rings; + return gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); +} + static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, struct netlink_ext_ack *extack) { @@ -1483,33 +1480,30 @@ static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, u32 status; old_prog = READ_ONCE(priv->xdp_prog); - if (!netif_carrier_ok(priv->dev)) { + if (!netif_running(priv->dev)) { WRITE_ONCE(priv->xdp_prog, prog); if (old_prog) bpf_prog_put(old_prog); + + /* Update priv XDP queue configuration */ + priv->tx_cfg.num_xdp_queues = priv->xdp_prog ? + priv->rx_cfg.num_queues : 0; return 0; } - gve_turndown(priv); - if (!old_prog && prog) { - // Allocate XDP TX queues if an XDP program is - // being installed - err = gve_add_xdp_queues(priv); - if (err) - goto out; - } else if (old_prog && !prog) { - // Remove XDP TX queues if an XDP program is - // being uninstalled - err = gve_remove_xdp_queues(priv); - if (err) - goto out; - } + if (!old_prog && prog) + err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues); + else if (old_prog && !prog) + err = gve_configure_rings_xdp(priv, 0); + + if (err) + goto out; + WRITE_ONCE(priv->xdp_prog, prog); if (old_prog) bpf_prog_put(old_prog); out: - gve_turnup(priv); status = ioread32be(&priv->reg_bar0->device_status); gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); return err; @@ -1540,8 +1534,8 @@ static int gve_xsk_pool_enable(struct net_device *dev, if (err) return err; - /* If XDP prog is not installed, return */ - if (!priv->xdp_prog) + /* If XDP prog is not installed or interface is down, return. */ + if (!priv->xdp_prog || !netif_running(dev)) return 0; rx = &priv->rx[qid]; @@ -1586,21 +1580,16 @@ static int gve_xsk_pool_disable(struct net_device *dev, if (qid >= priv->rx_cfg.num_queues) return -EINVAL; - /* If XDP prog is not installed, unmap DMA and return */ - if (!priv->xdp_prog) - goto done; - - tx_qid = gve_xdp_tx_queue_id(priv, qid); - if (!netif_running(dev)) { - priv->rx[qid].xsk_pool = NULL; - xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq); - priv->tx[tx_qid].xsk_pool = NULL; + /* If XDP prog is not installed or interface is down, unmap DMA and + * return. + */ + if (!priv->xdp_prog || !netif_running(dev)) goto done; - } napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi; napi_disable(napi_rx); /* make sure current rx poll is done */ + tx_qid = gve_xdp_tx_queue_id(priv, qid); napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi; napi_disable(napi_tx); /* make sure current tx poll is done */ @@ -1626,24 +1615,20 @@ done: static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) { struct gve_priv *priv = netdev_priv(dev); - int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id); + struct napi_struct *napi; + + if (!gve_get_napi_enabled(priv)) + return -ENETDOWN; if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog) return -EINVAL; - if (flags & XDP_WAKEUP_TX) { - struct gve_tx_ring *tx = &priv->tx[tx_queue_id]; - struct napi_struct *napi = - &priv->ntfy_blocks[tx->ntfy_id].napi; - - if (!napi_if_scheduled_mark_missed(napi)) { - /* Call local_bh_enable to trigger SoftIRQ processing */ - local_bh_disable(); - napi_schedule(napi); - local_bh_enable(); - } - - tx->xdp_xsk_wakeup++; + napi = &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_id)].napi; + if (!napi_if_scheduled_mark_missed(napi)) { + /* Call local_bh_enable to trigger SoftIRQ processing */ + local_bh_disable(); + napi_schedule(napi); + local_bh_enable(); } return 0; @@ -1652,6 +1637,7 @@ static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) static int verify_xdp_configuration(struct net_device *dev) { struct gve_priv *priv = netdev_priv(dev); + u16 max_xdp_mtu; if (dev->features & NETIF_F_LRO) { netdev_warn(dev, "XDP is not supported when LRO is on.\n"); @@ -1664,7 +1650,11 @@ static int verify_xdp_configuration(struct net_device *dev) return -EOPNOTSUPP; } - if (dev->mtu > GVE_DEFAULT_RX_BUFFER_SIZE - sizeof(struct ethhdr) - GVE_RX_PAD) { + max_xdp_mtu = priv->rx_cfg.packet_buffer_size - sizeof(struct ethhdr); + if (priv->queue_format == GVE_GQI_QPL_FORMAT) + max_xdp_mtu -= GVE_RX_PAD; + + if (dev->mtu > max_xdp_mtu) { netdev_warn(dev, "XDP is not supported for mtu %d.\n", dev->mtu); return -EOPNOTSUPP; @@ -1702,42 +1692,103 @@ static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp) } } -int gve_adjust_queues(struct gve_priv *priv, - struct gve_queue_config new_rx_config, - struct gve_queue_config new_tx_config) +int gve_init_rss_config(struct gve_priv *priv, u16 num_queues) +{ + struct gve_rss_config *rss_config = &priv->rss_config; + struct ethtool_rxfh_param rxfh = {0}; + u16 i; + + if (!priv->cache_rss_config) + return 0; + + for (i = 0; i < priv->rss_lut_size; i++) + rss_config->hash_lut[i] = + ethtool_rxfh_indir_default(i, num_queues); + + netdev_rss_key_fill(rss_config->hash_key, priv->rss_key_size); + + rxfh.hfunc = ETH_RSS_HASH_TOP; + + return gve_adminq_configure_rss(priv, &rxfh); +} + +int gve_flow_rules_reset(struct gve_priv *priv) +{ + if (!priv->max_flow_rules) + return 0; + + return gve_adminq_reset_flow_rules(priv); +} + +int gve_adjust_config(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) { int err; - if (netif_carrier_ok(priv->dev)) { - /* To make this process as simple as possible we teardown the - * device, set the new configuration, and then bring the device - * up again. - */ - err = gve_close(priv->dev); - /* we have already tried to reset in close, - * just fail at this point + /* Allocate resources for the new confiugration */ + err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg); + if (err) { + netif_err(priv, drv, priv->dev, + "Adjust config failed to alloc new queues"); + return err; + } + + /* Teardown the device and free existing resources */ + err = gve_close(priv->dev); + if (err) { + netif_err(priv, drv, priv->dev, + "Adjust config failed to close old queues"); + gve_queues_mem_free(priv, tx_alloc_cfg, rx_alloc_cfg); + return err; + } + + /* Bring the device back up again with the new resources. */ + err = gve_queues_start(priv, tx_alloc_cfg, rx_alloc_cfg); + if (err) { + netif_err(priv, drv, priv->dev, + "Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n"); + /* No need to free on error: ownership of resources is lost after + * calling gve_queues_start. */ - if (err) - return err; - priv->tx_cfg = new_tx_config; - priv->rx_cfg = new_rx_config; + gve_turndown(priv); + return err; + } - err = gve_open(priv->dev); - if (err) - goto err; + return 0; +} - return 0; +int gve_adjust_queues(struct gve_priv *priv, + struct gve_rx_queue_config new_rx_config, + struct gve_tx_queue_config new_tx_config, + bool reset_rss) +{ + struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; + struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; + int err; + + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); + + /* Relay the new config from ethtool */ + tx_alloc_cfg.qcfg = &new_tx_config; + rx_alloc_cfg.qcfg_tx = &new_tx_config; + rx_alloc_cfg.qcfg_rx = &new_rx_config; + rx_alloc_cfg.reset_rss = reset_rss; + + if (netif_running(priv->dev)) { + err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); + return err; } /* Set the config for the next up. */ + if (reset_rss) { + err = gve_init_rss_config(priv, new_rx_config.num_queues); + if (err) + return err; + } priv->tx_cfg = new_tx_config; priv->rx_cfg = new_rx_config; return 0; -err: - netif_err(priv, drv, priv->dev, - "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n"); - gve_turndown(priv); - return err; } static void gve_turndown(struct gve_priv *priv) @@ -1755,20 +1806,37 @@ static void gve_turndown(struct gve_priv *priv) int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; - napi_disable(&block->napi); + if (!gve_tx_was_added_to_block(priv, idx)) + continue; + + if (idx < priv->tx_cfg.num_queues) + netif_queue_set_napi(priv->dev, idx, + NETDEV_QUEUE_TYPE_TX, NULL); + + napi_disable_locked(&block->napi); } for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; - napi_disable(&block->napi); + if (!gve_rx_was_added_to_block(priv, idx)) + continue; + + netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, + NULL); + napi_disable_locked(&block->napi); } /* Stop tx queues */ netif_tx_disable(priv->dev); + xdp_features_clear_redirect_target_locked(priv->dev); + gve_clear_napi_enabled(priv); gve_clear_report_stats(priv); + + /* Make sure that all traffic is finished processing. */ + synchronize_net(); } static void gve_turnup(struct gve_priv *priv) @@ -1783,30 +1851,71 @@ static void gve_turnup(struct gve_priv *priv) int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; - napi_enable(&block->napi); + if (!gve_tx_was_added_to_block(priv, idx)) + continue; + + napi_enable_locked(&block->napi); + + if (idx < priv->tx_cfg.num_queues) + netif_queue_set_napi(priv->dev, idx, + NETDEV_QUEUE_TYPE_TX, + &block->napi); + if (gve_is_gqi(priv)) { iowrite32be(0, gve_irq_doorbell(priv, block)); } else { gve_set_itr_coalesce_usecs_dqo(priv, block, priv->tx_coalesce_usecs); } + + /* Any descs written by the NIC before this barrier will be + * handled by the one-off napi schedule below. Whereas any + * descs after the barrier will generate interrupts. + */ + mb(); + napi_schedule(&block->napi); } for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; - napi_enable(&block->napi); + if (!gve_rx_was_added_to_block(priv, idx)) + continue; + + napi_enable_locked(&block->napi); + netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, + &block->napi); + if (gve_is_gqi(priv)) { iowrite32be(0, gve_irq_doorbell(priv, block)); } else { gve_set_itr_coalesce_usecs_dqo(priv, block, priv->rx_coalesce_usecs); } + + /* Any descs written by the NIC before this barrier will be + * handled by the one-off napi schedule below. Whereas any + * descs after the barrier will generate interrupts. + */ + mb(); + napi_schedule(&block->napi); } + if (priv->tx_cfg.num_xdp_queues && gve_supports_xdp_xmit(priv)) + xdp_features_set_redirect_target_locked(priv->dev, false); + gve_set_napi_enabled(priv); } +static void gve_turnup_and_check_status(struct gve_priv *priv) +{ + u32 status; + + gve_turnup(priv); + status = ioread32be(&priv->reg_bar0->device_status); + gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); +} + static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct gve_notify_block *block; @@ -1853,39 +1962,82 @@ out: priv->tx_timeo_cnt++; } +u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit) +{ + if (enable_hsplit && priv->max_rx_buffer_size >= GVE_MAX_RX_BUFFER_SIZE) + return GVE_MAX_RX_BUFFER_SIZE; + else + return GVE_DEFAULT_RX_BUFFER_SIZE; +} + +/* header-split is not supported on non-DQO_RDA yet even if device advertises it */ +bool gve_header_split_supported(const struct gve_priv *priv) +{ + return priv->header_buf_size && priv->queue_format == GVE_DQO_RDA_FORMAT; +} + +int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split) +{ + struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; + struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; + bool enable_hdr_split; + int err = 0; + + if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) + return 0; + + if (!gve_header_split_supported(priv)) { + dev_err(&priv->pdev->dev, "Header-split not supported\n"); + return -EOPNOTSUPP; + } + + if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED) + enable_hdr_split = true; + else + enable_hdr_split = false; + + if (enable_hdr_split == priv->header_split_enabled) + return 0; + + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); + + rx_alloc_cfg.enable_header_split = enable_hdr_split; + rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split); + + if (netif_running(priv->dev)) + err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); + return err; +} + static int gve_set_features(struct net_device *netdev, netdev_features_t features) { const netdev_features_t orig_features = netdev->features; + struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; + struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; struct gve_priv *priv = netdev_priv(netdev); int err; + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); + if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) { netdev->features ^= NETIF_F_LRO; - if (netif_carrier_ok(netdev)) { - /* To make this process as simple as possible we - * teardown the device, set the new configuration, - * and then bring the device up again. - */ - err = gve_close(netdev); - /* We have already tried to reset in close, just fail - * at this point. - */ - if (err) - goto err; - - err = gve_open(netdev); + if (netif_running(netdev)) { + err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); if (err) - goto err; + goto revert_features; } } + if ((netdev->features & NETIF_F_NTUPLE) && !(features & NETIF_F_NTUPLE)) { + err = gve_flow_rules_reset(priv); + if (err) + goto revert_features; + } return 0; -err: - /* Reverts the change on error. */ + +revert_features: netdev->features = orig_features; - netif_err(priv, drv, netdev, - "Set features failed! !!! DISABLING ALL QUEUES !!!\n"); return err; } @@ -1925,7 +2077,9 @@ static void gve_handle_reset(struct gve_priv *priv) if (gve_get_do_reset(priv)) { rtnl_lock(); + netdev_lock(priv->dev); gve_reset(priv, false); + netdev_unlock(priv->dev); rtnl_unlock(); } } @@ -2021,14 +2175,17 @@ static void gve_service_task(struct work_struct *work) static void gve_set_netdev_xdp_features(struct gve_priv *priv) { + xdp_features_t xdp_features; + if (priv->queue_format == GVE_GQI_QPL_FORMAT) { - priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC; - priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT; - priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT; - priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; + xdp_features = NETDEV_XDP_ACT_BASIC; + xdp_features |= NETDEV_XDP_ACT_REDIRECT; + xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; } else { - priv->dev->xdp_features = 0; + xdp_features = 0; } + + xdp_set_features_flag_locked(priv->dev, xdp_features); } static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) @@ -2051,6 +2208,8 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) goto err; } + priv->num_registered_pages = 0; + if (skip_describe_device) goto setup_device; @@ -2080,7 +2239,6 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) if (!gve_is_gqi(priv)) netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX); - priv->num_registered_pages = 0; priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK; /* gvnic has one Notification Block per MSI-x vector, except for the * management vector @@ -2101,6 +2259,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues, priv->rx_cfg.num_queues); } + priv->tx_cfg.num_xdp_queues = 0; dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n", priv->tx_cfg.num_queues, priv->rx_cfg.num_queues); @@ -2164,7 +2323,7 @@ err: int gve_reset(struct gve_priv *priv, bool attempt_teardown) { - bool was_up = netif_carrier_ok(priv->dev); + bool was_up = netif_running(priv->dev); int err; dev_info(&priv->pdev->dev, "Performing reset\n"); @@ -2215,6 +2374,188 @@ static void gve_write_version(u8 __iomem *driver_version_register) writeb('\n', driver_version_register); } +static int gve_rx_queue_stop(struct net_device *dev, void *per_q_mem, int idx) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_rx_ring *gve_per_q_mem; + int err; + + if (!priv->rx) + return -EAGAIN; + + /* Destroying queue 0 while other queues exist is not supported in DQO */ + if (!gve_is_gqi(priv) && idx == 0) + return -ERANGE; + + /* Single-queue destruction requires quiescence on all queues */ + gve_turndown(priv); + + /* This failure will trigger a reset - no need to clean up */ + err = gve_adminq_destroy_single_rx_queue(priv, idx); + if (err) + return err; + + if (gve_is_qpl(priv)) { + /* This failure will trigger a reset - no need to clean up */ + err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, idx)); + if (err) + return err; + } + + gve_rx_stop_ring(priv, idx); + + /* Turn the unstopped queues back up */ + gve_turnup_and_check_status(priv); + + gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; + *gve_per_q_mem = priv->rx[idx]; + memset(&priv->rx[idx], 0, sizeof(priv->rx[idx])); + return 0; +} + +static void gve_rx_queue_mem_free(struct net_device *dev, void *per_q_mem) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_rx_alloc_rings_cfg cfg = {0}; + struct gve_rx_ring *gve_per_q_mem; + + gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; + gve_rx_get_curr_alloc_cfg(priv, &cfg); + + if (gve_is_gqi(priv)) + gve_rx_free_ring_gqi(priv, gve_per_q_mem, &cfg); + else + gve_rx_free_ring_dqo(priv, gve_per_q_mem, &cfg); +} + +static int gve_rx_queue_mem_alloc(struct net_device *dev, void *per_q_mem, + int idx) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_rx_alloc_rings_cfg cfg = {0}; + struct gve_rx_ring *gve_per_q_mem; + int err; + + if (!priv->rx) + return -EAGAIN; + + gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; + gve_rx_get_curr_alloc_cfg(priv, &cfg); + + if (gve_is_gqi(priv)) + err = gve_rx_alloc_ring_gqi(priv, &cfg, gve_per_q_mem, idx); + else + err = gve_rx_alloc_ring_dqo(priv, &cfg, gve_per_q_mem, idx); + + return err; +} + +static int gve_rx_queue_start(struct net_device *dev, void *per_q_mem, int idx) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_rx_ring *gve_per_q_mem; + int err; + + if (!priv->rx) + return -EAGAIN; + + gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; + priv->rx[idx] = *gve_per_q_mem; + + /* Single-queue creation requires quiescence on all queues */ + gve_turndown(priv); + + gve_rx_start_ring(priv, idx); + + if (gve_is_qpl(priv)) { + /* This failure will trigger a reset - no need to clean up */ + err = gve_register_qpl(priv, gve_rx_get_qpl(priv, idx)); + if (err) + goto abort; + } + + /* This failure will trigger a reset - no need to clean up */ + err = gve_adminq_create_single_rx_queue(priv, idx); + if (err) + goto abort; + + if (gve_is_gqi(priv)) + gve_rx_write_doorbell(priv, &priv->rx[idx]); + else + gve_rx_post_buffers_dqo(&priv->rx[idx]); + + /* Turn the unstopped queues back up */ + gve_turnup_and_check_status(priv); + return 0; + +abort: + gve_rx_stop_ring(priv, idx); + + /* All failures in this func result in a reset, by clearing the struct + * at idx, we prevent a double free when that reset runs. The reset, + * which needs the rtnl lock, will not run till this func returns and + * its caller gives up the lock. + */ + memset(&priv->rx[idx], 0, sizeof(priv->rx[idx])); + return err; +} + +static const struct netdev_queue_mgmt_ops gve_queue_mgmt_ops = { + .ndo_queue_mem_size = sizeof(struct gve_rx_ring), + .ndo_queue_mem_alloc = gve_rx_queue_mem_alloc, + .ndo_queue_mem_free = gve_rx_queue_mem_free, + .ndo_queue_start = gve_rx_queue_start, + .ndo_queue_stop = gve_rx_queue_stop, +}; + +static void gve_get_rx_queue_stats(struct net_device *dev, int idx, + struct netdev_queue_stats_rx *rx_stats) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_rx_ring *rx = &priv->rx[idx]; + unsigned int start; + + do { + start = u64_stats_fetch_begin(&rx->statss); + rx_stats->packets = rx->rpackets; + rx_stats->bytes = rx->rbytes; + rx_stats->alloc_fail = rx->rx_skb_alloc_fail + + rx->rx_buf_alloc_fail; + } while (u64_stats_fetch_retry(&rx->statss, start)); +} + +static void gve_get_tx_queue_stats(struct net_device *dev, int idx, + struct netdev_queue_stats_tx *tx_stats) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_tx_ring *tx = &priv->tx[idx]; + unsigned int start; + + do { + start = u64_stats_fetch_begin(&tx->statss); + tx_stats->packets = tx->pkt_done; + tx_stats->bytes = tx->bytes_done; + } while (u64_stats_fetch_retry(&tx->statss, start)); +} + +static void gve_get_base_stats(struct net_device *dev, + struct netdev_queue_stats_rx *rx, + struct netdev_queue_stats_tx *tx) +{ + rx->packets = 0; + rx->bytes = 0; + rx->alloc_fail = 0; + + tx->packets = 0; + tx->bytes = 0; +} + +static const struct netdev_stat_ops gve_stat_ops = { + .get_queue_stats_rx = gve_get_rx_queue_stats, + .get_queue_stats_tx = gve_get_tx_queue_stats, + .get_base_stats = gve_get_base_stats, +}; + static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { int max_tx_queues, max_rx_queues; @@ -2269,6 +2610,8 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, dev); dev->ethtool_ops = &gve_ethtool_ops; dev->netdev_ops = &gve_netdev_ops; + dev->queue_mgmt_ops = &gve_queue_mgmt_ops; + dev->stat_ops = &gve_stat_ops; /* Set default and supported features. * @@ -2297,6 +2640,8 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) priv->service_task_flags = 0x0; priv->state_flags = 0x0; priv->ethtool_flags = 0x0; + priv->rx_cfg.packet_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; + priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; gve_set_probe_in_progress(priv); priv->gve_wq = alloc_ordered_workqueue("gve", 0); @@ -2314,6 +2659,9 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto abort_with_wq; + if (!gve_is_gqi(priv) && !gve_is_qpl(priv)) + dev->netmem_tx = true; + err = register_netdev(dev); if (err) goto abort_with_gve_init; @@ -2368,9 +2716,10 @@ static void gve_shutdown(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct gve_priv *priv = netdev_priv(netdev); - bool was_up = netif_carrier_ok(priv->dev); + bool was_up = netif_running(priv->dev); rtnl_lock(); + netdev_lock(netdev); if (was_up && gve_close(priv->dev)) { /* If the dev was up, attempt to close, if close fails, reset */ gve_reset_and_teardown(priv, was_up); @@ -2378,6 +2727,7 @@ static void gve_shutdown(struct pci_dev *pdev) /* If the dev wasn't up or close worked, finish tearing down */ gve_teardown_priv_resources(priv); } + netdev_unlock(netdev); rtnl_unlock(); } @@ -2386,10 +2736,11 @@ static int gve_suspend(struct pci_dev *pdev, pm_message_t state) { struct net_device *netdev = pci_get_drvdata(pdev); struct gve_priv *priv = netdev_priv(netdev); - bool was_up = netif_carrier_ok(priv->dev); + bool was_up = netif_running(priv->dev); priv->suspend_cnt++; rtnl_lock(); + netdev_lock(netdev); if (was_up && gve_close(priv->dev)) { /* If the dev was up, attempt to close, if close fails, reset */ gve_reset_and_teardown(priv, was_up); @@ -2398,6 +2749,7 @@ static int gve_suspend(struct pci_dev *pdev, pm_message_t state) gve_teardown_priv_resources(priv); } priv->up_before_suspend = was_up; + netdev_unlock(netdev); rtnl_unlock(); return 0; } @@ -2410,7 +2762,9 @@ static int gve_resume(struct pci_dev *pdev) priv->resume_cnt++; rtnl_lock(); + netdev_lock(netdev); err = gve_reset_recovery(priv, priv->up_before_suspend); + netdev_unlock(netdev); rtnl_unlock(); return err; } diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 76615d47e055..90e875c1832f 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -23,11 +23,16 @@ static void gve_rx_free_buffer(struct device *dev, gve_free_page(dev, page_info->page, dma, DMA_FROM_DEVICE); } -static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx) +static void gve_rx_unfill_pages(struct gve_priv *priv, + struct gve_rx_ring *rx, + struct gve_rx_alloc_rings_cfg *cfg) { u32 slots = rx->mask + 1; int i; + if (!rx->data.page_info) + return; + if (rx->data.raw_addressing) { for (i = 0; i < slots; i++) gve_rx_free_buffer(&priv->pdev->dev, &rx->data.page_info[i], @@ -36,8 +41,6 @@ static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx) for (i = 0; i < slots; i++) page_ref_sub(rx->data.page_info[i].page, rx->data.page_info[i].pagecnt_bias - 1); - gve_unassign_qpl(priv, rx->data.qpl->id); - rx->data.qpl = NULL; for (i = 0; i < rx->qpl_copy_pool_mask + 1; i++) { page_ref_sub(rx->qpl_copy_pool[i].page, @@ -49,42 +52,104 @@ static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx) rx->data.page_info = NULL; } -static void gve_rx_free_ring(struct gve_priv *priv, int idx) +static void gve_rx_ctx_clear(struct gve_rx_ctx *ctx) +{ + ctx->skb_head = NULL; + ctx->skb_tail = NULL; + ctx->total_size = 0; + ctx->frag_cnt = 0; + ctx->drop_pkt = false; +} + +static void gve_rx_init_ring_state_gqi(struct gve_rx_ring *rx) +{ + rx->desc.seqno = 1; + rx->cnt = 0; + gve_rx_ctx_clear(&rx->ctx); +} + +static void gve_rx_reset_ring_gqi(struct gve_priv *priv, int idx) { struct gve_rx_ring *rx = &priv->rx[idx]; + const u32 slots = priv->rx_desc_cnt; + size_t size; + + /* Reset desc ring */ + if (rx->desc.desc_ring) { + size = slots * sizeof(rx->desc.desc_ring[0]); + memset(rx->desc.desc_ring, 0, size); + } + + /* Reset q_resources */ + if (rx->q_resources) + memset(rx->q_resources, 0, sizeof(*rx->q_resources)); + + gve_rx_init_ring_state_gqi(rx); +} + +void gve_rx_stop_ring_gqi(struct gve_priv *priv, int idx) +{ + int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); + + if (!gve_rx_was_added_to_block(priv, idx)) + return; + + gve_remove_napi(priv, ntfy_idx); + gve_rx_remove_from_block(priv, idx); + gve_rx_reset_ring_gqi(priv, idx); +} + +void gve_rx_free_ring_gqi(struct gve_priv *priv, struct gve_rx_ring *rx, + struct gve_rx_alloc_rings_cfg *cfg) +{ struct device *dev = &priv->pdev->dev; u32 slots = rx->mask + 1; + int idx = rx->q_num; size_t bytes; + u32 qpl_id; - gve_rx_remove_from_block(priv, idx); - - bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt; - dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus); - rx->desc.desc_ring = NULL; + if (rx->desc.desc_ring) { + bytes = sizeof(struct gve_rx_desc) * cfg->ring_size; + dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus); + rx->desc.desc_ring = NULL; + } - dma_free_coherent(dev, sizeof(*rx->q_resources), - rx->q_resources, rx->q_resources_bus); - rx->q_resources = NULL; + if (rx->q_resources) { + dma_free_coherent(dev, sizeof(*rx->q_resources), + rx->q_resources, rx->q_resources_bus); + rx->q_resources = NULL; + } - gve_rx_unfill_pages(priv, rx); + gve_rx_unfill_pages(priv, rx, cfg); - bytes = sizeof(*rx->data.data_ring) * slots; - dma_free_coherent(dev, bytes, rx->data.data_ring, - rx->data.data_bus); - rx->data.data_ring = NULL; + if (rx->data.data_ring) { + bytes = sizeof(*rx->data.data_ring) * slots; + dma_free_coherent(dev, bytes, rx->data.data_ring, + rx->data.data_bus); + rx->data.data_ring = NULL; + } kvfree(rx->qpl_copy_pool); rx->qpl_copy_pool = NULL; + if (rx->data.qpl) { + qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, idx); + gve_free_queue_page_list(priv, rx->data.qpl, qpl_id); + rx->data.qpl = NULL; + } + netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx); } -static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info, - dma_addr_t addr, struct page *page, __be64 *slot_addr) +static void gve_setup_rx_buffer(struct gve_rx_ring *rx, + struct gve_rx_slot_page_info *page_info, + dma_addr_t addr, struct page *page, + __be64 *slot_addr) { page_info->page = page; page_info->page_offset = 0; page_info->page_address = page_address(page); + page_info->buf_size = rx->packet_buffer_size; *slot_addr = cpu_to_be64(addr); /* The page already has 1 ref */ page_ref_add(page, INT_MAX - 1); @@ -93,7 +158,8 @@ static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info, static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev, struct gve_rx_slot_page_info *page_info, - union gve_rx_data_slot *data_slot) + union gve_rx_data_slot *data_slot, + struct gve_rx_ring *rx) { struct page *page; dma_addr_t dma; @@ -101,14 +167,19 @@ static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev, err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE, GFP_ATOMIC); - if (err) + if (err) { + u64_stats_update_begin(&rx->statss); + rx->rx_buf_alloc_fail++; + u64_stats_update_end(&rx->statss); return err; + } - gve_setup_rx_buffer(page_info, dma, page, &data_slot->addr); + gve_setup_rx_buffer(rx, page_info, dma, page, &data_slot->addr); return 0; } -static int gve_prefill_rx_pages(struct gve_rx_ring *rx) +static int gve_rx_prefill_pages(struct gve_rx_ring *rx, + struct gve_rx_alloc_rings_cfg *cfg) { struct gve_priv *priv = rx->gve; u32 slots; @@ -126,25 +197,19 @@ static int gve_prefill_rx_pages(struct gve_rx_ring *rx) if (!rx->data.page_info) return -ENOMEM; - if (!rx->data.raw_addressing) { - rx->data.qpl = gve_assign_rx_qpl(priv, rx->q_num); - if (!rx->data.qpl) { - kvfree(rx->data.page_info); - rx->data.page_info = NULL; - return -ENOMEM; - } - } for (i = 0; i < slots; i++) { if (!rx->data.raw_addressing) { struct page *page = rx->data.qpl->pages[i]; dma_addr_t addr = i * PAGE_SIZE; - gve_setup_rx_buffer(&rx->data.page_info[i], addr, page, + gve_setup_rx_buffer(rx, &rx->data.page_info[i], addr, + page, &rx->data.data_ring[i].qpl_offset); continue; } - err = gve_rx_alloc_buffer(priv, &priv->pdev->dev, &rx->data.page_info[i], - &rx->data.data_ring[i]); + err = gve_rx_alloc_buffer(priv, &priv->pdev->dev, + &rx->data.page_info[i], + &rx->data.data_ring[i], rx); if (err) goto alloc_err_rda; } @@ -161,6 +226,7 @@ static int gve_prefill_rx_pages(struct gve_rx_ring *rx) rx->qpl_copy_pool[j].page = page; rx->qpl_copy_pool[j].page_offset = 0; rx->qpl_copy_pool[j].page_address = page_address(page); + rx->qpl_copy_pool[j].buf_size = rx->packet_buffer_size; /* The page already has 1 ref. */ page_ref_add(page, INT_MAX - 1); @@ -185,9 +251,6 @@ alloc_err_qpl: page_ref_sub(rx->data.page_info[i].page, rx->data.page_info[i].pagecnt_bias - 1); - gve_unassign_qpl(priv, rx->data.qpl->id); - rx->data.qpl = NULL; - return err; alloc_err_rda: @@ -198,22 +261,25 @@ alloc_err_rda: return err; } -static void gve_rx_ctx_clear(struct gve_rx_ctx *ctx) +void gve_rx_start_ring_gqi(struct gve_priv *priv, int idx) { - ctx->skb_head = NULL; - ctx->skb_tail = NULL; - ctx->total_size = 0; - ctx->frag_cnt = 0; - ctx->drop_pkt = false; + int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); + + gve_rx_add_to_block(priv, idx); + gve_add_napi(priv, ntfy_idx, gve_napi_poll); } -static int gve_rx_alloc_ring(struct gve_priv *priv, int idx) +int gve_rx_alloc_ring_gqi(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg, + struct gve_rx_ring *rx, + int idx) { - struct gve_rx_ring *rx = &priv->rx[idx]; struct device *hdev = &priv->pdev->dev; + u32 slots = cfg->ring_size; int filled_pages; + int qpl_page_cnt; + u32 qpl_id = 0; size_t bytes; - u32 slots; int err; netif_dbg(priv, drv, priv->dev, "allocating rx ring\n"); @@ -222,10 +288,10 @@ static int gve_rx_alloc_ring(struct gve_priv *priv, int idx) rx->gve = priv; rx->q_num = idx; + rx->packet_buffer_size = cfg->packet_buffer_size; - slots = priv->rx_data_slot_cnt; rx->mask = slots - 1; - rx->data.raw_addressing = priv->queue_format == GVE_GQI_RDA_FORMAT; + rx->data.raw_addressing = cfg->raw_addressing; /* alloc rx data ring */ bytes = sizeof(*rx->data.data_ring) * slots; @@ -246,10 +312,22 @@ static int gve_rx_alloc_ring(struct gve_priv *priv, int idx) goto abort_with_slots; } - filled_pages = gve_prefill_rx_pages(rx); + if (!rx->data.raw_addressing) { + qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num); + qpl_page_cnt = cfg->ring_size; + + rx->data.qpl = gve_alloc_queue_page_list(priv, qpl_id, + qpl_page_cnt); + if (!rx->data.qpl) { + err = -ENOMEM; + goto abort_with_copy_pool; + } + } + + filled_pages = gve_rx_prefill_pages(rx, cfg); if (filled_pages < 0) { err = -ENOMEM; - goto abort_with_copy_pool; + goto abort_with_qpl; } rx->fill_cnt = filled_pages; /* Ensure data ring slots (packet buffers) are visible. */ @@ -269,23 +347,17 @@ static int gve_rx_alloc_ring(struct gve_priv *priv, int idx) (unsigned long)rx->data.data_bus); /* alloc rx desc ring */ - bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt; + bytes = sizeof(struct gve_rx_desc) * cfg->ring_size; rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus, GFP_KERNEL); if (!rx->desc.desc_ring) { err = -ENOMEM; goto abort_with_q_resources; } - rx->cnt = 0; - rx->db_threshold = priv->rx_desc_cnt / 2; - rx->desc.seqno = 1; + rx->db_threshold = slots / 2; + gve_rx_init_ring_state_gqi(rx); - /* Allocating half-page buffers allows page-flipping which is faster - * than copying or allocating new pages. - */ - rx->packet_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; gve_rx_ctx_clear(&rx->ctx); - gve_rx_add_to_block(priv, idx); return 0; @@ -294,7 +366,12 @@ abort_with_q_resources: rx->q_resources, rx->q_resources_bus); rx->q_resources = NULL; abort_filled: - gve_rx_unfill_pages(priv, rx); + gve_rx_unfill_pages(priv, rx, cfg); +abort_with_qpl: + if (!rx->data.raw_addressing) { + gve_free_queue_page_list(priv, rx->data.qpl, qpl_id); + rx->data.qpl = NULL; + } abort_with_copy_pool: kvfree(rx->qpl_copy_pool); rx->qpl_copy_pool = NULL; @@ -306,36 +383,52 @@ abort_with_slots: return err; } -int gve_rx_alloc_rings(struct gve_priv *priv) +int gve_rx_alloc_rings_gqi(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg) { + struct gve_rx_ring *rx; int err = 0; - int i; + int i, j; + + rx = kvcalloc(cfg->qcfg_rx->max_queues, sizeof(struct gve_rx_ring), + GFP_KERNEL); + if (!rx) + return -ENOMEM; - for (i = 0; i < priv->rx_cfg.num_queues; i++) { - err = gve_rx_alloc_ring(priv, i); + for (i = 0; i < cfg->qcfg_rx->num_queues; i++) { + err = gve_rx_alloc_ring_gqi(priv, cfg, &rx[i], i); if (err) { netif_err(priv, drv, priv->dev, "Failed to alloc rx ring=%d: err=%d\n", i, err); - break; + goto cleanup; } } - /* Unallocate if there was an error */ - if (err) { - int j; - for (j = 0; j < i; j++) - gve_rx_free_ring(priv, j); - } + cfg->rx = rx; + return 0; + +cleanup: + for (j = 0; j < i; j++) + gve_rx_free_ring_gqi(priv, &rx[j], cfg); + kvfree(rx); return err; } -void gve_rx_free_rings_gqi(struct gve_priv *priv) +void gve_rx_free_rings_gqi(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg) { + struct gve_rx_ring *rx = cfg->rx; int i; - for (i = 0; i < priv->rx_cfg.num_queues; i++) - gve_rx_free_ring(priv, i); + if (!rx) + return; + + for (i = 0; i < cfg->qcfg_rx->num_queues; i++) + gve_rx_free_ring_gqi(priv, &rx[i], cfg); + + kvfree(rx); + cfg->rx = NULL; } void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx) @@ -502,7 +595,7 @@ static struct sk_buff *gve_rx_copy_to_pool(struct gve_rx_ring *rx, copy_page_info->pad = page_info->pad; skb = gve_rx_add_frags(napi, copy_page_info, - rx->packet_buffer_size, len, ctx); + copy_page_info->buf_size, len, ctx); if (unlikely(!skb)) return NULL; @@ -542,7 +635,8 @@ gve_rx_qpl(struct device *dev, struct net_device *netdev, * device. */ if (page_info->can_flip) { - skb = gve_rx_add_frags(napi, page_info, rx->packet_buffer_size, len, ctx); + skb = gve_rx_add_frags(napi, page_info, page_info->buf_size, + len, ctx); /* No point in recycling if we didn't get the skb */ if (skb) { /* Make sure that the page isn't freed. */ @@ -592,7 +686,7 @@ static struct sk_buff *gve_rx_skb(struct gve_priv *priv, struct gve_rx_ring *rx, skb = gve_rx_raw_addressing(&priv->pdev->dev, netdev, page_info, len, napi, data_slot, - rx->packet_buffer_size, ctx); + page_info->buf_size, ctx); } else { skb = gve_rx_qpl(&priv->pdev->dev, netdev, rx, page_info, len, napi, data_slot); @@ -767,7 +861,7 @@ static void gve_rx(struct gve_rx_ring *rx, netdev_features_t feat, void *old_data; int xdp_act; - xdp_init_buff(&xdp, rx->packet_buffer_size, &rx->xdp_rxq); + xdp_init_buff(&xdp, page_info->buf_size, &rx->xdp_rxq); xdp_prepare_buff(&xdp, page_info->page_address + page_info->page_offset, GVE_RX_PAD, len, false); @@ -896,10 +990,7 @@ static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx) gve_rx_free_buffer(dev, page_info, data_slot); page_info->page = NULL; if (gve_rx_alloc_buffer(priv, dev, page_info, - data_slot)) { - u64_stats_update_begin(&rx->statss); - rx->rx_buf_alloc_fail++; - u64_stats_update_end(&rx->statss); + data_slot, rx)) { break; } } diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index f281e42a7ef9..dcb0545baa50 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -16,203 +16,125 @@ #include <net/ipv6.h> #include <net/tcp.h> -static int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs) +static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx) { - return page_count(bs->page_info.page) - bs->page_info.pagecnt_bias; -} - -static void gve_free_page_dqo(struct gve_priv *priv, - struct gve_rx_buf_state_dqo *bs, - bool free_page) -{ - page_ref_sub(bs->page_info.page, bs->page_info.pagecnt_bias - 1); - if (free_page) - gve_free_page(&priv->pdev->dev, bs->page_info.page, bs->addr, - DMA_FROM_DEVICE); - bs->page_info.page = NULL; -} - -static struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx) -{ - struct gve_rx_buf_state_dqo *buf_state; - s16 buffer_id; - - buffer_id = rx->dqo.free_buf_states; - if (unlikely(buffer_id == -1)) - return NULL; - - buf_state = &rx->dqo.buf_states[buffer_id]; - - /* Remove buf_state from free list */ - rx->dqo.free_buf_states = buf_state->next; - - /* Point buf_state to itself to mark it as allocated */ - buf_state->next = buffer_id; - - return buf_state; -} - -static bool gve_buf_state_is_allocated(struct gve_rx_ring *rx, - struct gve_rx_buf_state_dqo *buf_state) -{ - s16 buffer_id = buf_state - rx->dqo.buf_states; - - return buf_state->next == buffer_id; -} - -static void gve_free_buf_state(struct gve_rx_ring *rx, - struct gve_rx_buf_state_dqo *buf_state) -{ - s16 buffer_id = buf_state - rx->dqo.buf_states; + struct device *hdev = &priv->pdev->dev; + int buf_count = rx->dqo.bufq.mask + 1; - buf_state->next = rx->dqo.free_buf_states; - rx->dqo.free_buf_states = buffer_id; + if (rx->dqo.hdr_bufs.data) { + dma_free_coherent(hdev, priv->header_buf_size * buf_count, + rx->dqo.hdr_bufs.data, rx->dqo.hdr_bufs.addr); + rx->dqo.hdr_bufs.data = NULL; + } } -static struct gve_rx_buf_state_dqo * -gve_dequeue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list) +static void gve_rx_init_ring_state_dqo(struct gve_rx_ring *rx, + const u32 buffer_queue_slots, + const u32 completion_queue_slots) { - struct gve_rx_buf_state_dqo *buf_state; - s16 buffer_id; - - buffer_id = list->head; - if (unlikely(buffer_id == -1)) - return NULL; - - buf_state = &rx->dqo.buf_states[buffer_id]; - - /* Remove buf_state from list */ - list->head = buf_state->next; - if (buf_state->next == -1) - list->tail = -1; - - /* Point buf_state to itself to mark it as allocated */ - buf_state->next = buffer_id; - - return buf_state; -} + int i; -static void gve_enqueue_buf_state(struct gve_rx_ring *rx, - struct gve_index_list *list, - struct gve_rx_buf_state_dqo *buf_state) -{ - s16 buffer_id = buf_state - rx->dqo.buf_states; + /* Set buffer queue state */ + rx->dqo.bufq.mask = buffer_queue_slots - 1; + rx->dqo.bufq.head = 0; + rx->dqo.bufq.tail = 0; - buf_state->next = -1; + /* Set completion queue state */ + rx->dqo.complq.num_free_slots = completion_queue_slots; + rx->dqo.complq.mask = completion_queue_slots - 1; + rx->dqo.complq.cur_gen_bit = 0; + rx->dqo.complq.head = 0; - if (list->head == -1) { - list->head = buffer_id; - list->tail = buffer_id; - } else { - int tail = list->tail; + /* Set RX SKB context */ + rx->ctx.skb_head = NULL; + rx->ctx.skb_tail = NULL; - rx->dqo.buf_states[tail].next = buffer_id; - list->tail = buffer_id; + /* Set up linked list of buffer IDs */ + if (rx->dqo.buf_states) { + for (i = 0; i < rx->dqo.num_buf_states - 1; i++) + rx->dqo.buf_states[i].next = i + 1; + rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1; } + + rx->dqo.free_buf_states = 0; + rx->dqo.recycled_buf_states.head = -1; + rx->dqo.recycled_buf_states.tail = -1; + rx->dqo.used_buf_states.head = -1; + rx->dqo.used_buf_states.tail = -1; } -static struct gve_rx_buf_state_dqo * -gve_get_recycled_buf_state(struct gve_rx_ring *rx) +static void gve_rx_reset_ring_dqo(struct gve_priv *priv, int idx) { - struct gve_rx_buf_state_dqo *buf_state; + struct gve_rx_ring *rx = &priv->rx[idx]; + size_t size; int i; - /* Recycled buf states are immediately usable. */ - buf_state = gve_dequeue_buf_state(rx, &rx->dqo.recycled_buf_states); - if (likely(buf_state)) - return buf_state; - - if (unlikely(rx->dqo.used_buf_states.head == -1)) - return NULL; + const u32 buffer_queue_slots = priv->rx_desc_cnt; + const u32 completion_queue_slots = priv->rx_desc_cnt; - /* Used buf states are only usable when ref count reaches 0, which means - * no SKBs refer to them. - * - * Search a limited number before giving up. - */ - for (i = 0; i < 5; i++) { - buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states); - if (gve_buf_ref_cnt(buf_state) == 0) { - rx->dqo.used_buf_states_cnt--; - return buf_state; - } + /* Reset buffer queue */ + if (rx->dqo.bufq.desc_ring) { + size = sizeof(rx->dqo.bufq.desc_ring[0]) * + buffer_queue_slots; + memset(rx->dqo.bufq.desc_ring, 0, size); + } - gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state); + /* Reset completion queue */ + if (rx->dqo.complq.desc_ring) { + size = sizeof(rx->dqo.complq.desc_ring[0]) * + completion_queue_slots; + memset(rx->dqo.complq.desc_ring, 0, size); } - /* For QPL, we cannot allocate any new buffers and must - * wait for the existing ones to be available. - */ - if (rx->dqo.qpl) - return NULL; + /* Reset q_resources */ + if (rx->q_resources) + memset(rx->q_resources, 0, sizeof(*rx->q_resources)); - /* If there are no free buf states discard an entry from - * `used_buf_states` so it can be used. - */ - if (unlikely(rx->dqo.free_buf_states == -1)) { - buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states); - if (gve_buf_ref_cnt(buf_state) == 0) - return buf_state; + /* Reset buf states */ + if (rx->dqo.buf_states) { + for (i = 0; i < rx->dqo.num_buf_states; i++) { + struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i]; - gve_free_page_dqo(rx->gve, buf_state, true); - gve_free_buf_state(rx, buf_state); + if (rx->dqo.page_pool) + gve_free_to_page_pool(rx, bs, false); + else + gve_free_qpl_page_dqo(bs); + } } - return NULL; + gve_rx_init_ring_state_dqo(rx, buffer_queue_slots, + completion_queue_slots); } -static int gve_alloc_page_dqo(struct gve_rx_ring *rx, - struct gve_rx_buf_state_dqo *buf_state) +void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx) { - struct gve_priv *priv = rx->gve; - u32 idx; - - if (!rx->dqo.qpl) { - int err; - - err = gve_alloc_page(priv, &priv->pdev->dev, - &buf_state->page_info.page, - &buf_state->addr, - DMA_FROM_DEVICE, GFP_ATOMIC); - if (err) - return err; - } else { - idx = rx->dqo.next_qpl_page_idx; - if (idx >= priv->rx_pages_per_qpl) { - net_err_ratelimited("%s: Out of QPL pages\n", - priv->dev->name); - return -ENOMEM; - } - buf_state->page_info.page = rx->dqo.qpl->pages[idx]; - buf_state->addr = rx->dqo.qpl->page_buses[idx]; - rx->dqo.next_qpl_page_idx++; - } - buf_state->page_info.page_offset = 0; - buf_state->page_info.page_address = - page_address(buf_state->page_info.page); - buf_state->last_single_ref_offset = 0; + int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); + struct gve_rx_ring *rx = &priv->rx[idx]; - /* The page already has 1 ref. */ - page_ref_add(buf_state->page_info.page, INT_MAX - 1); - buf_state->page_info.pagecnt_bias = INT_MAX; + if (!gve_rx_was_added_to_block(priv, idx)) + return; - return 0; + if (rx->dqo.page_pool) + page_pool_disable_direct_recycling(rx->dqo.page_pool); + gve_remove_napi(priv, ntfy_idx); + gve_rx_remove_from_block(priv, idx); + gve_rx_reset_ring_dqo(priv, idx); } -static void gve_rx_free_ring_dqo(struct gve_priv *priv, int idx) +void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, + struct gve_rx_alloc_rings_cfg *cfg) { - struct gve_rx_ring *rx = &priv->rx[idx]; struct device *hdev = &priv->pdev->dev; size_t completion_queue_slots; size_t buffer_queue_slots; + int idx = rx->q_num; size_t size; + u32 qpl_id; int i; completion_queue_slots = rx->dqo.complq.mask + 1; buffer_queue_slots = rx->dqo.bufq.mask + 1; - gve_rx_remove_from_block(priv, idx); - if (rx->q_resources) { dma_free_coherent(hdev, sizeof(*rx->q_resources), rx->q_resources, rx->q_resources_bus); @@ -221,12 +143,16 @@ static void gve_rx_free_ring_dqo(struct gve_priv *priv, int idx) for (i = 0; i < rx->dqo.num_buf_states; i++) { struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i]; - /* Only free page for RDA. QPL pages are freed in gve_main. */ - if (bs->page_info.page) - gve_free_page_dqo(priv, bs, !rx->dqo.qpl); + + if (rx->dqo.page_pool) + gve_free_to_page_pool(rx, bs, false); + else + gve_free_qpl_page_dqo(bs); } + if (rx->dqo.qpl) { - gve_unassign_qpl(priv, rx->dqo.qpl->id); + qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num); + gve_free_queue_page_list(priv, rx->dqo.qpl, qpl_id); rx->dqo.qpl = NULL; } @@ -248,50 +174,78 @@ static void gve_rx_free_ring_dqo(struct gve_priv *priv, int idx) kvfree(rx->dqo.buf_states); rx->dqo.buf_states = NULL; + if (rx->dqo.page_pool) { + page_pool_destroy(rx->dqo.page_pool); + rx->dqo.page_pool = NULL; + } + + gve_rx_free_hdr_bufs(priv, rx); + netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx); } -static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx) +static int gve_rx_alloc_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx, + const u32 buf_count) +{ + struct device *hdev = &priv->pdev->dev; + + rx->dqo.hdr_bufs.data = dma_alloc_coherent(hdev, priv->header_buf_size * buf_count, + &rx->dqo.hdr_bufs.addr, GFP_KERNEL); + if (!rx->dqo.hdr_bufs.data) + return -ENOMEM; + + return 0; +} + +void gve_rx_start_ring_dqo(struct gve_priv *priv, int idx) +{ + int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); + + gve_rx_add_to_block(priv, idx); + gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo); +} + +int gve_rx_alloc_ring_dqo(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg, + struct gve_rx_ring *rx, + int idx) { - struct gve_rx_ring *rx = &priv->rx[idx]; struct device *hdev = &priv->pdev->dev; + struct page_pool *pool; + int qpl_page_cnt; size_t size; - int i; + u32 qpl_id; - const u32 buffer_queue_slots = - priv->queue_format == GVE_DQO_RDA_FORMAT ? - priv->options_dqo_rda.rx_buff_ring_entries : priv->rx_desc_cnt; - const u32 completion_queue_slots = priv->rx_desc_cnt; + const u32 buffer_queue_slots = cfg->ring_size; + const u32 completion_queue_slots = cfg->ring_size; netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n"); memset(rx, 0, sizeof(*rx)); rx->gve = priv; rx->q_num = idx; - rx->dqo.bufq.mask = buffer_queue_slots - 1; - rx->dqo.complq.num_free_slots = completion_queue_slots; - rx->dqo.complq.mask = completion_queue_slots - 1; - rx->ctx.skb_head = NULL; - rx->ctx.skb_tail = NULL; + rx->packet_buffer_size = cfg->packet_buffer_size; + + if (cfg->xdp) { + rx->packet_buffer_truesize = GVE_XDP_RX_BUFFER_SIZE_DQO; + rx->rx_headroom = XDP_PACKET_HEADROOM; + } else { + rx->packet_buffer_truesize = rx->packet_buffer_size; + rx->rx_headroom = 0; + } - rx->dqo.num_buf_states = priv->queue_format == GVE_DQO_RDA_FORMAT ? - min_t(s16, S16_MAX, buffer_queue_slots * 4) : - priv->rx_pages_per_qpl; + rx->dqo.num_buf_states = cfg->raw_addressing ? buffer_queue_slots : + gve_get_rx_pages_per_qpl_dqo(cfg->ring_size); rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states, sizeof(rx->dqo.buf_states[0]), GFP_KERNEL); if (!rx->dqo.buf_states) return -ENOMEM; - /* Set up linked list of buffer IDs */ - for (i = 0; i < rx->dqo.num_buf_states - 1; i++) - rx->dqo.buf_states[i].next = i + 1; - - rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1; - rx->dqo.recycled_buf_states.head = -1; - rx->dqo.recycled_buf_states.tail = -1; - rx->dqo.used_buf_states.head = -1; - rx->dqo.used_buf_states.tail = -1; + /* Allocate header buffers for header-split */ + if (cfg->enable_header_split) + if (gve_rx_alloc_hdr_bufs(priv, rx, buffer_queue_slots)) + goto err; /* Allocate RX completion queue */ size = sizeof(rx->dqo.complq.desc_ring[0]) * @@ -308,8 +262,18 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx) if (!rx->dqo.bufq.desc_ring) goto err; - if (priv->queue_format != GVE_DQO_RDA_FORMAT) { - rx->dqo.qpl = gve_assign_rx_qpl(priv, rx->q_num); + if (cfg->raw_addressing) { + pool = gve_rx_create_page_pool(priv, rx, cfg->xdp); + if (IS_ERR(pool)) + goto err; + + rx->dqo.page_pool = pool; + } else { + qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num); + qpl_page_cnt = gve_get_rx_pages_per_qpl_dqo(cfg->ring_size); + + rx->dqo.qpl = gve_alloc_queue_page_list(priv, qpl_id, + qpl_page_cnt); if (!rx->dqo.qpl) goto err; rx->dqo.next_qpl_page_idx = 0; @@ -320,12 +284,13 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx) if (!rx->q_resources) goto err; - gve_rx_add_to_block(priv, idx); + gve_rx_init_ring_state_dqo(rx, buffer_queue_slots, + completion_queue_slots); return 0; err: - gve_rx_free_ring_dqo(priv, idx); + gve_rx_free_ring_dqo(priv, rx, cfg); return -ENOMEM; } @@ -337,13 +302,20 @@ void gve_rx_write_doorbell_dqo(const struct gve_priv *priv, int queue_idx) iowrite32(rx->dqo.bufq.tail, &priv->db_bar2[index]); } -int gve_rx_alloc_rings_dqo(struct gve_priv *priv) +int gve_rx_alloc_rings_dqo(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg) { - int err = 0; + struct gve_rx_ring *rx; + int err; int i; - for (i = 0; i < priv->rx_cfg.num_queues; i++) { - err = gve_rx_alloc_ring_dqo(priv, i); + rx = kvcalloc(cfg->qcfg_rx->max_queues, sizeof(struct gve_rx_ring), + GFP_KERNEL); + if (!rx) + return -ENOMEM; + + for (i = 0; i < cfg->qcfg_rx->num_queues; i++) { + err = gve_rx_alloc_ring_dqo(priv, cfg, &rx[i], i); if (err) { netif_err(priv, drv, priv->dev, "Failed to alloc rx ring=%d: err=%d\n", @@ -352,21 +324,30 @@ int gve_rx_alloc_rings_dqo(struct gve_priv *priv) } } + cfg->rx = rx; return 0; err: for (i--; i >= 0; i--) - gve_rx_free_ring_dqo(priv, i); - + gve_rx_free_ring_dqo(priv, &rx[i], cfg); + kvfree(rx); return err; } -void gve_rx_free_rings_dqo(struct gve_priv *priv) +void gve_rx_free_rings_dqo(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg) { + struct gve_rx_ring *rx = cfg->rx; int i; - for (i = 0; i < priv->rx_cfg.num_queues; i++) - gve_rx_free_ring_dqo(priv, i); + if (!rx) + return; + + for (i = 0; i < cfg->qcfg_rx->num_queues; i++) + gve_rx_free_ring_dqo(priv, &rx[i], cfg); + + kvfree(rx); + cfg->rx = NULL; } void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx) @@ -384,26 +365,18 @@ void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx) num_avail_slots = min_t(u32, num_avail_slots, complq->num_free_slots); while (num_posted < num_avail_slots) { struct gve_rx_desc_dqo *desc = &bufq->desc_ring[bufq->tail]; - struct gve_rx_buf_state_dqo *buf_state; - - buf_state = gve_get_recycled_buf_state(rx); - if (unlikely(!buf_state)) { - buf_state = gve_alloc_buf_state(rx); - if (unlikely(!buf_state)) - break; - - if (unlikely(gve_alloc_page_dqo(rx, buf_state))) { - u64_stats_update_begin(&rx->statss); - rx->rx_buf_alloc_fail++; - u64_stats_update_end(&rx->statss); - gve_free_buf_state(rx, buf_state); - break; - } + + if (unlikely(gve_alloc_buffer(rx, desc))) { + u64_stats_update_begin(&rx->statss); + rx->rx_buf_alloc_fail++; + u64_stats_update_end(&rx->statss); + break; } - desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states); - desc->buf_addr = cpu_to_le64(buf_state->addr + - buf_state->page_info.page_offset); + if (rx->dqo.hdr_bufs.data) + desc->header_buf_addr = + cpu_to_le64(rx->dqo.hdr_bufs.addr + + priv->header_buf_size * bufq->tail); bufq->tail = (bufq->tail + 1) & bufq->mask; complq->num_free_slots--; @@ -416,48 +389,6 @@ void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx) rx->fill_cnt += num_posted; } -static void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx, - struct gve_rx_buf_state_dqo *buf_state) -{ - const int data_buffer_size = priv->data_buffer_size_dqo; - int pagecount; - - /* Can't reuse if we only fit one buffer per page */ - if (data_buffer_size * 2 > PAGE_SIZE) - goto mark_used; - - pagecount = gve_buf_ref_cnt(buf_state); - - /* Record the offset when we have a single remaining reference. - * - * When this happens, we know all of the other offsets of the page are - * usable. - */ - if (pagecount == 1) { - buf_state->last_single_ref_offset = - buf_state->page_info.page_offset; - } - - /* Use the next buffer sized chunk in the page. */ - buf_state->page_info.page_offset += data_buffer_size; - buf_state->page_info.page_offset &= (PAGE_SIZE - 1); - - /* If we wrap around to the same offset without ever dropping to 1 - * reference, then we don't know if this offset was ever freed. - */ - if (buf_state->page_info.page_offset == - buf_state->last_single_ref_offset) { - goto mark_used; - } - - gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state); - return; - -mark_used: - gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state); - rx->dqo.used_buf_states_cnt++; -} - static void gve_rx_skb_csum(struct sk_buff *skb, const struct gve_rx_compl_desc_dqo *desc, struct gve_ptype ptype) @@ -506,11 +437,13 @@ static void gve_rx_skb_hash(struct sk_buff *skb, skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type); } -static void gve_rx_free_skb(struct gve_rx_ring *rx) +static void gve_rx_free_skb(struct napi_struct *napi, struct gve_rx_ring *rx) { if (!rx->ctx.skb_head) return; + if (rx->ctx.skb_head == napi->skb) + napi->skb = NULL; dev_kfree_skb_any(rx->ctx.skb_head); rx->ctx.skb_head = NULL; rx->ctx.skb_tail = NULL; @@ -553,6 +486,25 @@ static int gve_rx_copy_ondemand(struct gve_rx_ring *rx, return 0; } +static void gve_skb_add_rx_frag(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state, + int num_frags, u16 buf_len) +{ + if (rx->dqo.page_pool) { + skb_add_rx_frag_netmem(rx->ctx.skb_tail, num_frags, + buf_state->page_info.netmem, + buf_state->page_info.page_offset + + buf_state->page_info.pad, buf_len, + buf_state->page_info.buf_size); + } else { + skb_add_rx_frag(rx->ctx.skb_tail, num_frags, + buf_state->page_info.page, + buf_state->page_info.page_offset + + buf_state->page_info.pad, buf_len, + buf_state->page_info.buf_size); + } +} + /* Chains multi skbs for single rx packet. * Returns 0 if buffer is appended, -1 otherwise. */ @@ -570,6 +522,9 @@ static int gve_rx_append_frags(struct napi_struct *napi, if (!skb) return -1; + if (rx->dqo.page_pool) + skb_mark_for_recycle(skb); + if (rx->ctx.skb_tail == rx->ctx.skb_head) skb_shinfo(rx->ctx.skb_head)->frag_list = skb; else @@ -580,39 +535,58 @@ static int gve_rx_append_frags(struct napi_struct *napi, if (rx->ctx.skb_tail != rx->ctx.skb_head) { rx->ctx.skb_head->len += buf_len; rx->ctx.skb_head->data_len += buf_len; - rx->ctx.skb_head->truesize += priv->data_buffer_size_dqo; + rx->ctx.skb_head->truesize += buf_state->page_info.buf_size; } /* Trigger ondemand page allocation if we are running low on buffers */ if (gve_rx_should_trigger_copy_ondemand(rx)) return gve_rx_copy_ondemand(rx, buf_state, buf_len); - skb_add_rx_frag(rx->ctx.skb_tail, num_frags, - buf_state->page_info.page, - buf_state->page_info.page_offset, - buf_len, priv->data_buffer_size_dqo); - gve_dec_pagecnt_bias(&buf_state->page_info); - - /* Advances buffer page-offset if page is partially used. - * Marks buffer as used if page is full. - */ - gve_try_recycle_buf(priv, rx, buf_state); + gve_skb_add_rx_frag(rx, buf_state, num_frags, buf_len); + gve_reuse_buffer(rx, buf_state); return 0; } +static void gve_xdp_done_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, + struct xdp_buff *xdp, struct bpf_prog *xprog, + int xdp_act, + struct gve_rx_buf_state_dqo *buf_state) +{ + u64_stats_update_begin(&rx->statss); + switch (xdp_act) { + case XDP_ABORTED: + case XDP_DROP: + default: + rx->xdp_actions[xdp_act]++; + break; + case XDP_TX: + rx->xdp_tx_errors++; + break; + case XDP_REDIRECT: + rx->xdp_redirect_errors++; + break; + } + u64_stats_update_end(&rx->statss); + gve_free_buffer(rx, buf_state); +} + /* Returns 0 if descriptor is completed successfully. * Returns -EINVAL if descriptor is invalid. * Returns -ENOMEM if data cannot be copied to skb. */ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, const struct gve_rx_compl_desc_dqo *compl_desc, - int queue_idx) + u32 desc_idx, int queue_idx) { const u16 buffer_id = le16_to_cpu(compl_desc->buf_id); + const bool hbo = compl_desc->header_buffer_overflow; const bool eop = compl_desc->end_of_packet != 0; + const bool hsplit = compl_desc->split_header; struct gve_rx_buf_state_dqo *buf_state; struct gve_priv *priv = rx->gve; + struct bpf_prog *xprog; u16 buf_len; + u16 hdr_len; if (unlikely(buffer_id >= rx->dqo.num_buf_states)) { net_err_ratelimited("%s: Invalid RX buffer_id=%u\n", @@ -627,21 +601,52 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, } if (unlikely(compl_desc->rx_error)) { - gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, - buf_state); + gve_free_buffer(rx, buf_state); return -EINVAL; } buf_len = compl_desc->packet_len; + hdr_len = compl_desc->header_len; /* Page might have not been used for awhile and was likely last written * by a different thread. */ - prefetch(buf_state->page_info.page); + if (rx->dqo.page_pool) { + if (!netmem_is_net_iov(buf_state->page_info.netmem)) + prefetch(netmem_to_page(buf_state->page_info.netmem)); + } else { + prefetch(buf_state->page_info.page); + } + + /* Copy the header into the skb in the case of header split */ + if (hsplit) { + int unsplit = 0; + + if (hdr_len && !hbo) { + rx->ctx.skb_head = gve_rx_copy_data(priv->dev, napi, + rx->dqo.hdr_bufs.data + + desc_idx * priv->header_buf_size, + hdr_len); + if (unlikely(!rx->ctx.skb_head)) + goto error; + rx->ctx.skb_tail = rx->ctx.skb_head; + + if (rx->dqo.page_pool) + skb_mark_for_recycle(rx->ctx.skb_head); + } else { + unsplit = 1; + } + u64_stats_update_begin(&rx->statss); + rx->rx_hsplit_pkt++; + rx->rx_hsplit_unsplit_pkt += unsplit; + rx->rx_hsplit_bytes += hdr_len; + u64_stats_update_end(&rx->statss); + } /* Sync the portion of dma buffer for CPU to read. */ dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr, - buf_state->page_info.page_offset, + buf_state->page_info.page_offset + + buf_state->page_info.pad, buf_len, DMA_FROM_DEVICE); /* Append to current skb if one exists. */ @@ -653,6 +658,34 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, return 0; } + xprog = READ_ONCE(priv->xdp_prog); + if (xprog) { + struct xdp_buff xdp; + void *old_data; + int xdp_act; + + xdp_init_buff(&xdp, buf_state->page_info.buf_size, + &rx->xdp_rxq); + xdp_prepare_buff(&xdp, + buf_state->page_info.page_address + + buf_state->page_info.page_offset, + buf_state->page_info.pad, + buf_len, false); + old_data = xdp.data; + xdp_act = bpf_prog_run_xdp(xprog, &xdp); + buf_state->page_info.pad += xdp.data - old_data; + buf_len = xdp.data_end - xdp.data; + if (xdp_act != XDP_PASS) { + gve_xdp_done_dqo(priv, rx, &xdp, xprog, xdp_act, + buf_state); + return 0; + } + + u64_stats_update_begin(&rx->statss); + rx->xdp_actions[XDP_PASS]++; + u64_stats_update_end(&rx->statss); + } + if (eop && buf_len <= priv->rx_copybreak) { rx->ctx.skb_head = gve_rx_copy(priv->dev, napi, &buf_state->page_info, buf_len); @@ -665,8 +698,7 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, rx->rx_copybreak_pkt++; u64_stats_update_end(&rx->statss); - gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, - buf_state); + gve_free_buffer(rx, buf_state); return 0; } @@ -681,16 +713,15 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, return 0; } - skb_add_rx_frag(rx->ctx.skb_head, 0, buf_state->page_info.page, - buf_state->page_info.page_offset, buf_len, - priv->data_buffer_size_dqo); - gve_dec_pagecnt_bias(&buf_state->page_info); + if (rx->dqo.page_pool) + skb_mark_for_recycle(rx->ctx.skb_head); - gve_try_recycle_buf(priv, rx, buf_state); + gve_skb_add_rx_frag(rx, buf_state, 0, buf_len); + gve_reuse_buffer(rx, buf_state); return 0; error: - gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state); + gve_free_buffer(rx, buf_state); return -ENOMEM; } @@ -781,9 +812,9 @@ int gve_rx_poll_dqo(struct gve_notify_block *block, int budget) /* Do not read data until we own the descriptor */ dma_rmb(); - err = gve_rx_dqo(napi, rx, compl_desc, rx->q_num); + err = gve_rx_dqo(napi, rx, compl_desc, complq->head, rx->q_num); if (err < 0) { - gve_rx_free_skb(rx); + gve_rx_free_skb(napi, rx); u64_stats_update_begin(&rx->statss); if (err == -ENOMEM) rx->rx_skb_alloc_fail++; @@ -826,7 +857,7 @@ int gve_rx_poll_dqo(struct gve_notify_block *block, int budget) /* gve_rx_complete_skb() will consume skb if successful */ if (gve_rx_complete_skb(rx, napi, compl_desc, feat) != 0) { - gve_rx_free_skb(rx); + gve_rx_free_skb(napi, rx); u64_stats_update_begin(&rx->statss); rx->rx_desc_err_dropped_pkt++; u64_stats_update_end(&rx->statss); diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index 07ba124780df..1b40bf0c811a 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -158,15 +158,16 @@ static int gve_clean_xdp_done(struct gve_priv *priv, struct gve_tx_ring *tx, u32 to_do) { struct gve_tx_buffer_state *info; - u32 clean_end = tx->done + to_do; u64 pkts = 0, bytes = 0; size_t space_freed = 0; u32 xsk_complete = 0; u32 idx; + int i; - for (; tx->done < clean_end; tx->done++) { + for (i = 0; i < to_do; i++) { idx = tx->done & tx->mask; info = &tx->info[idx]; + tx->done++; if (unlikely(!info->xdp.size)) continue; @@ -196,29 +197,43 @@ static int gve_clean_xdp_done(struct gve_priv *priv, struct gve_tx_ring *tx, static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx, u32 to_do, bool try_to_wake); -static void gve_tx_free_ring(struct gve_priv *priv, int idx) +void gve_tx_stop_ring_gqi(struct gve_priv *priv, int idx) { + int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); struct gve_tx_ring *tx = &priv->tx[idx]; + + if (!gve_tx_was_added_to_block(priv, idx)) + return; + + gve_remove_napi(priv, ntfy_idx); + if (tx->q_num < priv->tx_cfg.num_queues) + gve_clean_tx_done(priv, tx, priv->tx_desc_cnt, false); + else + gve_clean_xdp_done(priv, tx, priv->tx_desc_cnt); + netdev_tx_reset_queue(tx->netdev_txq); + gve_tx_remove_from_block(priv, idx); +} + +static void gve_tx_free_ring_gqi(struct gve_priv *priv, struct gve_tx_ring *tx, + struct gve_tx_alloc_rings_cfg *cfg) +{ struct device *hdev = &priv->pdev->dev; + int idx = tx->q_num; size_t bytes; + u32 qpl_id; u32 slots; - gve_tx_remove_from_block(priv, idx); slots = tx->mask + 1; - if (tx->q_num < priv->tx_cfg.num_queues) { - gve_clean_tx_done(priv, tx, priv->tx_desc_cnt, false); - netdev_tx_reset_queue(tx->netdev_txq); - } else { - gve_clean_xdp_done(priv, tx, priv->tx_desc_cnt); - } - dma_free_coherent(hdev, sizeof(*tx->q_resources), tx->q_resources, tx->q_resources_bus); tx->q_resources = NULL; - if (!tx->raw_addressing) { - gve_tx_fifo_release(priv, &tx->tx_fifo); - gve_unassign_qpl(priv, tx->tx_fifo.qpl->id); + if (tx->tx_fifo.qpl) { + if (tx->tx_fifo.base) + gve_tx_fifo_release(priv, &tx->tx_fifo); + + qpl_id = gve_tx_qpl_id(priv, tx->q_num); + gve_free_queue_page_list(priv, tx->tx_fifo.qpl, qpl_id); tx->tx_fifo.qpl = NULL; } @@ -232,11 +247,25 @@ static void gve_tx_free_ring(struct gve_priv *priv, int idx) netif_dbg(priv, drv, priv->dev, "freed tx queue %d\n", idx); } -static int gve_tx_alloc_ring(struct gve_priv *priv, int idx) +void gve_tx_start_ring_gqi(struct gve_priv *priv, int idx) { + int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); struct gve_tx_ring *tx = &priv->tx[idx]; + + gve_tx_add_to_block(priv, idx); + + tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx); + gve_add_napi(priv, ntfy_idx, gve_napi_poll); +} + +static int gve_tx_alloc_ring_gqi(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *cfg, + struct gve_tx_ring *tx, + int idx) +{ struct device *hdev = &priv->pdev->dev; - u32 slots = priv->tx_desc_cnt; + int qpl_page_cnt; + u32 qpl_id = 0; size_t bytes; /* Make sure everything is zeroed to start */ @@ -245,25 +274,30 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx) spin_lock_init(&tx->xdp_lock); tx->q_num = idx; - tx->mask = slots - 1; + tx->mask = cfg->ring_size - 1; /* alloc metadata */ - tx->info = vcalloc(slots, sizeof(*tx->info)); + tx->info = vcalloc(cfg->ring_size, sizeof(*tx->info)); if (!tx->info) return -ENOMEM; /* alloc tx queue */ - bytes = sizeof(*tx->desc) * slots; + bytes = sizeof(*tx->desc) * cfg->ring_size; tx->desc = dma_alloc_coherent(hdev, bytes, &tx->bus, GFP_KERNEL); if (!tx->desc) goto abort_with_info; - tx->raw_addressing = priv->queue_format == GVE_GQI_RDA_FORMAT; - tx->dev = &priv->pdev->dev; + tx->raw_addressing = cfg->raw_addressing; + tx->dev = hdev; if (!tx->raw_addressing) { - tx->tx_fifo.qpl = gve_assign_tx_qpl(priv, idx); + qpl_id = gve_tx_qpl_id(priv, tx->q_num); + qpl_page_cnt = priv->tx_pages_per_qpl; + + tx->tx_fifo.qpl = gve_alloc_queue_page_list(priv, qpl_id, + qpl_page_cnt); if (!tx->tx_fifo.qpl) goto abort_with_desc; + /* map Tx FIFO */ if (gve_tx_fifo_init(priv, &tx->tx_fifo)) goto abort_with_qpl; @@ -277,20 +311,16 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx) if (!tx->q_resources) goto abort_with_fifo; - netif_dbg(priv, drv, priv->dev, "tx[%d]->bus=%lx\n", idx, - (unsigned long)tx->bus); - if (idx < priv->tx_cfg.num_queues) - tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx); - gve_tx_add_to_block(priv, idx); - return 0; abort_with_fifo: if (!tx->raw_addressing) gve_tx_fifo_release(priv, &tx->tx_fifo); abort_with_qpl: - if (!tx->raw_addressing) - gve_unassign_qpl(priv, tx->tx_fifo.qpl->id); + if (!tx->raw_addressing) { + gve_free_queue_page_list(priv, tx->tx_fifo.qpl, qpl_id); + tx->tx_fifo.qpl = NULL; + } abort_with_desc: dma_free_coherent(hdev, bytes, tx->desc, tx->bus); tx->desc = NULL; @@ -300,36 +330,60 @@ abort_with_info: return -ENOMEM; } -int gve_tx_alloc_rings(struct gve_priv *priv, int start_id, int num_rings) +int gve_tx_alloc_rings_gqi(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *cfg) { + struct gve_tx_ring *tx = cfg->tx; + int total_queues; int err = 0; - int i; + int i, j; + + total_queues = cfg->qcfg->num_queues + cfg->num_xdp_rings; + if (total_queues > cfg->qcfg->max_queues) { + netif_err(priv, drv, priv->dev, + "Cannot alloc more than the max num of Tx rings\n"); + return -EINVAL; + } + + tx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_tx_ring), + GFP_KERNEL); + if (!tx) + return -ENOMEM; - for (i = start_id; i < start_id + num_rings; i++) { - err = gve_tx_alloc_ring(priv, i); + for (i = 0; i < total_queues; i++) { + err = gve_tx_alloc_ring_gqi(priv, cfg, &tx[i], i); if (err) { netif_err(priv, drv, priv->dev, "Failed to alloc tx ring=%d: err=%d\n", i, err); - break; + goto cleanup; } } - /* Unallocate if there was an error */ - if (err) { - int j; - for (j = start_id; j < i; j++) - gve_tx_free_ring(priv, j); - } + cfg->tx = tx; + return 0; + +cleanup: + for (j = 0; j < i; j++) + gve_tx_free_ring_gqi(priv, &tx[j], cfg); + kvfree(tx); return err; } -void gve_tx_free_rings_gqi(struct gve_priv *priv, int start_id, int num_rings) +void gve_tx_free_rings_gqi(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *cfg) { + struct gve_tx_ring *tx = cfg->tx; int i; - for (i = start_id; i < start_id + num_rings; i++) - gve_tx_free_ring(priv, i); + if (!tx) + return; + + for (i = 0; i < cfg->qcfg->num_queues + cfg->qcfg->num_xdp_queues; i++) + gve_tx_free_ring_gqi(priv, &tx[i], cfg); + + kvfree(tx); + cfg->tx = NULL; } /* gve_tx_avail - Calculates the number of slots available in the ring @@ -776,11 +830,14 @@ int gve_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, struct gve_tx_ring *tx; int i, err = 0, qid; - if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK) || !priv->xdp_prog) return -EINVAL; + if (!gve_get_napi_enabled(priv)) + return -ENETDOWN; + qid = gve_xdp_tx_queue_id(priv, - smp_processor_id() % priv->num_xdp_queues); + smp_processor_id() % priv->tx_cfg.num_xdp_queues); tx = &priv->tx[qid]; @@ -895,14 +952,10 @@ static int gve_xsk_tx(struct gve_priv *priv, struct gve_tx_ring *tx, spin_lock(&tx->xdp_lock); while (sent < budget) { - if (!gve_can_tx(tx, GVE_TX_START_THRESH)) + if (!gve_can_tx(tx, GVE_TX_START_THRESH) || + !xsk_tx_peek_desc(tx->xsk_pool, &desc)) goto out; - if (!xsk_tx_peek_desc(tx->xsk_pool, &desc)) { - tx->xdp_xsk_done = tx->xdp_xsk_wakeup; - goto out; - } - data = xsk_buff_raw_get_data(tx->xsk_pool, desc.addr); nsegs = gve_tx_fill_xdp(priv, tx, data, desc.len, NULL, true); tx->req += nsegs; @@ -917,33 +970,41 @@ out: return sent; } +int gve_xsk_tx_poll(struct gve_notify_block *rx_block, int budget) +{ + struct gve_rx_ring *rx = rx_block->rx; + struct gve_priv *priv = rx->gve; + struct gve_tx_ring *tx; + int sent = 0; + + tx = &priv->tx[gve_xdp_tx_queue_id(priv, rx->q_num)]; + if (tx->xsk_pool) { + sent = gve_xsk_tx(priv, tx, budget); + + u64_stats_update_begin(&tx->statss); + tx->xdp_xsk_sent += sent; + u64_stats_update_end(&tx->statss); + if (xsk_uses_need_wakeup(tx->xsk_pool)) + xsk_set_tx_need_wakeup(tx->xsk_pool); + } + + return sent; +} + bool gve_xdp_poll(struct gve_notify_block *block, int budget) { struct gve_priv *priv = block->priv; struct gve_tx_ring *tx = block->tx; u32 nic_done; - bool repoll; u32 to_do; /* Find out how much work there is to be done */ nic_done = gve_tx_load_event_counter(priv, tx); to_do = min_t(u32, (nic_done - tx->done), budget); gve_clean_xdp_done(priv, tx, to_do); - repoll = nic_done != tx->done; - - if (tx->xsk_pool) { - int sent = gve_xsk_tx(priv, tx, budget); - - u64_stats_update_begin(&tx->statss); - tx->xdp_xsk_sent += sent; - u64_stats_update_end(&tx->statss); - repoll |= (sent == budget); - if (xsk_uses_need_wakeup(tx->xsk_pool)) - xsk_set_tx_need_wakeup(tx->xsk_pool); - } /* If we still have work we want to repoll */ - return repoll; + return nic_done != tx->done; } bool gve_tx_poll(struct gve_notify_block *block, int budget) diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c index f59c4710f118..a27f1574a733 100644 --- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c @@ -188,13 +188,28 @@ static void gve_tx_clean_pending_packets(struct gve_tx_ring *tx) } } -static void gve_tx_free_ring_dqo(struct gve_priv *priv, int idx) +void gve_tx_stop_ring_dqo(struct gve_priv *priv, int idx) { + int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); struct gve_tx_ring *tx = &priv->tx[idx]; - struct device *hdev = &priv->pdev->dev; - size_t bytes; + if (!gve_tx_was_added_to_block(priv, idx)) + return; + + gve_remove_napi(priv, ntfy_idx); + gve_clean_tx_done_dqo(priv, tx, /*napi=*/NULL); + netdev_tx_reset_queue(tx->netdev_txq); + gve_tx_clean_pending_packets(tx); gve_tx_remove_from_block(priv, idx); +} + +static void gve_tx_free_ring_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, + struct gve_tx_alloc_rings_cfg *cfg) +{ + struct device *hdev = &priv->pdev->dev; + int idx = tx->q_num; + size_t bytes; + u32 qpl_id; if (tx->q_resources) { dma_free_coherent(hdev, sizeof(*tx->q_resources), @@ -223,7 +238,8 @@ static void gve_tx_free_ring_dqo(struct gve_priv *priv, int idx) tx->dqo.tx_qpl_buf_next = NULL; if (tx->dqo.qpl) { - gve_unassign_qpl(priv, tx->dqo.qpl->id); + qpl_id = gve_tx_qpl_id(priv, tx->q_num); + gve_free_queue_page_list(priv, tx->dqo.qpl, qpl_id); tx->dqo.qpl = NULL; } @@ -253,25 +269,37 @@ static int gve_tx_qpl_buf_init(struct gve_tx_ring *tx) return 0; } -static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, int idx) +void gve_tx_start_ring_dqo(struct gve_priv *priv, int idx) { + int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); struct gve_tx_ring *tx = &priv->tx[idx]; + + gve_tx_add_to_block(priv, idx); + + tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx); + gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo); +} + +static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *cfg, + struct gve_tx_ring *tx, + int idx) +{ struct device *hdev = &priv->pdev->dev; int num_pending_packets; + int qpl_page_cnt; size_t bytes; + u32 qpl_id; int i; memset(tx, 0, sizeof(*tx)); tx->q_num = idx; - tx->dev = &priv->pdev->dev; - tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx); + tx->dev = hdev; atomic_set_release(&tx->dqo_compl.hw_tx_head, 0); /* Queue sizes must be a power of 2 */ - tx->mask = priv->tx_desc_cnt - 1; - tx->dqo.complq_mask = priv->queue_format == GVE_DQO_RDA_FORMAT ? - priv->options_dqo_rda.tx_comp_ring_entries - 1 : - tx->mask; + tx->mask = cfg->ring_size - 1; + tx->dqo.complq_mask = tx->mask; /* The max number of pending packets determines the maximum number of * descriptors which maybe written to the completion queue. @@ -327,8 +355,12 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, int idx) if (!tx->q_resources) goto err; - if (gve_is_qpl(priv)) { - tx->dqo.qpl = gve_assign_tx_qpl(priv, idx); + if (!cfg->raw_addressing) { + qpl_id = gve_tx_qpl_id(priv, tx->q_num); + qpl_page_cnt = priv->tx_pages_per_qpl; + + tx->dqo.qpl = gve_alloc_queue_page_list(priv, qpl_id, + qpl_page_cnt); if (!tx->dqo.qpl) goto err; @@ -336,22 +368,35 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, int idx) goto err; } - gve_tx_add_to_block(priv, idx); - return 0; err: - gve_tx_free_ring_dqo(priv, idx); + gve_tx_free_ring_dqo(priv, tx, cfg); return -ENOMEM; } -int gve_tx_alloc_rings_dqo(struct gve_priv *priv) +int gve_tx_alloc_rings_dqo(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *cfg) { + struct gve_tx_ring *tx = cfg->tx; + int total_queues; int err = 0; - int i; + int i, j; + + total_queues = cfg->qcfg->num_queues + cfg->num_xdp_rings; + if (total_queues > cfg->qcfg->max_queues) { + netif_err(priv, drv, priv->dev, + "Cannot alloc more than the max num of Tx rings\n"); + return -EINVAL; + } + + tx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_tx_ring), + GFP_KERNEL); + if (!tx) + return -ENOMEM; - for (i = 0; i < priv->tx_cfg.num_queues; i++) { - err = gve_tx_alloc_ring_dqo(priv, i); + for (i = 0; i < total_queues; i++) { + err = gve_tx_alloc_ring_dqo(priv, cfg, &tx[i], i); if (err) { netif_err(priv, drv, priv->dev, "Failed to alloc tx ring=%d: err=%d\n", @@ -360,28 +405,30 @@ int gve_tx_alloc_rings_dqo(struct gve_priv *priv) } } + cfg->tx = tx; return 0; err: - for (i--; i >= 0; i--) - gve_tx_free_ring_dqo(priv, i); - + for (j = 0; j < i; j++) + gve_tx_free_ring_dqo(priv, &tx[j], cfg); + kvfree(tx); return err; } -void gve_tx_free_rings_dqo(struct gve_priv *priv) +void gve_tx_free_rings_dqo(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *cfg) { + struct gve_tx_ring *tx = cfg->tx; int i; - for (i = 0; i < priv->tx_cfg.num_queues; i++) { - struct gve_tx_ring *tx = &priv->tx[i]; + if (!tx) + return; - gve_clean_tx_done_dqo(priv, tx, /*napi=*/NULL); - netdev_tx_reset_queue(tx->netdev_txq); - gve_tx_clean_pending_packets(tx); + for (i = 0; i < cfg->qcfg->num_queues + cfg->qcfg->num_xdp_queues; i++) + gve_tx_free_ring_dqo(priv, &tx[i], cfg); - gve_tx_free_ring_dqo(priv, i); - } + kvfree(tx); + cfg->tx = NULL; } /* Returns the number of slots available in the ring */ @@ -501,28 +548,18 @@ static int gve_prep_tso(struct sk_buff *skb) if (unlikely(skb_shinfo(skb)->gso_size < GVE_TX_MIN_TSO_MSS_DQO)) return -1; + if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) + return -EINVAL; + /* Needed because we will modify header. */ err = skb_cow_head(skb, 0); if (err < 0) return err; tcp = tcp_hdr(skb); - - /* Remove payload length from checksum. */ paylen = skb->len - skb_transport_offset(skb); - - switch (skb_shinfo(skb)->gso_type) { - case SKB_GSO_TCPV4: - case SKB_GSO_TCPV6: - csum_replace_by_diff(&tcp->check, - (__force __wsum)htonl(paylen)); - - /* Compute length of segmentation header. */ - header_len = skb_tcp_all_headers(skb); - break; - default: - return -EINVAL; - } + csum_replace_by_diff(&tcp->check, (__force __wsum)htonl(paylen)); + header_len = skb_tcp_all_headers(skb); if (unlikely(header_len > GVE_TX_MAX_HDR_SIZE_DQO)) return -EINVAL; @@ -623,7 +660,8 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx, goto err; dma_unmap_len_set(pkt, len[pkt->num_bufs], len); - dma_unmap_addr_set(pkt, dma[pkt->num_bufs], addr); + netmem_dma_unmap_addr_set(skb_frag_netmem(frag), pkt, + dma[pkt->num_bufs], addr); ++pkt->num_bufs; gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb, len, addr, @@ -822,22 +860,42 @@ static bool gve_can_send_tso(const struct sk_buff *skb) const int header_len = skb_tcp_all_headers(skb); const int gso_size = shinfo->gso_size; int cur_seg_num_bufs; + int prev_frag_size; int cur_seg_size; int i; cur_seg_size = skb_headlen(skb) - header_len; + prev_frag_size = skb_headlen(skb); cur_seg_num_bufs = cur_seg_size > 0; for (i = 0; i < shinfo->nr_frags; i++) { if (cur_seg_size >= gso_size) { cur_seg_size %= gso_size; cur_seg_num_bufs = cur_seg_size > 0; + + if (prev_frag_size > GVE_TX_MAX_BUF_SIZE_DQO) { + int prev_frag_remain = prev_frag_size % + GVE_TX_MAX_BUF_SIZE_DQO; + + /* If the last descriptor of the previous frag + * is less than cur_seg_size, the segment will + * span two descriptors in the previous frag. + * Since max gso size (9728) is less than + * GVE_TX_MAX_BUF_SIZE_DQO, it is impossible + * for the segment to span more than two + * descriptors. + */ + if (prev_frag_remain && + cur_seg_size > prev_frag_remain) + cur_seg_num_bufs++; + } } if (unlikely(++cur_seg_num_bufs > max_bufs_per_seg)) return false; - cur_seg_size += skb_frag_size(&shinfo->frags[i]); + prev_frag_size = skb_frag_size(&shinfo->frags[i]); + cur_seg_size += prev_frag_size; } return true; @@ -981,8 +1039,9 @@ static void gve_unmap_packet(struct device *dev, dma_unmap_single(dev, dma_unmap_addr(pkt, dma[0]), dma_unmap_len(pkt, len[0]), DMA_TO_DEVICE); for (i = 1; i < pkt->num_bufs; i++) { - dma_unmap_page(dev, dma_unmap_addr(pkt, dma[i]), - dma_unmap_len(pkt, len[i]), DMA_TO_DEVICE); + netmem_dma_unmap_page_attrs(dev, dma_unmap_addr(pkt, dma[i]), + dma_unmap_len(pkt, len[i]), + DMA_TO_DEVICE, 0); } pkt->num_bufs = 0; } @@ -1082,8 +1141,7 @@ static void gve_handle_miss_completion(struct gve_priv *priv, /* jiffies can wraparound but time comparisons can handle overflows. */ pending_packet->timeout_jiffies = jiffies + - msecs_to_jiffies(GVE_REINJECT_COMPL_TIMEOUT * - MSEC_PER_SEC); + secs_to_jiffies(GVE_REINJECT_COMPL_TIMEOUT); add_to_list(tx, &tx->dqo_compl.miss_completions, pending_packet); *bytes += pending_packet->skb->len; @@ -1127,8 +1185,7 @@ static void remove_miss_completions(struct gve_priv *priv, pending_packet->state = GVE_PACKET_STATE_TIMED_OUT_COMPL; pending_packet->timeout_jiffies = jiffies + - msecs_to_jiffies(GVE_DEALLOCATE_COMPL_TIMEOUT * - MSEC_PER_SEC); + secs_to_jiffies(GVE_DEALLOCATE_COMPL_TIMEOUT); /* Maintain pending packet in another list so the packet can be * unallocated at a later time. */ diff --git a/drivers/net/ethernet/google/gve/gve_utils.c b/drivers/net/ethernet/google/gve/gve_utils.c index 26e08d753270..ace9b8698021 100644 --- a/drivers/net/ethernet/google/gve/gve_utils.c +++ b/drivers/net/ethernet/google/gve/gve_utils.c @@ -8,6 +8,14 @@ #include "gve_adminq.h" #include "gve_utils.h" +bool gve_tx_was_added_to_block(struct gve_priv *priv, int queue_idx) +{ + struct gve_notify_block *block = + &priv->ntfy_blocks[gve_tx_idx_to_ntfy(priv, queue_idx)]; + + return block->tx != NULL; +} + void gve_tx_remove_from_block(struct gve_priv *priv, int queue_idx) { struct gve_notify_block *block = @@ -30,6 +38,14 @@ void gve_tx_add_to_block(struct gve_priv *priv, int queue_idx) queue_idx); } +bool gve_rx_was_added_to_block(struct gve_priv *priv, int queue_idx) +{ + struct gve_notify_block *block = + &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_idx)]; + + return block->rx != NULL; +} + void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx) { struct gve_notify_block *block = @@ -48,11 +64,9 @@ void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx) rx->ntfy_id = ntfy_idx; } -struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi, - struct gve_rx_slot_page_info *page_info, u16 len) +struct sk_buff *gve_rx_copy_data(struct net_device *dev, struct napi_struct *napi, + u8 *data, u16 len) { - void *va = page_info->page_address + page_info->page_offset + - page_info->pad; struct sk_buff *skb; skb = napi_alloc_skb(napi, len); @@ -60,12 +74,21 @@ struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi, return NULL; __skb_put(skb, len); - skb_copy_to_linear_data_offset(skb, 0, va, len); + skb_copy_to_linear_data_offset(skb, 0, data, len); skb->protocol = eth_type_trans(skb, dev); return skb; } +struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi, + struct gve_rx_slot_page_info *page_info, u16 len) +{ + void *va = page_info->page_address + page_info->page_offset + + page_info->pad; + + return gve_rx_copy_data(dev, napi, va, len); +} + void gve_dec_pagecnt_bias(struct gve_rx_slot_page_info *page_info) { page_info->pagecnt_bias--; @@ -81,3 +104,19 @@ void gve_dec_pagecnt_bias(struct gve_rx_slot_page_info *page_info) page_ref_add(page_info->page, INT_MAX - pagecount); } } + +void gve_add_napi(struct gve_priv *priv, int ntfy_idx, + int (*gve_poll)(struct napi_struct *, int)) +{ + struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + + netif_napi_add_locked(priv->dev, &block->napi, gve_poll); + netif_napi_set_irq_locked(&block->napi, block->irq); +} + +void gve_remove_napi(struct gve_priv *priv, int ntfy_idx) +{ + struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + + netif_napi_del_locked(&block->napi); +} diff --git a/drivers/net/ethernet/google/gve/gve_utils.h b/drivers/net/ethernet/google/gve/gve_utils.h index 324fd98a6112..bf2e9a0adb36 100644 --- a/drivers/net/ethernet/google/gve/gve_utils.h +++ b/drivers/net/ethernet/google/gve/gve_utils.h @@ -11,17 +11,25 @@ #include "gve.h" +bool gve_tx_was_added_to_block(struct gve_priv *priv, int queue_idx); void gve_tx_remove_from_block(struct gve_priv *priv, int queue_idx); void gve_tx_add_to_block(struct gve_priv *priv, int queue_idx); +bool gve_rx_was_added_to_block(struct gve_priv *priv, int queue_idx); void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx); void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx); +struct sk_buff *gve_rx_copy_data(struct net_device *dev, struct napi_struct *napi, + u8 *data, u16 len); + struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi, struct gve_rx_slot_page_info *page_info, u16 len); /* Decrement pagecnt_bias. Set it back to INT_MAX if it reached zero. */ void gve_dec_pagecnt_bias(struct gve_rx_slot_page_info *page_info); +void gve_add_napi(struct gve_priv *priv, int ntfy_idx, + int (*gve_poll)(struct napi_struct *, int)); +void gve_remove_napi(struct gve_priv *priv, int ntfy_idx); #endif /* _GVE_UTILS_H */ |