Diffstat (limited to 'drivers/net/ethernet/google/gve')
-rw-r--r-- | drivers/net/ethernet/google/gve/Makefile              |    3
-rw-r--r-- | drivers/net/ethernet/google/gve/gve.h                 |  294
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_adminq.c          |  681
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_adminq.h          |  213
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c |  316
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_dqo.h             |    6
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_ethtool.c         |  370
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_flow_rule.c       |  298
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_main.c            | 1222
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_rx.c              |  168
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_rx_dqo.c          |  528
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_tx.c              |  123
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_tx_dqo.c          |  109
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_utils.c           |    5
14 files changed, 3000 insertions, 1336 deletions
diff --git a/drivers/net/ethernet/google/gve/Makefile b/drivers/net/ethernet/google/gve/Makefile index b9a6be76531b..4520f1c07a63 100644 --- a/drivers/net/ethernet/google/gve/Makefile +++ b/drivers/net/ethernet/google/gve/Makefile @@ -1,4 +1,5 @@ # Makefile for the Google virtual Ethernet (gve) driver obj-$(CONFIG_GVE) += gve.o -gve-objs := gve_main.o gve_tx.o gve_tx_dqo.o gve_rx.o gve_rx_dqo.o gve_ethtool.o gve_adminq.o gve_utils.o +gve-objs := gve_main.o gve_tx.o gve_tx_dqo.o gve_rx.o gve_rx_dqo.o gve_ethtool.o gve_adminq.o gve_utils.o gve_flow_rule.o \ + gve_buffer_mgmt_dqo.o diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 4814c96d5fe7..2fab38c8ee78 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: (GPL-2.0 OR MIT) * Google virtual Ethernet (gve) driver * - * Copyright (C) 2015-2021 Google, Inc. + * Copyright (C) 2015-2024 Google LLC */ #ifndef _GVE_H_ @@ -13,6 +13,7 @@ #include <linux/netdevice.h> #include <linux/pci.h> #include <linux/u64_stats_sync.h> +#include <net/page_pool/helpers.h> #include <net/xdp.h> #include "gve_desc.h" @@ -50,12 +51,28 @@ /* PTYPEs are always 10 bits. */ #define GVE_NUM_PTYPES 1024 +/* Default minimum ring size */ +#define GVE_DEFAULT_MIN_TX_RING_SIZE 256 +#define GVE_DEFAULT_MIN_RX_RING_SIZE 512 + #define GVE_DEFAULT_RX_BUFFER_SIZE 2048 #define GVE_MAX_RX_BUFFER_SIZE 4096 +#define GVE_XDP_RX_BUFFER_SIZE_DQO 4096 + #define GVE_DEFAULT_RX_BUFFER_OFFSET 2048 +#define GVE_PAGE_POOL_SIZE_MULTIPLIER 4 + +#define GVE_FLOW_RULES_CACHE_SIZE \ + (GVE_ADMINQ_BUFFER_SIZE / sizeof(struct gve_adminq_queried_flow_rule)) +#define GVE_FLOW_RULE_IDS_CACHE_SIZE \ + (GVE_ADMINQ_BUFFER_SIZE / sizeof(((struct gve_adminq_queried_flow_rule *)0)->location)) + +#define GVE_RSS_KEY_SIZE 40 +#define GVE_RSS_INDIR_SIZE 128 + #define GVE_XDP_ACTIONS 5 #define GVE_GQ_TX_MIN_PKT_DESC_BYTES 182 @@ -63,7 +80,6 @@ #define GVE_DEFAULT_HEADER_BUFFER_SIZE 128 #define DQO_QPL_DEFAULT_TX_PAGES 512 -#define DQO_QPL_DEFAULT_RX_PAGES 2048 /* Maximum TSO size supported on DQO */ #define GVE_DQO_TX_MAX 0x3FFFF @@ -91,9 +107,16 @@ struct gve_rx_desc_queue { /* The page info for a single slot in the RX data queue */ struct gve_rx_slot_page_info { - struct page *page; + /* netmem is used for DQO RDA mode + * page is used in all other modes + */ + union { + struct page *page; + netmem_ref netmem; + }; void *page_address; u32 page_offset; /* offset to write to in page */ + unsigned int buf_size; int pagecnt_bias; /* expected pagecnt if only the driver has a ref */ u16 pad; /* adjustment for rx padding */ u8 can_flip; /* tracks if the networking stack is using the page */ @@ -206,6 +229,11 @@ struct gve_rx_cnts { /* Contains datapath state used to represent an RX queue. 
*/ struct gve_rx_ring { struct gve_priv *gve; + + u16 packet_buffer_size; /* Size of buffer posted to NIC */ + u16 packet_buffer_truesize; /* Total size of RX buffer */ + u16 rx_headroom; + union { /* GQI fields */ struct { @@ -214,7 +242,6 @@ struct gve_rx_ring { /* threshold for posting new buffs and descs */ u32 db_threshold; - u16 packet_buffer_size; u32 qpl_copy_pool_mask; u32 qpl_copy_pool_head; @@ -265,6 +292,8 @@ struct gve_rx_ring { /* Address info of the buffers for header-split */ struct gve_header_buf hdr_bufs; + + struct page_pool *page_pool; } dqo; }; @@ -590,8 +619,6 @@ struct gve_tx_ring { dma_addr_t complq_bus_dqo; /* dma address of the dqo.compl_ring */ struct u64_stats_sync statss; /* sync stats for 32bit archs */ struct xsk_buff_pool *xsk_pool; - u32 xdp_xsk_wakeup; - u32 xdp_xsk_done; u64 xdp_xsk_sent; u64 xdp_xmit; u64 xdp_xmit_errors; @@ -607,12 +634,21 @@ struct gve_notify_block { struct gve_priv *priv; struct gve_tx_ring *tx; /* tx rings on this block */ struct gve_rx_ring *rx; /* rx rings on this block */ + u32 irq; +}; + +/* Tracks allowed and current rx queue settings */ +struct gve_rx_queue_config { + u16 max_queues; + u16 num_queues; + u16 packet_buffer_size; }; -/* Tracks allowed and current queue settings */ -struct gve_queue_config { +/* Tracks allowed and current tx queue settings */ +struct gve_tx_queue_config { u16 max_queues; - u16 num_queues; /* current */ + u16 num_queues; /* number of TX queues, excluding XDP queues */ + u16 num_xdp_queues; }; /* Tracks the available and used qpl IDs */ @@ -621,11 +657,6 @@ struct gve_qpl_config { unsigned long *qpl_id_map; /* bitmap of used qpl ids */ }; -struct gve_options_dqo_rda { - u16 tx_comp_ring_entries; /* number of tx_comp descriptors */ - u16 rx_buff_ring_entries; /* number of rx_buff descriptors */ -}; - struct gve_irq_db { __be32 index; } ____cacheline_aligned; @@ -639,31 +670,13 @@ struct gve_ptype_lut { struct gve_ptype ptypes[GVE_NUM_PTYPES]; }; -/* Parameters for allocating queue page lists */ -struct gve_qpls_alloc_cfg { - struct gve_qpl_config *qpl_cfg; - struct gve_queue_config *tx_cfg; - struct gve_queue_config *rx_cfg; - - u16 num_xdp_queues; - bool raw_addressing; - bool is_gqi; - - /* Allocated resources are returned here */ - struct gve_queue_page_list *qpls; -}; - /* Parameters for allocating resources for tx queues */ struct gve_tx_alloc_rings_cfg { - struct gve_queue_config *qcfg; + struct gve_tx_queue_config *qcfg; - /* qpls and qpl_cfg must already be allocated */ - struct gve_queue_page_list *qpls; - struct gve_qpl_config *qpl_cfg; + u16 num_xdp_rings; u16 ring_size; - u16 start_idx; - u16 num_rings; bool raw_addressing; /* Allocated resources are returned here */ @@ -673,17 +686,15 @@ struct gve_tx_alloc_rings_cfg { /* Parameters for allocating resources for rx queues */ struct gve_rx_alloc_rings_cfg { /* tx config is also needed to determine QPL ids */ - struct gve_queue_config *qcfg; - struct gve_queue_config *qcfg_tx; - - /* qpls and qpl_cfg must already be allocated */ - struct gve_queue_page_list *qpls; - struct gve_qpl_config *qpl_cfg; + struct gve_rx_queue_config *qcfg_rx; + struct gve_tx_queue_config *qcfg_tx; u16 ring_size; u16 packet_buffer_size; bool raw_addressing; bool enable_header_split; + bool reset_rss; + bool xdp; /* Allocated resources are returned here */ struct gve_rx_ring *rx; @@ -701,11 +712,48 @@ enum gve_queue_format { GVE_DQO_QPL_FORMAT = 0x4, }; +struct gve_flow_spec { + __be32 src_ip[4]; + __be32 dst_ip[4]; + union { + struct { + __be16 src_port; + __be16 
dst_port; + }; + __be32 spi; + }; + union { + u8 tos; + u8 tclass; + }; +}; + +struct gve_flow_rule { + u32 location; + u16 flow_type; + u16 action; + struct gve_flow_spec key; + struct gve_flow_spec mask; +}; + +struct gve_flow_rules_cache { + bool rules_cache_synced; /* False if the driver's rules_cache is outdated */ + struct gve_adminq_queried_flow_rule *rules_cache; + __be32 *rule_ids_cache; + /* The total number of queried rules that stored in the caches */ + u32 rules_cache_num; + u32 rule_ids_cache_num; +}; + +struct gve_rss_config { + u8 *hash_key; + u32 *hash_lut; +}; + struct gve_priv { struct net_device *dev; struct gve_tx_ring *tx; /* array of tx_cfg.num_queues */ struct gve_rx_ring *rx; /* array of rx_cfg.num_queues */ - struct gve_queue_page_list *qpls; /* array of num qpls */ struct gve_notify_block *ntfy_blocks; /* array of num_ntfy_blks */ struct gve_irq_db *irq_db_indices; /* array of num_ntfy_blks */ dma_addr_t irq_db_indices_bus; @@ -718,19 +766,21 @@ struct gve_priv { u16 num_event_counters; u16 tx_desc_cnt; /* num desc per ring */ u16 rx_desc_cnt; /* num desc per ring */ + u16 max_tx_desc_cnt; + u16 max_rx_desc_cnt; + u16 min_tx_desc_cnt; + u16 min_rx_desc_cnt; + bool modify_ring_size_enabled; + bool default_min_ring_size; u16 tx_pages_per_qpl; /* Suggested number of pages per qpl for TX queues by NIC */ - u16 rx_pages_per_qpl; /* Suggested number of pages per qpl for RX queues by NIC */ - u16 rx_data_slot_cnt; /* rx buffer length */ u64 max_registered_pages; u64 num_registered_pages; /* num pages registered with NIC */ struct bpf_prog *xdp_prog; /* XDP BPF program */ u32 rx_copybreak; /* copy packets smaller than this */ u16 default_num_queues; /* default num queues to set up */ - u16 num_xdp_queues; - struct gve_queue_config tx_cfg; - struct gve_queue_config rx_cfg; - struct gve_qpl_config qpl_cfg; /* map used QPL ids */ + struct gve_tx_queue_config tx_cfg; + struct gve_rx_queue_config rx_cfg; u32 num_ntfy_blks; /* spilt between TX and RX so must be even */ struct gve_registers __iomem *reg_bar0; /* see gve_register.h */ @@ -745,6 +795,7 @@ struct gve_priv { union gve_adminq_command *adminq; dma_addr_t adminq_bus_addr; struct dma_pool *adminq_pool; + struct mutex adminq_lock; /* Protects adminq command execution */ u32 adminq_mask; /* masks prod_cnt to adminq size */ u32 adminq_prod_cnt; /* free-running count of AQ cmds executed */ u32 adminq_cmd_fail; /* free-running count of AQ cmds failed */ @@ -764,6 +815,10 @@ struct gve_priv { u32 adminq_report_link_speed_cnt; u32 adminq_get_ptype_map_cnt; u32 adminq_verify_driver_compatibility_cnt; + u32 adminq_query_flow_rules_cnt; + u32 adminq_cfg_flow_rule_cnt; + u32 adminq_cfg_rss_cnt; + u32 adminq_query_rss_cnt; /* Global stats */ u32 interface_up_cnt; /* count of times interface turned up since last reset */ @@ -792,11 +847,9 @@ struct gve_priv { u64 link_speed; bool up_before_suspend; /* True if dev was up before suspend */ - struct gve_options_dqo_rda options_dqo_rda; struct gve_ptype_lut *ptype_lut_dqo; /* Must be a power of two. 
*/ - u16 data_buffer_size_dqo; u16 max_rx_buffer_size; /* device limit */ enum gve_queue_format queue_format; @@ -807,6 +860,16 @@ struct gve_priv { u16 header_buf_size; /* device configured, header-split supported if non-zero */ bool header_split_enabled; /* True if the header split is enabled by the user */ + + u32 max_flow_rules; + u32 num_flow_rules; + + struct gve_flow_rules_cache flow_rules_cache; + + u16 rss_key_size; + u16 rss_lut_size; + bool cache_rss_config; + struct gve_rss_config rss_config; }; enum gve_service_task_flags_bit { @@ -989,27 +1052,16 @@ static inline bool gve_is_qpl(struct gve_priv *priv) } /* Returns the number of tx queue page lists */ -static inline u32 gve_num_tx_qpls(const struct gve_queue_config *tx_cfg, - int num_xdp_queues, +static inline u32 gve_num_tx_qpls(const struct gve_tx_queue_config *tx_cfg, bool is_qpl) { if (!is_qpl) return 0; - return tx_cfg->num_queues + num_xdp_queues; -} - -/* Returns the number of XDP tx queue page lists - */ -static inline u32 gve_num_xdp_qpls(struct gve_priv *priv) -{ - if (priv->queue_format != GVE_GQI_QPL_FORMAT) - return 0; - - return priv->num_xdp_queues; + return tx_cfg->num_queues + tx_cfg->num_xdp_queues; } /* Returns the number of rx queue page lists */ -static inline u32 gve_num_rx_qpls(const struct gve_queue_config *rx_cfg, +static inline u32 gve_num_rx_qpls(const struct gve_rx_queue_config *rx_cfg, bool is_qpl) { if (!is_qpl) @@ -1027,8 +1079,8 @@ static inline u32 gve_rx_qpl_id(struct gve_priv *priv, int rx_qid) return priv->tx_cfg.max_queues + rx_qid; } -/* Returns the index into priv->qpls where a certain rx queue's QPL resides */ -static inline u32 gve_get_rx_qpl_id(const struct gve_queue_config *tx_cfg, int rx_qid) +static inline u32 gve_get_rx_qpl_id(const struct gve_tx_queue_config *tx_cfg, + int rx_qid) { return tx_cfg->max_queues + rx_qid; } @@ -1038,41 +1090,17 @@ static inline u32 gve_tx_start_qpl_id(struct gve_priv *priv) return gve_tx_qpl_id(priv, 0); } -/* Returns the index into priv->qpls where the first rx queue's QPL resides */ -static inline u32 gve_rx_start_qpl_id(const struct gve_queue_config *tx_cfg) +static inline u32 gve_rx_start_qpl_id(const struct gve_tx_queue_config *tx_cfg) { return gve_get_rx_qpl_id(tx_cfg, 0); } -/* Returns a pointer to the next available tx qpl in the list of qpls */ -static inline -struct gve_queue_page_list *gve_assign_tx_qpl(struct gve_tx_alloc_rings_cfg *cfg, - int tx_qid) +static inline u32 gve_get_rx_pages_per_qpl_dqo(u32 rx_desc_cnt) { - /* QPL already in use */ - if (test_bit(tx_qid, cfg->qpl_cfg->qpl_id_map)) - return NULL; - set_bit(tx_qid, cfg->qpl_cfg->qpl_id_map); - return &cfg->qpls[tx_qid]; -} - -/* Returns a pointer to the next available rx qpl in the list of qpls */ -static inline -struct gve_queue_page_list *gve_assign_rx_qpl(struct gve_rx_alloc_rings_cfg *cfg, - int rx_qid) -{ - int id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx_qid); - /* QPL already in use */ - if (test_bit(id, cfg->qpl_cfg->qpl_id_map)) - return NULL; - set_bit(id, cfg->qpl_cfg->qpl_id_map); - return &cfg->qpls[id]; -} - -/* Unassigns the qpl with the given id */ -static inline void gve_unassign_qpl(struct gve_qpl_config *qpl_cfg, int id) -{ - clear_bit(id, qpl_cfg->qpl_id_map); + /* For DQO, page count should be more than ring size for + * out-of-order completions. Set it to two times of ring size. 
+ */ + return 2 * rx_desc_cnt; } /* Returns the correct dma direction for tx and rx qpls */ @@ -1093,7 +1121,7 @@ static inline bool gve_is_gqi(struct gve_priv *priv) static inline u32 gve_num_tx_queues(struct gve_priv *priv) { - return priv->tx_cfg.num_queues + priv->num_xdp_queues; + return priv->tx_cfg.num_queues + priv->tx_cfg.num_xdp_queues; } static inline u32 gve_xdp_tx_queue_id(struct gve_priv *priv, u32 queue_id) @@ -1106,6 +1134,16 @@ static inline u32 gve_xdp_tx_start_queue_id(struct gve_priv *priv) return gve_xdp_tx_queue_id(priv, 0); } +static inline bool gve_supports_xdp_xmit(struct gve_priv *priv) +{ + switch (priv->queue_format) { + case GVE_GQI_QPL_FORMAT: + return true; + default: + return false; + } +} + /* gqi napi handler defined in gve_main.c */ int gve_napi_poll(struct napi_struct *napi, int budget); @@ -1115,6 +1153,12 @@ int gve_alloc_page(struct gve_priv *priv, struct device *dev, enum dma_data_direction, gfp_t gfp_flags); void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, enum dma_data_direction); +/* qpls */ +struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv, + u32 id, int pages); +void gve_free_queue_page_list(struct gve_priv *priv, + struct gve_queue_page_list *qpl, + u32 id); /* tx handling */ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev); int gve_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, @@ -1124,6 +1168,7 @@ int gve_xdp_xmit_one(struct gve_priv *priv, struct gve_tx_ring *tx, void gve_xdp_tx_flush(struct gve_priv *priv, u32 xdp_qid); bool gve_tx_poll(struct gve_notify_block *block, int budget); bool gve_xdp_poll(struct gve_notify_block *block, int budget); +int gve_xsk_tx_poll(struct gve_notify_block *block, int budget); int gve_tx_alloc_rings_gqi(struct gve_priv *priv, struct gve_tx_alloc_rings_cfg *cfg); void gve_tx_free_rings_gqi(struct gve_priv *priv, @@ -1137,7 +1182,12 @@ bool gve_tx_clean_pending(struct gve_priv *priv, struct gve_tx_ring *tx); void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx); int gve_rx_poll(struct gve_notify_block *block, int budget); bool gve_rx_work_pending(struct gve_rx_ring *rx); -int gve_rx_alloc_rings(struct gve_priv *priv); +int gve_rx_alloc_ring_gqi(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg, + struct gve_rx_ring *rx, + int idx); +void gve_rx_free_ring_gqi(struct gve_priv *priv, struct gve_rx_ring *rx, + struct gve_rx_alloc_rings_cfg *cfg); int gve_rx_alloc_rings_gqi(struct gve_priv *priv, struct gve_rx_alloc_rings_cfg *cfg); void gve_rx_free_rings_gqi(struct gve_priv *priv, @@ -1147,12 +1197,58 @@ void gve_rx_stop_ring_gqi(struct gve_priv *priv, int idx); u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hplit); bool gve_header_split_supported(const struct gve_priv *priv); int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split); +/* rx buffer handling */ +int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs); +void gve_free_page_dqo(struct gve_priv *priv, struct gve_rx_buf_state_dqo *bs, + bool free_page); +struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx); +bool gve_buf_state_is_allocated(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state); +void gve_free_buf_state(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state); +struct gve_rx_buf_state_dqo *gve_dequeue_buf_state(struct gve_rx_ring *rx, + struct gve_index_list *list); +void gve_enqueue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list, + struct 
gve_rx_buf_state_dqo *buf_state); +struct gve_rx_buf_state_dqo *gve_get_recycled_buf_state(struct gve_rx_ring *rx); +void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state); +void gve_free_to_page_pool(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state, + bool allow_direct); +int gve_alloc_qpl_page_dqo(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state); +void gve_free_qpl_page_dqo(struct gve_rx_buf_state_dqo *buf_state); +void gve_reuse_buffer(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state); +void gve_free_buffer(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state); +int gve_alloc_buffer(struct gve_rx_ring *rx, struct gve_rx_desc_dqo *desc); +struct page_pool *gve_rx_create_page_pool(struct gve_priv *priv, + struct gve_rx_ring *rx, + bool xdp); + /* Reset */ void gve_schedule_reset(struct gve_priv *priv); int gve_reset(struct gve_priv *priv, bool attempt_teardown); +void gve_get_curr_alloc_cfgs(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg); +int gve_adjust_config(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg); int gve_adjust_queues(struct gve_priv *priv, - struct gve_queue_config new_rx_config, - struct gve_queue_config new_tx_config); + struct gve_rx_queue_config new_rx_config, + struct gve_tx_queue_config new_tx_config, + bool reset_rss); +/* flow steering rule */ +int gve_get_flow_rule_entry(struct gve_priv *priv, struct ethtool_rxnfc *cmd); +int gve_get_flow_rule_ids(struct gve_priv *priv, struct ethtool_rxnfc *cmd, u32 *rule_locs); +int gve_add_flow_rule(struct gve_priv *priv, struct ethtool_rxnfc *cmd); +int gve_del_flow_rule(struct gve_priv *priv, struct ethtool_rxnfc *cmd); +int gve_flow_rules_reset(struct gve_priv *priv); +/* RSS config */ +int gve_init_rss_config(struct gve_priv *priv, u16 num_queues); /* report stats handling */ void gve_handle_report_stats(struct gve_priv *priv); /* exported by ethtool.c */ diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index ae12ac38e18b..3e8fc33cc11f 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -32,6 +32,8 @@ struct gve_device_option *gve_get_next_option(struct gve_device_descriptor *desc return option_end > descriptor_end ? 
NULL : (struct gve_device_option *)option_end; } +#define GVE_DEVICE_OPTION_NO_MIN_RING_SIZE 8 + static void gve_parse_device_option(struct gve_priv *priv, struct gve_device_descriptor *device_descriptor, @@ -41,7 +43,10 @@ void gve_parse_device_option(struct gve_priv *priv, struct gve_device_option_dqo_rda **dev_op_dqo_rda, struct gve_device_option_jumbo_frames **dev_op_jumbo_frames, struct gve_device_option_dqo_qpl **dev_op_dqo_qpl, - struct gve_device_option_buffer_sizes **dev_op_buffer_sizes) + struct gve_device_option_buffer_sizes **dev_op_buffer_sizes, + struct gve_device_option_flow_steering **dev_op_flow_steering, + struct gve_device_option_rss_config **dev_op_rss_config, + struct gve_device_option_modify_ring **dev_op_modify_ring) { u32 req_feat_mask = be32_to_cpu(option->required_features_mask); u16 option_length = be16_to_cpu(option->option_length); @@ -165,6 +170,61 @@ void gve_parse_device_option(struct gve_priv *priv, "Buffer Sizes"); *dev_op_buffer_sizes = (void *)(option + 1); break; + case GVE_DEV_OPT_ID_MODIFY_RING: + if (option_length < GVE_DEVICE_OPTION_NO_MIN_RING_SIZE || + req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING) { + dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT, + "Modify Ring", (int)sizeof(**dev_op_modify_ring), + GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING, + option_length, req_feat_mask); + break; + } + + if (option_length > sizeof(**dev_op_modify_ring)) { + dev_warn(&priv->pdev->dev, + GVE_DEVICE_OPTION_TOO_BIG_FMT, "Modify Ring"); + } + + *dev_op_modify_ring = (void *)(option + 1); + + /* device has not provided min ring size */ + if (option_length == GVE_DEVICE_OPTION_NO_MIN_RING_SIZE) + priv->default_min_ring_size = true; + break; + case GVE_DEV_OPT_ID_FLOW_STEERING: + if (option_length < sizeof(**dev_op_flow_steering) || + req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_FLOW_STEERING) { + dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT, + "Flow Steering", + (int)sizeof(**dev_op_flow_steering), + GVE_DEV_OPT_REQ_FEAT_MASK_FLOW_STEERING, + option_length, req_feat_mask); + break; + } + + if (option_length > sizeof(**dev_op_flow_steering)) + dev_warn(&priv->pdev->dev, + GVE_DEVICE_OPTION_TOO_BIG_FMT, + "Flow Steering"); + *dev_op_flow_steering = (void *)(option + 1); + break; + case GVE_DEV_OPT_ID_RSS_CONFIG: + if (option_length < sizeof(**dev_op_rss_config) || + req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_RSS_CONFIG) { + dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT, + "RSS config", + (int)sizeof(**dev_op_rss_config), + GVE_DEV_OPT_REQ_FEAT_MASK_RSS_CONFIG, + option_length, req_feat_mask); + break; + } + + if (option_length > sizeof(**dev_op_rss_config)) + dev_warn(&priv->pdev->dev, + GVE_DEVICE_OPTION_TOO_BIG_FMT, + "RSS config"); + *dev_op_rss_config = (void *)(option + 1); + break; default: /* If we don't recognize the option just continue * without doing anything. 
@@ -183,7 +243,10 @@ gve_process_device_options(struct gve_priv *priv, struct gve_device_option_dqo_rda **dev_op_dqo_rda, struct gve_device_option_jumbo_frames **dev_op_jumbo_frames, struct gve_device_option_dqo_qpl **dev_op_dqo_qpl, - struct gve_device_option_buffer_sizes **dev_op_buffer_sizes) + struct gve_device_option_buffer_sizes **dev_op_buffer_sizes, + struct gve_device_option_flow_steering **dev_op_flow_steering, + struct gve_device_option_rss_config **dev_op_rss_config, + struct gve_device_option_modify_ring **dev_op_modify_ring) { const int num_options = be16_to_cpu(descriptor->num_device_options); struct gve_device_option *dev_opt; @@ -204,7 +267,9 @@ gve_process_device_options(struct gve_priv *priv, gve_parse_device_option(priv, descriptor, dev_opt, dev_op_gqi_rda, dev_op_gqi_qpl, dev_op_dqo_rda, dev_op_jumbo_frames, - dev_op_dqo_qpl, dev_op_buffer_sizes); + dev_op_dqo_qpl, dev_op_buffer_sizes, + dev_op_flow_steering, dev_op_rss_config, + dev_op_modify_ring); dev_opt = next_opt; } @@ -242,6 +307,10 @@ int gve_adminq_alloc(struct device *dev, struct gve_priv *priv) priv->adminq_report_stats_cnt = 0; priv->adminq_report_link_speed_cnt = 0; priv->adminq_get_ptype_map_cnt = 0; + priv->adminq_query_flow_rules_cnt = 0; + priv->adminq_cfg_flow_rule_cnt = 0; + priv->adminq_cfg_rss_cnt = 0; + priv->adminq_query_rss_cnt = 0; /* Setup Admin queue with the device */ if (priv->pdev->revision < 0x1) { @@ -258,6 +327,7 @@ int gve_adminq_alloc(struct device *dev, struct gve_priv *priv) &priv->reg_bar0->adminq_base_address_lo); iowrite32be(GVE_DRIVER_STATUS_RUN_MASK, &priv->reg_bar0->driver_status); } + mutex_init(&priv->adminq_lock); gve_set_admin_queue_ok(priv); return 0; } @@ -434,6 +504,8 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv, memcpy(cmd, cmd_orig, sizeof(*cmd_orig)); opcode = be32_to_cpu(READ_ONCE(cmd->opcode)); + if (opcode == GVE_ADMINQ_EXTENDED_COMMAND) + opcode = be32_to_cpu(cmd->extended_command.inner_opcode); switch (opcode) { case GVE_ADMINQ_DESCRIBE_DEVICE: @@ -478,6 +550,18 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv, case GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY: priv->adminq_verify_driver_compatibility_cnt++; break; + case GVE_ADMINQ_QUERY_FLOW_RULES: + priv->adminq_query_flow_rules_cnt++; + break; + case GVE_ADMINQ_CONFIGURE_FLOW_RULE: + priv->adminq_cfg_flow_rule_cnt++; + break; + case GVE_ADMINQ_CONFIGURE_RSS: + priv->adminq_cfg_rss_cnt++; + break; + case GVE_ADMINQ_QUERY_RSS: + priv->adminq_query_rss_cnt++; + break; default: dev_err(&priv->pdev->dev, "unknown AQ command opcode %d\n", opcode); } @@ -485,28 +569,58 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv, return 0; } -/* This function is not threadsafe - the caller is responsible for any - * necessary locks. - * The caller is also responsible for making sure there are no commands - * waiting to be executed. 
- */ static int gve_adminq_execute_cmd(struct gve_priv *priv, union gve_adminq_command *cmd_orig) { u32 tail, head; int err; + mutex_lock(&priv->adminq_lock); tail = ioread32be(&priv->reg_bar0->adminq_event_counter); head = priv->adminq_prod_cnt; - if (tail != head) - // This is not a valid path - return -EINVAL; + if (tail != head) { + err = -EINVAL; + goto out; + } err = gve_adminq_issue_cmd(priv, cmd_orig); if (err) - return err; + goto out; - return gve_adminq_kick_and_wait(priv); + err = gve_adminq_kick_and_wait(priv); + +out: + mutex_unlock(&priv->adminq_lock); + return err; +} + +static int gve_adminq_execute_extended_cmd(struct gve_priv *priv, u32 opcode, + size_t cmd_size, void *cmd_orig) +{ + union gve_adminq_command cmd; + dma_addr_t inner_cmd_bus; + void *inner_cmd; + int err; + + inner_cmd = dma_alloc_coherent(&priv->pdev->dev, cmd_size, + &inner_cmd_bus, GFP_KERNEL); + if (!inner_cmd) + return -ENOMEM; + + memcpy(inner_cmd, cmd_orig, cmd_size); + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = cpu_to_be32(GVE_ADMINQ_EXTENDED_COMMAND); + cmd.extended_command = (struct gve_adminq_extended_command) { + .inner_opcode = cpu_to_be32(opcode), + .inner_length = cpu_to_be32(cmd_size), + .inner_command_addr = cpu_to_be64(inner_cmd_bus), + }; + + err = gve_adminq_execute_cmd(priv, &cmd); + + dma_free_coherent(&priv->pdev->dev, cmd_size, inner_cmd, inner_cmd_bus); + return err; } /* The device specifies that the management vector can either be the first irq @@ -565,6 +679,7 @@ static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index) cpu_to_be64(tx->q_resources_bus), .tx_ring_addr = cpu_to_be64(tx->bus), .ntfy_id = cpu_to_be32(tx->ntfy_id), + .tx_ring_size = cpu_to_be16(priv->tx_desc_cnt), }; if (gve_is_gqi(priv)) { @@ -573,24 +688,17 @@ static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index) cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id); } else { - u16 comp_ring_size; u32 qpl_id = 0; - if (priv->queue_format == GVE_DQO_RDA_FORMAT) { + if (priv->queue_format == GVE_DQO_RDA_FORMAT) qpl_id = GVE_RAW_ADDRESSING_QPL_ID; - comp_ring_size = - priv->options_dqo_rda.tx_comp_ring_entries; - } else { + else qpl_id = tx->dqo.qpl->id; - comp_ring_size = priv->tx_desc_cnt; - } cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id); - cmd.create_tx_queue.tx_ring_size = - cpu_to_be16(priv->tx_desc_cnt); cmd.create_tx_queue.tx_comp_ring_addr = cpu_to_be64(tx->complq_bus_dqo); cmd.create_tx_queue.tx_comp_ring_size = - cpu_to_be16(comp_ring_size); + cpu_to_be16(priv->tx_desc_cnt); } return gve_adminq_issue_cmd(priv, &cmd); @@ -610,63 +718,71 @@ int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_que return gve_adminq_kick_and_wait(priv); } -static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index) +static void gve_adminq_get_create_rx_queue_cmd(struct gve_priv *priv, + union gve_adminq_command *cmd, + u32 queue_index) { struct gve_rx_ring *rx = &priv->rx[queue_index]; - union gve_adminq_command cmd; - memset(&cmd, 0, sizeof(cmd)); - cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_RX_QUEUE); - cmd.create_rx_queue = (struct gve_adminq_create_rx_queue) { + memset(cmd, 0, sizeof(*cmd)); + cmd->opcode = cpu_to_be32(GVE_ADMINQ_CREATE_RX_QUEUE); + cmd->create_rx_queue = (struct gve_adminq_create_rx_queue) { .queue_id = cpu_to_be32(queue_index), .ntfy_id = cpu_to_be32(rx->ntfy_id), .queue_resources_addr = cpu_to_be64(rx->q_resources_bus), + .rx_ring_size = cpu_to_be16(priv->rx_desc_cnt), + .packet_buffer_size = 
cpu_to_be16(rx->packet_buffer_size), }; if (gve_is_gqi(priv)) { u32 qpl_id = priv->queue_format == GVE_GQI_RDA_FORMAT ? GVE_RAW_ADDRESSING_QPL_ID : rx->data.qpl->id; - cmd.create_rx_queue.rx_desc_ring_addr = - cpu_to_be64(rx->desc.bus), - cmd.create_rx_queue.rx_data_ring_addr = - cpu_to_be64(rx->data.data_bus), - cmd.create_rx_queue.index = cpu_to_be32(queue_index); - cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id); - cmd.create_rx_queue.packet_buffer_size = cpu_to_be16(rx->packet_buffer_size); + cmd->create_rx_queue.rx_desc_ring_addr = + cpu_to_be64(rx->desc.bus); + cmd->create_rx_queue.rx_data_ring_addr = + cpu_to_be64(rx->data.data_bus); + cmd->create_rx_queue.index = cpu_to_be32(queue_index); + cmd->create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id); } else { - u16 rx_buff_ring_entries; u32 qpl_id = 0; - if (priv->queue_format == GVE_DQO_RDA_FORMAT) { + if (priv->queue_format == GVE_DQO_RDA_FORMAT) qpl_id = GVE_RAW_ADDRESSING_QPL_ID; - rx_buff_ring_entries = - priv->options_dqo_rda.rx_buff_ring_entries; - } else { + else qpl_id = rx->dqo.qpl->id; - rx_buff_ring_entries = priv->rx_desc_cnt; - } - cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id); - cmd.create_rx_queue.rx_ring_size = - cpu_to_be16(priv->rx_desc_cnt); - cmd.create_rx_queue.rx_desc_ring_addr = + cmd->create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id); + cmd->create_rx_queue.rx_desc_ring_addr = cpu_to_be64(rx->dqo.complq.bus); - cmd.create_rx_queue.rx_data_ring_addr = + cmd->create_rx_queue.rx_data_ring_addr = cpu_to_be64(rx->dqo.bufq.bus); - cmd.create_rx_queue.packet_buffer_size = - cpu_to_be16(priv->data_buffer_size_dqo); - cmd.create_rx_queue.rx_buff_ring_size = - cpu_to_be16(rx_buff_ring_entries); - cmd.create_rx_queue.enable_rsc = + cmd->create_rx_queue.rx_buff_ring_size = + cpu_to_be16(priv->rx_desc_cnt); + cmd->create_rx_queue.enable_rsc = !!(priv->dev->features & NETIF_F_LRO); if (priv->header_split_enabled) - cmd.create_rx_queue.header_buffer_size = + cmd->create_rx_queue.header_buffer_size = cpu_to_be16(priv->header_buf_size); } +} + +static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index) +{ + union gve_adminq_command cmd; + gve_adminq_get_create_rx_queue_cmd(priv, &cmd, queue_index); return gve_adminq_issue_cmd(priv, &cmd); } +/* Unlike gve_adminq_create_rx_queue, this actually rings the doorbell */ +int gve_adminq_create_single_rx_queue(struct gve_priv *priv, u32 queue_index) +{ + union gve_adminq_command cmd; + + gve_adminq_get_create_rx_queue_cmd(priv, &cmd, queue_index); + return gve_adminq_execute_cmd(priv, &cmd); +} + int gve_adminq_create_rx_queues(struct gve_priv *priv, u32 num_queues) { int err; @@ -713,22 +829,31 @@ int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_qu return gve_adminq_kick_and_wait(priv); } +static void gve_adminq_make_destroy_rx_queue_cmd(union gve_adminq_command *cmd, + u32 queue_index) +{ + memset(cmd, 0, sizeof(*cmd)); + cmd->opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_RX_QUEUE); + cmd->destroy_rx_queue = (struct gve_adminq_destroy_rx_queue) { + .queue_id = cpu_to_be32(queue_index), + }; +} + static int gve_adminq_destroy_rx_queue(struct gve_priv *priv, u32 queue_index) { union gve_adminq_command cmd; - int err; - memset(&cmd, 0, sizeof(cmd)); - cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_RX_QUEUE); - cmd.destroy_rx_queue = (struct gve_adminq_destroy_rx_queue) { - .queue_id = cpu_to_be32(queue_index), - }; + gve_adminq_make_destroy_rx_queue_cmd(&cmd, queue_index); + return gve_adminq_issue_cmd(priv, 
&cmd); +} - err = gve_adminq_issue_cmd(priv, &cmd); - if (err) - return err; +/* Unlike gve_adminq_destroy_rx_queue, this actually rings the doorbell */ +int gve_adminq_destroy_single_rx_queue(struct gve_priv *priv, u32 queue_index) +{ + union gve_adminq_command cmd; - return 0; + gve_adminq_make_destroy_rx_queue_cmd(&cmd, queue_index); + return gve_adminq_execute_cmd(priv, &cmd); } int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 num_queues) @@ -745,31 +870,26 @@ int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 num_queues) return gve_adminq_kick_and_wait(priv); } -static int gve_set_desc_cnt(struct gve_priv *priv, - struct gve_device_descriptor *descriptor) +static void gve_set_default_desc_cnt(struct gve_priv *priv, + const struct gve_device_descriptor *descriptor) { priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries); priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries); - return 0; + + /* set default ranges */ + priv->max_tx_desc_cnt = priv->tx_desc_cnt; + priv->max_rx_desc_cnt = priv->rx_desc_cnt; + priv->min_tx_desc_cnt = priv->tx_desc_cnt; + priv->min_rx_desc_cnt = priv->rx_desc_cnt; } -static int -gve_set_desc_cnt_dqo(struct gve_priv *priv, - const struct gve_device_descriptor *descriptor, - const struct gve_device_option_dqo_rda *dev_op_dqo_rda) +static void gve_set_default_rss_sizes(struct gve_priv *priv) { - priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries); - priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries); - - if (priv->queue_format == GVE_DQO_QPL_FORMAT) - return 0; - - priv->options_dqo_rda.tx_comp_ring_entries = - be16_to_cpu(dev_op_dqo_rda->tx_comp_ring_entries); - priv->options_dqo_rda.rx_buff_ring_entries = - be16_to_cpu(dev_op_dqo_rda->rx_buff_ring_entries); - - return 0; + if (!gve_is_gqi(priv)) { + priv->rss_key_size = GVE_RSS_KEY_SIZE; + priv->rss_lut_size = GVE_RSS_INDIR_SIZE; + priv->cache_rss_config = true; + } } static void gve_enable_supported_features(struct gve_priv *priv, @@ -779,7 +899,13 @@ static void gve_enable_supported_features(struct gve_priv *priv, const struct gve_device_option_dqo_qpl *dev_op_dqo_qpl, const struct gve_device_option_buffer_sizes - *dev_op_buffer_sizes) + *dev_op_buffer_sizes, + const struct gve_device_option_flow_steering + *dev_op_flow_steering, + const struct gve_device_option_rss_config + *dev_op_rss_config, + const struct gve_device_option_modify_ring + *dev_op_modify_ring) { /* Before control reaches this point, the page-size-capped max MTU from * the gve_device_descriptor field has already been stored in @@ -796,12 +922,8 @@ static void gve_enable_supported_features(struct gve_priv *priv, if (dev_op_dqo_qpl) { priv->tx_pages_per_qpl = be16_to_cpu(dev_op_dqo_qpl->tx_pages_per_qpl); - priv->rx_pages_per_qpl = - be16_to_cpu(dev_op_dqo_qpl->rx_pages_per_qpl); if (priv->tx_pages_per_qpl == 0) priv->tx_pages_per_qpl = DQO_QPL_DEFAULT_TX_PAGES; - if (priv->rx_pages_per_qpl == 0) - priv->rx_pages_per_qpl = DQO_QPL_DEFAULT_RX_PAGES; } if (dev_op_buffer_sizes && @@ -814,12 +936,59 @@ static void gve_enable_supported_features(struct gve_priv *priv, "BUFFER SIZES device option enabled with max_rx_buffer_size of %u, header_buf_size of %u.\n", priv->max_rx_buffer_size, priv->header_buf_size); } + + /* Read and store ring size ranges given by device */ + if (dev_op_modify_ring && + (supported_features_mask & GVE_SUP_MODIFY_RING_MASK)) { + priv->modify_ring_size_enabled = true; + + /* max ring size for DQO QPL should not be overwritten because of device limit */ + if 
(priv->queue_format != GVE_DQO_QPL_FORMAT) { + priv->max_rx_desc_cnt = be16_to_cpu(dev_op_modify_ring->max_rx_ring_size); + priv->max_tx_desc_cnt = be16_to_cpu(dev_op_modify_ring->max_tx_ring_size); + } + if (priv->default_min_ring_size) { + /* If device hasn't provided minimums, use default minimums */ + priv->min_tx_desc_cnt = GVE_DEFAULT_MIN_TX_RING_SIZE; + priv->min_rx_desc_cnt = GVE_DEFAULT_MIN_RX_RING_SIZE; + } else { + priv->min_rx_desc_cnt = be16_to_cpu(dev_op_modify_ring->min_rx_ring_size); + priv->min_tx_desc_cnt = be16_to_cpu(dev_op_modify_ring->min_tx_ring_size); + } + } + + if (dev_op_flow_steering && + (supported_features_mask & GVE_SUP_FLOW_STEERING_MASK)) { + if (dev_op_flow_steering->max_flow_rules) { + priv->max_flow_rules = + be32_to_cpu(dev_op_flow_steering->max_flow_rules); + priv->dev->hw_features |= NETIF_F_NTUPLE; + dev_info(&priv->pdev->dev, + "FLOW STEERING device option enabled with max rule limit of %u.\n", + priv->max_flow_rules); + } + } + + if (dev_op_rss_config && + (supported_features_mask & GVE_SUP_RSS_CONFIG_MASK)) { + priv->rss_key_size = + be16_to_cpu(dev_op_rss_config->hash_key_size); + priv->rss_lut_size = + be16_to_cpu(dev_op_rss_config->hash_lut_size); + priv->cache_rss_config = false; + dev_dbg(&priv->pdev->dev, + "RSS device option enabled with key size of %u, lut size of %u.\n", + priv->rss_key_size, priv->rss_lut_size); + } } int gve_adminq_describe_device(struct gve_priv *priv) { + struct gve_device_option_flow_steering *dev_op_flow_steering = NULL; struct gve_device_option_buffer_sizes *dev_op_buffer_sizes = NULL; struct gve_device_option_jumbo_frames *dev_op_jumbo_frames = NULL; + struct gve_device_option_modify_ring *dev_op_modify_ring = NULL; + struct gve_device_option_rss_config *dev_op_rss_config = NULL; struct gve_device_option_gqi_rda *dev_op_gqi_rda = NULL; struct gve_device_option_gqi_qpl *dev_op_gqi_qpl = NULL; struct gve_device_option_dqo_rda *dev_op_dqo_rda = NULL; @@ -851,9 +1020,11 @@ int gve_adminq_describe_device(struct gve_priv *priv) err = gve_process_device_options(priv, descriptor, &dev_op_gqi_rda, &dev_op_gqi_qpl, &dev_op_dqo_rda, - &dev_op_jumbo_frames, - &dev_op_dqo_qpl, - &dev_op_buffer_sizes); + &dev_op_jumbo_frames, &dev_op_dqo_qpl, + &dev_op_buffer_sizes, + &dev_op_flow_steering, + &dev_op_rss_config, + &dev_op_modify_ring); if (err) goto free_device_descriptor; @@ -888,15 +1059,15 @@ int gve_adminq_describe_device(struct gve_priv *priv) dev_info(&priv->pdev->dev, "Driver is running with GQI QPL queue format.\n"); } - if (gve_is_gqi(priv)) { - err = gve_set_desc_cnt(priv, descriptor); - } else { - /* DQO supports LRO. */ + + /* set default descriptor counts */ + gve_set_default_desc_cnt(priv, descriptor); + + gve_set_default_rss_sizes(priv); + + /* DQO supports LRO. 
*/ + if (!gve_is_gqi(priv)) priv->dev->hw_features |= NETIF_F_LRO; - err = gve_set_desc_cnt_dqo(priv, descriptor, dev_op_dqo_rda); - } - if (err) - goto free_device_descriptor; priv->max_registered_pages = be64_to_cpu(descriptor->max_registered_pages); @@ -912,18 +1083,12 @@ int gve_adminq_describe_device(struct gve_priv *priv) mac = descriptor->mac; dev_info(&priv->pdev->dev, "MAC addr: %pM\n", mac); priv->tx_pages_per_qpl = be16_to_cpu(descriptor->tx_pages_per_qpl); - priv->rx_data_slot_cnt = be16_to_cpu(descriptor->rx_pages_per_qpl); - - if (gve_is_gqi(priv) && priv->rx_data_slot_cnt < priv->rx_desc_cnt) { - dev_err(&priv->pdev->dev, "rx_data_slot_cnt cannot be smaller than rx_desc_cnt, setting rx_desc_cnt down to %d.\n", - priv->rx_data_slot_cnt); - priv->rx_desc_cnt = priv->rx_data_slot_cnt; - } priv->default_num_queues = be16_to_cpu(descriptor->default_num_queues); gve_enable_supported_features(priv, supported_features_mask, dev_op_jumbo_frames, dev_op_dqo_qpl, - dev_op_buffer_sizes); + dev_op_buffer_sizes, dev_op_flow_steering, + dev_op_rss_config, dev_op_modify_ring); free_device_descriptor: dma_pool_free(priv->adminq_pool, descriptor, descriptor_bus); @@ -976,20 +1141,6 @@ int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id) return gve_adminq_execute_cmd(priv, &cmd); } -int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu) -{ - union gve_adminq_command cmd; - - memset(&cmd, 0, sizeof(cmd)); - cmd.opcode = cpu_to_be32(GVE_ADMINQ_SET_DRIVER_PARAMETER); - cmd.set_driver_param = (struct gve_adminq_set_driver_parameter) { - .parameter_type = cpu_to_be32(GVE_SET_PARAM_MTU), - .parameter_value = cpu_to_be64(mtu), - }; - - return gve_adminq_execute_cmd(priv, &cmd); -} - int gve_adminq_report_stats(struct gve_priv *priv, u64 stats_report_len, dma_addr_t stats_report_addr, u64 interval) { @@ -1086,3 +1237,293 @@ err: ptype_map_bus); return err; } + +static int +gve_adminq_configure_flow_rule(struct gve_priv *priv, + struct gve_adminq_configure_flow_rule *flow_rule_cmd) +{ + int err = gve_adminq_execute_extended_cmd(priv, + GVE_ADMINQ_CONFIGURE_FLOW_RULE, + sizeof(struct gve_adminq_configure_flow_rule), + flow_rule_cmd); + + if (err == -ETIME) { + dev_err(&priv->pdev->dev, "Timeout to configure the flow rule, trigger reset"); + gve_reset(priv, true); + } else if (!err) { + priv->flow_rules_cache.rules_cache_synced = false; + } + + return err; +} + +int gve_adminq_add_flow_rule(struct gve_priv *priv, struct gve_adminq_flow_rule *rule, u32 loc) +{ + struct gve_adminq_configure_flow_rule flow_rule_cmd = { + .opcode = cpu_to_be16(GVE_FLOW_RULE_CFG_ADD), + .location = cpu_to_be32(loc), + .rule = *rule, + }; + + return gve_adminq_configure_flow_rule(priv, &flow_rule_cmd); +} + +int gve_adminq_del_flow_rule(struct gve_priv *priv, u32 loc) +{ + struct gve_adminq_configure_flow_rule flow_rule_cmd = { + .opcode = cpu_to_be16(GVE_FLOW_RULE_CFG_DEL), + .location = cpu_to_be32(loc), + }; + + return gve_adminq_configure_flow_rule(priv, &flow_rule_cmd); +} + +int gve_adminq_reset_flow_rules(struct gve_priv *priv) +{ + struct gve_adminq_configure_flow_rule flow_rule_cmd = { + .opcode = cpu_to_be16(GVE_FLOW_RULE_CFG_RESET), + }; + + return gve_adminq_configure_flow_rule(priv, &flow_rule_cmd); +} + +int gve_adminq_configure_rss(struct gve_priv *priv, struct ethtool_rxfh_param *rxfh) +{ + const u32 *hash_lut_to_config = NULL; + const u8 *hash_key_to_config = NULL; + dma_addr_t lut_bus = 0, key_bus = 0; + union gve_adminq_command cmd; + __be32 *lut = NULL; + u8 hash_alg = 0; + u8 *key = 
NULL; + int err = 0; + u16 i; + + switch (rxfh->hfunc) { + case ETH_RSS_HASH_NO_CHANGE: + fallthrough; + case ETH_RSS_HASH_TOP: + hash_alg = ETH_RSS_HASH_TOP; + break; + default: + return -EOPNOTSUPP; + } + + if (rxfh->indir) { + if (rxfh->indir_size != priv->rss_lut_size) + return -EINVAL; + + hash_lut_to_config = rxfh->indir; + } else if (priv->cache_rss_config) { + hash_lut_to_config = priv->rss_config.hash_lut; + } + + if (hash_lut_to_config) { + lut = dma_alloc_coherent(&priv->pdev->dev, + priv->rss_lut_size * sizeof(*lut), + &lut_bus, GFP_KERNEL); + if (!lut) + return -ENOMEM; + + for (i = 0; i < priv->rss_lut_size; i++) + lut[i] = cpu_to_be32(hash_lut_to_config[i]); + } + + if (rxfh->key) { + if (rxfh->key_size != priv->rss_key_size) { + err = -EINVAL; + goto out; + } + + hash_key_to_config = rxfh->key; + } else if (priv->cache_rss_config) { + hash_key_to_config = priv->rss_config.hash_key; + } + + if (hash_key_to_config) { + key = dma_alloc_coherent(&priv->pdev->dev, + priv->rss_key_size, + &key_bus, GFP_KERNEL); + if (!key) { + err = -ENOMEM; + goto out; + } + + memcpy(key, hash_key_to_config, priv->rss_key_size); + } + + /* Zero-valued fields in the cmd.configure_rss instruct the device to + * not update those fields. + */ + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = cpu_to_be32(GVE_ADMINQ_CONFIGURE_RSS); + cmd.configure_rss = (struct gve_adminq_configure_rss) { + .hash_types = cpu_to_be16(BIT(GVE_RSS_HASH_TCPV4) | + BIT(GVE_RSS_HASH_UDPV4) | + BIT(GVE_RSS_HASH_TCPV6) | + BIT(GVE_RSS_HASH_UDPV6)), + .hash_alg = hash_alg, + .hash_key_size = + cpu_to_be16((key_bus) ? priv->rss_key_size : 0), + .hash_lut_size = + cpu_to_be16((lut_bus) ? priv->rss_lut_size : 0), + .hash_key_addr = cpu_to_be64(key_bus), + .hash_lut_addr = cpu_to_be64(lut_bus), + }; + + err = gve_adminq_execute_cmd(priv, &cmd); + +out: + if (lut) + dma_free_coherent(&priv->pdev->dev, + priv->rss_lut_size * sizeof(*lut), + lut, lut_bus); + if (key) + dma_free_coherent(&priv->pdev->dev, + priv->rss_key_size, key, key_bus); + return err; +} + +/* In the dma memory that the driver allocated for the device to query the flow rules, the device + * will first write it with a struct of gve_query_flow_rules_descriptor. Next to it, the device + * will write an array of rules or rule ids with the count that specified in the descriptor. + * For GVE_FLOW_RULE_QUERY_STATS, the device will only write the descriptor. 
+ */ +static int gve_adminq_process_flow_rules_query(struct gve_priv *priv, u16 query_opcode, + struct gve_query_flow_rules_descriptor *descriptor) +{ + struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache; + u32 num_queried_rules, total_memory_len, rule_info_len; + void *rule_info; + + total_memory_len = be32_to_cpu(descriptor->total_length); + num_queried_rules = be32_to_cpu(descriptor->num_queried_rules); + rule_info = (void *)(descriptor + 1); + + switch (query_opcode) { + case GVE_FLOW_RULE_QUERY_RULES: + rule_info_len = num_queried_rules * sizeof(*flow_rules_cache->rules_cache); + if (sizeof(*descriptor) + rule_info_len != total_memory_len) { + dev_err(&priv->dev->dev, "flow rules query is out of memory.\n"); + return -ENOMEM; + } + + memcpy(flow_rules_cache->rules_cache, rule_info, rule_info_len); + flow_rules_cache->rules_cache_num = num_queried_rules; + break; + case GVE_FLOW_RULE_QUERY_IDS: + rule_info_len = num_queried_rules * sizeof(*flow_rules_cache->rule_ids_cache); + if (sizeof(*descriptor) + rule_info_len != total_memory_len) { + dev_err(&priv->dev->dev, "flow rule ids query is out of memory.\n"); + return -ENOMEM; + } + + memcpy(flow_rules_cache->rule_ids_cache, rule_info, rule_info_len); + flow_rules_cache->rule_ids_cache_num = num_queried_rules; + break; + case GVE_FLOW_RULE_QUERY_STATS: + priv->num_flow_rules = be32_to_cpu(descriptor->num_flow_rules); + priv->max_flow_rules = be32_to_cpu(descriptor->max_flow_rules); + return 0; + default: + return -EINVAL; + } + + return 0; +} + +int gve_adminq_query_flow_rules(struct gve_priv *priv, u16 query_opcode, u32 starting_loc) +{ + struct gve_query_flow_rules_descriptor *descriptor; + union gve_adminq_command cmd; + dma_addr_t descriptor_bus; + int err = 0; + + memset(&cmd, 0, sizeof(cmd)); + descriptor = dma_pool_alloc(priv->adminq_pool, GFP_KERNEL, &descriptor_bus); + if (!descriptor) + return -ENOMEM; + + cmd.opcode = cpu_to_be32(GVE_ADMINQ_QUERY_FLOW_RULES); + cmd.query_flow_rules = (struct gve_adminq_query_flow_rules) { + .opcode = cpu_to_be16(query_opcode), + .starting_rule_id = cpu_to_be32(starting_loc), + .available_length = cpu_to_be64(GVE_ADMINQ_BUFFER_SIZE), + .rule_descriptor_addr = cpu_to_be64(descriptor_bus), + }; + err = gve_adminq_execute_cmd(priv, &cmd); + if (err) + goto out; + + err = gve_adminq_process_flow_rules_query(priv, query_opcode, descriptor); + +out: + dma_pool_free(priv->adminq_pool, descriptor, descriptor_bus); + return err; +} + +static int gve_adminq_process_rss_query(struct gve_priv *priv, + struct gve_query_rss_descriptor *descriptor, + struct ethtool_rxfh_param *rxfh) +{ + u32 total_memory_length; + u16 hash_lut_length; + void *rss_info_addr; + __be32 *lut; + u16 i; + + total_memory_length = be32_to_cpu(descriptor->total_length); + hash_lut_length = priv->rss_lut_size * sizeof(*rxfh->indir); + + if (sizeof(*descriptor) + priv->rss_key_size + hash_lut_length != total_memory_length) { + dev_err(&priv->dev->dev, + "rss query desc from device has invalid length parameter.\n"); + return -EINVAL; + } + + rxfh->hfunc = descriptor->hash_alg; + + rss_info_addr = (void *)(descriptor + 1); + if (rxfh->key) { + rxfh->key_size = priv->rss_key_size; + memcpy(rxfh->key, rss_info_addr, priv->rss_key_size); + } + + rss_info_addr += priv->rss_key_size; + lut = (__be32 *)rss_info_addr; + if (rxfh->indir) { + rxfh->indir_size = priv->rss_lut_size; + for (i = 0; i < priv->rss_lut_size; i++) + rxfh->indir[i] = be32_to_cpu(lut[i]); + } + + return 0; +} + +int gve_adminq_query_rss_config(struct 
gve_priv *priv, struct ethtool_rxfh_param *rxfh) +{ + struct gve_query_rss_descriptor *descriptor; + union gve_adminq_command cmd; + dma_addr_t descriptor_bus; + int err = 0; + + descriptor = dma_pool_alloc(priv->adminq_pool, GFP_KERNEL, &descriptor_bus); + if (!descriptor) + return -ENOMEM; + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = cpu_to_be32(GVE_ADMINQ_QUERY_RSS); + cmd.query_rss = (struct gve_adminq_query_rss) { + .available_length = cpu_to_be64(GVE_ADMINQ_BUFFER_SIZE), + .rss_descriptor_addr = cpu_to_be64(descriptor_bus), + }; + err = gve_adminq_execute_cmd(priv, &cmd); + if (err) + goto out; + + err = gve_adminq_process_rss_query(priv, descriptor, rxfh); + +out: + dma_pool_free(priv->adminq_pool, descriptor, descriptor_bus); + return err; +} diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h index 5ac972e45ff8..228217458275 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.h +++ b/drivers/net/ethernet/google/gve/gve_adminq.h @@ -20,11 +20,26 @@ enum gve_adminq_opcodes { GVE_ADMINQ_DESTROY_TX_QUEUE = 0x7, GVE_ADMINQ_DESTROY_RX_QUEUE = 0x8, GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES = 0x9, + GVE_ADMINQ_CONFIGURE_RSS = 0xA, GVE_ADMINQ_SET_DRIVER_PARAMETER = 0xB, GVE_ADMINQ_REPORT_STATS = 0xC, GVE_ADMINQ_REPORT_LINK_SPEED = 0xD, GVE_ADMINQ_GET_PTYPE_MAP = 0xE, GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY = 0xF, + GVE_ADMINQ_QUERY_FLOW_RULES = 0x10, + GVE_ADMINQ_QUERY_RSS = 0x12, + + /* For commands that are larger than 56 bytes */ + GVE_ADMINQ_EXTENDED_COMMAND = 0xFF, +}; + +/* The normal adminq command is restricted to be 56 bytes at maximum. For the + * longer adminq command, it is wrapped by GVE_ADMINQ_EXTENDED_COMMAND with + * inner opcode of gve_adminq_extended_cmd_opcodes specified. The inner command + * is written in the dma memory allocated by GVE_ADMINQ_EXTENDED_COMMAND. + */ +enum gve_adminq_extended_cmd_opcodes { + GVE_ADMINQ_CONFIGURE_FLOW_RULE = 0x101, }; /* Admin queue status codes */ @@ -103,8 +118,7 @@ static_assert(sizeof(struct gve_device_option_gqi_qpl) == 4); struct gve_device_option_dqo_rda { __be32 supported_features_mask; - __be16 tx_comp_ring_entries; - __be16 rx_buff_ring_entries; + __be32 reserved; }; static_assert(sizeof(struct gve_device_option_dqo_rda) == 8); @@ -134,6 +148,32 @@ struct gve_device_option_buffer_sizes { static_assert(sizeof(struct gve_device_option_buffer_sizes) == 8); +struct gve_device_option_modify_ring { + __be32 supported_featured_mask; + __be16 max_rx_ring_size; + __be16 max_tx_ring_size; + __be16 min_rx_ring_size; + __be16 min_tx_ring_size; +}; + +static_assert(sizeof(struct gve_device_option_modify_ring) == 12); + +struct gve_device_option_flow_steering { + __be32 supported_features_mask; + __be32 reserved; + __be32 max_flow_rules; +}; + +static_assert(sizeof(struct gve_device_option_flow_steering) == 12); + +struct gve_device_option_rss_config { + __be32 supported_features_mask; + __be16 hash_key_size; + __be16 hash_lut_size; +}; + +static_assert(sizeof(struct gve_device_option_rss_config) == 8); + /* Terminology: * * RDA - Raw DMA Addressing - Buffers associated with SKBs are directly DMA @@ -143,28 +183,37 @@ static_assert(sizeof(struct gve_device_option_buffer_sizes) == 8); * the device for read/write and data is copied from/to SKBs. 
*/ enum gve_dev_opt_id { - GVE_DEV_OPT_ID_GQI_RAW_ADDRESSING = 0x1, - GVE_DEV_OPT_ID_GQI_RDA = 0x2, - GVE_DEV_OPT_ID_GQI_QPL = 0x3, - GVE_DEV_OPT_ID_DQO_RDA = 0x4, - GVE_DEV_OPT_ID_DQO_QPL = 0x7, - GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8, - GVE_DEV_OPT_ID_BUFFER_SIZES = 0xa, + GVE_DEV_OPT_ID_GQI_RAW_ADDRESSING = 0x1, + GVE_DEV_OPT_ID_GQI_RDA = 0x2, + GVE_DEV_OPT_ID_GQI_QPL = 0x3, + GVE_DEV_OPT_ID_DQO_RDA = 0x4, + GVE_DEV_OPT_ID_MODIFY_RING = 0x6, + GVE_DEV_OPT_ID_DQO_QPL = 0x7, + GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8, + GVE_DEV_OPT_ID_BUFFER_SIZES = 0xa, + GVE_DEV_OPT_ID_FLOW_STEERING = 0xb, + GVE_DEV_OPT_ID_RSS_CONFIG = 0xe, }; enum gve_dev_opt_req_feat_mask { - GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RAW_ADDRESSING = 0x0, - GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RDA = 0x0, - GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL = 0x0, - GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA = 0x0, - GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES = 0x0, - GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL = 0x0, - GVE_DEV_OPT_REQ_FEAT_MASK_BUFFER_SIZES = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RAW_ADDRESSING = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RDA = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_BUFFER_SIZES = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_FLOW_STEERING = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_RSS_CONFIG = 0x0, }; enum gve_sup_feature_mask { - GVE_SUP_JUMBO_FRAMES_MASK = 1 << 2, - GVE_SUP_BUFFER_SIZES_MASK = 1 << 4, + GVE_SUP_MODIFY_RING_MASK = 1 << 0, + GVE_SUP_JUMBO_FRAMES_MASK = 1 << 2, + GVE_SUP_BUFFER_SIZES_MASK = 1 << 4, + GVE_SUP_FLOW_STEERING_MASK = 1 << 5, + GVE_SUP_RSS_CONFIG_MASK = 1 << 7, }; #define GVE_DEV_OPT_LEN_GQI_RAW_ADDRESSING 0x0 @@ -178,6 +227,7 @@ enum gve_driver_capbility { gve_driver_capability_dqo_rda = 3, gve_driver_capability_alt_miss_compl = 4, gve_driver_capability_flexible_buffer_size = 5, + gve_driver_capability_flexible_rss_size = 6, }; #define GVE_CAP1(a) BIT((int)a) @@ -190,12 +240,21 @@ enum gve_driver_capbility { GVE_CAP1(gve_driver_capability_gqi_rda) | \ GVE_CAP1(gve_driver_capability_dqo_rda) | \ GVE_CAP1(gve_driver_capability_alt_miss_compl) | \ - GVE_CAP1(gve_driver_capability_flexible_buffer_size)) + GVE_CAP1(gve_driver_capability_flexible_buffer_size) | \ + GVE_CAP1(gve_driver_capability_flexible_rss_size)) #define GVE_DRIVER_CAPABILITY_FLAGS2 0x0 #define GVE_DRIVER_CAPABILITY_FLAGS3 0x0 #define GVE_DRIVER_CAPABILITY_FLAGS4 0x0 +struct gve_adminq_extended_command { + __be32 inner_opcode; + __be32 inner_length; + __be64 inner_command_addr; +}; + +static_assert(sizeof(struct gve_adminq_extended_command) == 16); + struct gve_driver_info { u8 os_type; /* 0x01 = Linux */ u8 driver_major; @@ -400,6 +459,109 @@ struct gve_adminq_get_ptype_map { __be64 ptype_map_addr; }; +/* Flow-steering related definitions */ +enum gve_adminq_flow_rule_cfg_opcode { + GVE_FLOW_RULE_CFG_ADD = 0, + GVE_FLOW_RULE_CFG_DEL = 1, + GVE_FLOW_RULE_CFG_RESET = 2, +}; + +enum gve_adminq_flow_rule_query_opcode { + GVE_FLOW_RULE_QUERY_RULES = 0, + GVE_FLOW_RULE_QUERY_IDS = 1, + GVE_FLOW_RULE_QUERY_STATS = 2, +}; + +enum gve_adminq_flow_type { + GVE_FLOW_TYPE_TCPV4, + GVE_FLOW_TYPE_UDPV4, + GVE_FLOW_TYPE_SCTPV4, + GVE_FLOW_TYPE_AHV4, + GVE_FLOW_TYPE_ESPV4, + GVE_FLOW_TYPE_TCPV6, + GVE_FLOW_TYPE_UDPV6, + GVE_FLOW_TYPE_SCTPV6, + GVE_FLOW_TYPE_AHV6, + GVE_FLOW_TYPE_ESPV6, +}; + +/* Flow-steering command */ +struct gve_adminq_flow_rule { + __be16 flow_type; + __be16 action; /* 
RX queue id */ + struct gve_flow_spec key; + struct gve_flow_spec mask; +}; + +struct gve_adminq_configure_flow_rule { + __be16 opcode; + u8 padding[2]; + struct gve_adminq_flow_rule rule; + __be32 location; +}; + +static_assert(sizeof(struct gve_adminq_configure_flow_rule) == 92); + +struct gve_query_flow_rules_descriptor { + __be32 num_flow_rules; + __be32 max_flow_rules; + __be32 num_queried_rules; + __be32 total_length; +}; + +struct gve_adminq_queried_flow_rule { + __be32 location; + struct gve_adminq_flow_rule flow_rule; +}; + +struct gve_adminq_query_flow_rules { + __be16 opcode; + u8 padding[2]; + __be32 starting_rule_id; + __be64 available_length; /* The dma memory length that the driver allocated */ + __be64 rule_descriptor_addr; /* The dma memory address */ +}; + +static_assert(sizeof(struct gve_adminq_query_flow_rules) == 24); + +enum gve_rss_hash_type { + GVE_RSS_HASH_IPV4, + GVE_RSS_HASH_TCPV4, + GVE_RSS_HASH_IPV6, + GVE_RSS_HASH_IPV6_EX, + GVE_RSS_HASH_TCPV6, + GVE_RSS_HASH_TCPV6_EX, + GVE_RSS_HASH_UDPV4, + GVE_RSS_HASH_UDPV6, + GVE_RSS_HASH_UDPV6_EX, +}; + +struct gve_adminq_configure_rss { + __be16 hash_types; + u8 hash_alg; + u8 reserved; + __be16 hash_key_size; + __be16 hash_lut_size; + __be64 hash_key_addr; + __be64 hash_lut_addr; +}; + +static_assert(sizeof(struct gve_adminq_configure_rss) == 24); + +struct gve_query_rss_descriptor { + __be32 total_length; + __be16 hash_types; + u8 hash_alg; + u8 reserved; +}; + +struct gve_adminq_query_rss { + __be64 available_length; + __be64 rss_descriptor_addr; +}; + +static_assert(sizeof(struct gve_adminq_query_rss) == 16); + union gve_adminq_command { struct { __be32 opcode; @@ -420,6 +582,10 @@ union gve_adminq_command { struct gve_adminq_get_ptype_map get_ptype_map; struct gve_adminq_verify_driver_compatibility verify_driver_compatibility; + struct gve_adminq_query_flow_rules query_flow_rules; + struct gve_adminq_configure_rss configure_rss; + struct gve_adminq_query_rss query_rss; + struct gve_adminq_extended_command extended_command; }; }; u8 reserved[64]; @@ -439,18 +605,25 @@ int gve_adminq_configure_device_resources(struct gve_priv *priv, int gve_adminq_deconfigure_device_resources(struct gve_priv *priv); int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_queues); int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_queues); +int gve_adminq_create_single_rx_queue(struct gve_priv *priv, u32 queue_index); int gve_adminq_create_rx_queues(struct gve_priv *priv, u32 num_queues); +int gve_adminq_destroy_single_rx_queue(struct gve_priv *priv, u32 queue_index); int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 queue_id); int gve_adminq_register_page_list(struct gve_priv *priv, struct gve_queue_page_list *qpl); int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id); -int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu); int gve_adminq_report_stats(struct gve_priv *priv, u64 stats_report_len, dma_addr_t stats_report_addr, u64 interval); int gve_adminq_verify_driver_compatibility(struct gve_priv *priv, u64 driver_info_len, dma_addr_t driver_info_addr); int gve_adminq_report_link_speed(struct gve_priv *priv); +int gve_adminq_add_flow_rule(struct gve_priv *priv, struct gve_adminq_flow_rule *rule, u32 loc); +int gve_adminq_del_flow_rule(struct gve_priv *priv, u32 loc); +int gve_adminq_reset_flow_rules(struct gve_priv *priv); +int gve_adminq_query_flow_rules(struct gve_priv *priv, u16 query_opcode, u32 starting_loc); +int gve_adminq_configure_rss(struct 
gve_priv *priv, struct ethtool_rxfh_param *rxfh); +int gve_adminq_query_rss_config(struct gve_priv *priv, struct ethtool_rxfh_param *rxfh); struct gve_ptype_lut; int gve_adminq_get_ptype_map_dqo(struct gve_priv *priv, diff --git a/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c new file mode 100644 index 000000000000..a71883e1d920 --- /dev/null +++ b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c @@ -0,0 +1,316 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Google virtual Ethernet (gve) driver + * + * Copyright (C) 2015-2024 Google, Inc. + */ + +#include "gve.h" +#include "gve_utils.h" + +int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs) +{ + return page_count(bs->page_info.page) - bs->page_info.pagecnt_bias; +} + +struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx) +{ + struct gve_rx_buf_state_dqo *buf_state; + s16 buffer_id; + + buffer_id = rx->dqo.free_buf_states; + if (unlikely(buffer_id == -1)) + return NULL; + + buf_state = &rx->dqo.buf_states[buffer_id]; + + /* Remove buf_state from free list */ + rx->dqo.free_buf_states = buf_state->next; + + /* Point buf_state to itself to mark it as allocated */ + buf_state->next = buffer_id; + + return buf_state; +} + +bool gve_buf_state_is_allocated(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state) +{ + s16 buffer_id = buf_state - rx->dqo.buf_states; + + return buf_state->next == buffer_id; +} + +void gve_free_buf_state(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state) +{ + s16 buffer_id = buf_state - rx->dqo.buf_states; + + buf_state->next = rx->dqo.free_buf_states; + rx->dqo.free_buf_states = buffer_id; +} + +struct gve_rx_buf_state_dqo *gve_dequeue_buf_state(struct gve_rx_ring *rx, + struct gve_index_list *list) +{ + struct gve_rx_buf_state_dqo *buf_state; + s16 buffer_id; + + buffer_id = list->head; + if (unlikely(buffer_id == -1)) + return NULL; + + buf_state = &rx->dqo.buf_states[buffer_id]; + + /* Remove buf_state from list */ + list->head = buf_state->next; + if (buf_state->next == -1) + list->tail = -1; + + /* Point buf_state to itself to mark it as allocated */ + buf_state->next = buffer_id; + + return buf_state; +} + +void gve_enqueue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list, + struct gve_rx_buf_state_dqo *buf_state) +{ + s16 buffer_id = buf_state - rx->dqo.buf_states; + + buf_state->next = -1; + + if (list->head == -1) { + list->head = buffer_id; + list->tail = buffer_id; + } else { + int tail = list->tail; + + rx->dqo.buf_states[tail].next = buffer_id; + list->tail = buffer_id; + } +} + +struct gve_rx_buf_state_dqo *gve_get_recycled_buf_state(struct gve_rx_ring *rx) +{ + struct gve_rx_buf_state_dqo *buf_state; + int i; + + /* Recycled buf states are immediately usable. */ + buf_state = gve_dequeue_buf_state(rx, &rx->dqo.recycled_buf_states); + if (likely(buf_state)) + return buf_state; + + if (unlikely(rx->dqo.used_buf_states.head == -1)) + return NULL; + + /* Used buf states are only usable when ref count reaches 0, which means + * no SKBs refer to them. + * + * Search a limited number before giving up. 
+ */ + for (i = 0; i < 5; i++) { + buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states); + if (gve_buf_ref_cnt(buf_state) == 0) { + rx->dqo.used_buf_states_cnt--; + return buf_state; + } + + gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state); + } + + return NULL; +} + +int gve_alloc_qpl_page_dqo(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state) +{ + struct gve_priv *priv = rx->gve; + u32 idx; + + idx = rx->dqo.next_qpl_page_idx; + if (idx >= gve_get_rx_pages_per_qpl_dqo(priv->rx_desc_cnt)) { + net_err_ratelimited("%s: Out of QPL pages\n", + priv->dev->name); + return -ENOMEM; + } + buf_state->page_info.page = rx->dqo.qpl->pages[idx]; + buf_state->addr = rx->dqo.qpl->page_buses[idx]; + rx->dqo.next_qpl_page_idx++; + buf_state->page_info.page_offset = 0; + buf_state->page_info.page_address = + page_address(buf_state->page_info.page); + buf_state->page_info.buf_size = rx->packet_buffer_truesize; + buf_state->page_info.pad = rx->rx_headroom; + buf_state->last_single_ref_offset = 0; + + /* The page already has 1 ref. */ + page_ref_add(buf_state->page_info.page, INT_MAX - 1); + buf_state->page_info.pagecnt_bias = INT_MAX; + + return 0; +} + +void gve_free_qpl_page_dqo(struct gve_rx_buf_state_dqo *buf_state) +{ + if (!buf_state->page_info.page) + return; + + page_ref_sub(buf_state->page_info.page, + buf_state->page_info.pagecnt_bias - 1); + buf_state->page_info.page = NULL; +} + +void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state) +{ + const u16 data_buffer_size = rx->packet_buffer_truesize; + int pagecount; + + /* Can't reuse if we only fit one buffer per page */ + if (data_buffer_size * 2 > PAGE_SIZE) + goto mark_used; + + pagecount = gve_buf_ref_cnt(buf_state); + + /* Record the offset when we have a single remaining reference. + * + * When this happens, we know all of the other offsets of the page are + * usable. + */ + if (pagecount == 1) { + buf_state->last_single_ref_offset = + buf_state->page_info.page_offset; + } + + /* Use the next buffer sized chunk in the page. */ + buf_state->page_info.page_offset += data_buffer_size; + buf_state->page_info.page_offset &= (PAGE_SIZE - 1); + + /* If we wrap around to the same offset without ever dropping to 1 + * reference, then we don't know if this offset was ever freed. 
+ */ + if (buf_state->page_info.page_offset == + buf_state->last_single_ref_offset) { + goto mark_used; + } + + gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state); + return; + +mark_used: + gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state); + rx->dqo.used_buf_states_cnt++; +} + +void gve_free_to_page_pool(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state, + bool allow_direct) +{ + netmem_ref netmem = buf_state->page_info.netmem; + + if (!netmem) + return; + + page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, allow_direct); + buf_state->page_info.netmem = 0; +} + +static int gve_alloc_from_page_pool(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state) +{ + netmem_ref netmem; + + buf_state->page_info.buf_size = rx->packet_buffer_truesize; + netmem = page_pool_alloc_netmem(rx->dqo.page_pool, + &buf_state->page_info.page_offset, + &buf_state->page_info.buf_size, + GFP_ATOMIC); + + if (!netmem) + return -ENOMEM; + + buf_state->page_info.netmem = netmem; + buf_state->page_info.page_address = netmem_address(netmem); + buf_state->addr = page_pool_get_dma_addr_netmem(netmem); + buf_state->page_info.pad = rx->dqo.page_pool->p.offset; + + return 0; +} + +struct page_pool *gve_rx_create_page_pool(struct gve_priv *priv, + struct gve_rx_ring *rx, + bool xdp) +{ + u32 ntfy_id = gve_rx_idx_to_ntfy(priv, rx->q_num); + struct page_pool_params pp = { + .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, + .order = 0, + .pool_size = GVE_PAGE_POOL_SIZE_MULTIPLIER * priv->rx_desc_cnt, + .dev = &priv->pdev->dev, + .netdev = priv->dev, + .napi = &priv->ntfy_blocks[ntfy_id].napi, + .max_len = PAGE_SIZE, + .dma_dir = xdp ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE, + .offset = xdp ? XDP_PACKET_HEADROOM : 0, + }; + + return page_pool_create(&pp); +} + +void gve_free_buffer(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state) +{ + if (rx->dqo.page_pool) { + gve_free_to_page_pool(rx, buf_state, true); + gve_free_buf_state(rx, buf_state); + } else { + gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, + buf_state); + } +} + +void gve_reuse_buffer(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state) +{ + if (rx->dqo.page_pool) { + buf_state->page_info.netmem = 0; + gve_free_buf_state(rx, buf_state); + } else { + gve_dec_pagecnt_bias(&buf_state->page_info); + gve_try_recycle_buf(rx->gve, rx, buf_state); + } +} + +int gve_alloc_buffer(struct gve_rx_ring *rx, struct gve_rx_desc_dqo *desc) +{ + struct gve_rx_buf_state_dqo *buf_state; + + if (rx->dqo.page_pool) { + buf_state = gve_alloc_buf_state(rx); + if (WARN_ON_ONCE(!buf_state)) + return -ENOMEM; + + if (gve_alloc_from_page_pool(rx, buf_state)) + goto free_buf_state; + } else { + buf_state = gve_get_recycled_buf_state(rx); + if (unlikely(!buf_state)) { + buf_state = gve_alloc_buf_state(rx); + if (unlikely(!buf_state)) + return -ENOMEM; + + if (unlikely(gve_alloc_qpl_page_dqo(rx, buf_state))) + goto free_buf_state; + } + } + desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states); + desc->buf_addr = cpu_to_le64(buf_state->addr + + buf_state->page_info.page_offset + + buf_state->page_info.pad); + + return 0; + +free_buf_state: + gve_free_buf_state(rx, buf_state); + return -ENOMEM; +} diff --git a/drivers/net/ethernet/google/gve/gve_dqo.h b/drivers/net/ethernet/google/gve/gve_dqo.h index b81584829c40..e83773fb891f 100644 --- a/drivers/net/ethernet/google/gve/gve_dqo.h +++ b/drivers/net/ethernet/google/gve/gve_dqo.h @@ -44,6 +44,12 @@ void gve_tx_free_rings_dqo(struct gve_priv *priv, 
struct gve_tx_alloc_rings_cfg *cfg); void gve_tx_start_ring_dqo(struct gve_priv *priv, int idx); void gve_tx_stop_ring_dqo(struct gve_priv *priv, int idx); +int gve_rx_alloc_ring_dqo(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg, + struct gve_rx_ring *rx, + int idx); +void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, + struct gve_rx_alloc_rings_cfg *cfg); int gve_rx_alloc_rings_dqo(struct gve_priv *priv, struct gve_rx_alloc_rings_cfg *cfg); void gve_rx_free_rings_dqo(struct gve_priv *priv, diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 9aebfb843d9d..3c1da0cf3f61 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -1,13 +1,14 @@ // SPDX-License-Identifier: (GPL-2.0 OR MIT) /* Google virtual Ethernet (gve) driver * - * Copyright (C) 2015-2021 Google, Inc. + * Copyright (C) 2015-2024 Google LLC */ #include <linux/rtnetlink.h> #include "gve.h" #include "gve_adminq.h" #include "gve_dqo.h" +#include "gve_utils.h" static void gve_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info) @@ -62,18 +63,20 @@ static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = { static const char gve_gstrings_tx_stats[][ETH_GSTRING_LEN] = { "tx_posted_desc[%u]", "tx_completed_desc[%u]", "tx_consumed_desc[%u]", "tx_bytes[%u]", "tx_wake[%u]", "tx_stop[%u]", "tx_event_counter[%u]", - "tx_dma_mapping_error[%u]", "tx_xsk_wakeup[%u]", - "tx_xsk_done[%u]", "tx_xsk_sent[%u]", "tx_xdp_xmit[%u]", "tx_xdp_xmit_errors[%u]" + "tx_dma_mapping_error[%u]", + "tx_xsk_sent[%u]", "tx_xdp_xmit[%u]", "tx_xdp_xmit_errors[%u]" }; -static const char gve_gstrings_adminq_stats[][ETH_GSTRING_LEN] = { +static const char gve_gstrings_adminq_stats[][ETH_GSTRING_LEN] __nonstring_array = { "adminq_prod_cnt", "adminq_cmd_fail", "adminq_timeouts", "adminq_describe_device_cnt", "adminq_cfg_device_resources_cnt", "adminq_register_page_list_cnt", "adminq_unregister_page_list_cnt", "adminq_create_tx_queue_cnt", "adminq_create_rx_queue_cnt", "adminq_destroy_tx_queue_cnt", "adminq_destroy_rx_queue_cnt", "adminq_dcfg_device_resources_cnt", "adminq_set_driver_parameter_cnt", - "adminq_report_stats_cnt", "adminq_report_link_speed_cnt" + "adminq_report_stats_cnt", "adminq_report_link_speed_cnt", "adminq_get_ptype_map_cnt", + "adminq_query_flow_rules", "adminq_cfg_flow_rule", "adminq_cfg_rss_cnt", + "adminq_query_rss_cnt", }; static const char gve_gstrings_priv_flags[][ETH_GSTRING_LEN] = { @@ -89,42 +92,34 @@ static const char gve_gstrings_priv_flags[][ETH_GSTRING_LEN] = { static void gve_get_strings(struct net_device *netdev, u32 stringset, u8 *data) { struct gve_priv *priv = netdev_priv(netdev); - char *s = (char *)data; + u8 *s = (char *)data; int num_tx_queues; int i, j; num_tx_queues = gve_num_tx_queues(priv); switch (stringset) { case ETH_SS_STATS: - memcpy(s, *gve_gstrings_main_stats, - sizeof(gve_gstrings_main_stats)); - s += sizeof(gve_gstrings_main_stats); - - for (i = 0; i < priv->rx_cfg.num_queues; i++) { - for (j = 0; j < NUM_GVE_RX_CNTS; j++) { - snprintf(s, ETH_GSTRING_LEN, - gve_gstrings_rx_stats[j], i); - s += ETH_GSTRING_LEN; - } - } + for (i = 0; i < ARRAY_SIZE(gve_gstrings_main_stats); i++) + ethtool_puts(&s, gve_gstrings_main_stats[i]); - for (i = 0; i < num_tx_queues; i++) { - for (j = 0; j < NUM_GVE_TX_CNTS; j++) { - snprintf(s, ETH_GSTRING_LEN, - gve_gstrings_tx_stats[j], i); - s += ETH_GSTRING_LEN; - } - } + for (i = 0; i < priv->rx_cfg.num_queues; i++) 
+ for (j = 0; j < NUM_GVE_RX_CNTS; j++) + ethtool_sprintf(&s, gve_gstrings_rx_stats[j], + i); + + for (i = 0; i < num_tx_queues; i++) + for (j = 0; j < NUM_GVE_TX_CNTS; j++) + ethtool_sprintf(&s, gve_gstrings_tx_stats[j], + i); + + for (i = 0; i < ARRAY_SIZE(gve_gstrings_adminq_stats); i++) + ethtool_cpy(&s, gve_gstrings_adminq_stats[i]); - memcpy(s, *gve_gstrings_adminq_stats, - sizeof(gve_gstrings_adminq_stats)); - s += sizeof(gve_gstrings_adminq_stats); break; case ETH_SS_PRIV_FLAGS: - memcpy(s, *gve_gstrings_priv_flags, - sizeof(gve_gstrings_priv_flags)); - s += sizeof(gve_gstrings_priv_flags); + for (i = 0; i < ARRAY_SIZE(gve_gstrings_priv_flags); i++) + ethtool_puts(&s, gve_gstrings_priv_flags[i]); break; default: @@ -165,6 +160,8 @@ gve_get_ethtool_stats(struct net_device *netdev, struct stats *report_stats; int *rx_qid_to_stats_idx; int *tx_qid_to_stats_idx; + int num_stopped_rxqs = 0; + int num_stopped_txqs = 0; struct gve_priv *priv; bool skip_nic_stats; unsigned int start; @@ -181,12 +178,23 @@ gve_get_ethtool_stats(struct net_device *netdev, sizeof(int), GFP_KERNEL); if (!rx_qid_to_stats_idx) return; + for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) { + rx_qid_to_stats_idx[ring] = -1; + if (!gve_rx_was_added_to_block(priv, ring)) + num_stopped_rxqs++; + } tx_qid_to_stats_idx = kmalloc_array(num_tx_queues, sizeof(int), GFP_KERNEL); if (!tx_qid_to_stats_idx) { kfree(rx_qid_to_stats_idx); return; } + for (ring = 0; ring < num_tx_queues; ring++) { + tx_qid_to_stats_idx[ring] = -1; + if (!gve_tx_was_added_to_block(priv, ring)) + num_stopped_txqs++; + } + for (rx_pkts = 0, rx_bytes = 0, rx_hsplit_pkt = 0, rx_skb_alloc_fail = 0, rx_buf_alloc_fail = 0, rx_desc_err_dropped_pkt = 0, rx_hsplit_unsplit_pkt = 0, @@ -260,7 +268,13 @@ gve_get_ethtool_stats(struct net_device *netdev, /* For rx cross-reporting stats, start from nic rx stats in report */ base_stats_idx = GVE_TX_STATS_REPORT_NUM * num_tx_queues + GVE_RX_STATS_REPORT_NUM * priv->rx_cfg.num_queues; - max_stats_idx = NIC_RX_STATS_REPORT_NUM * priv->rx_cfg.num_queues + + /* The boundary between driver stats and NIC stats shifts if there are + * stopped queues. 
+ */ + base_stats_idx += NIC_RX_STATS_REPORT_NUM * num_stopped_rxqs + + NIC_TX_STATS_REPORT_NUM * num_stopped_txqs; + max_stats_idx = NIC_RX_STATS_REPORT_NUM * + (priv->rx_cfg.num_queues - num_stopped_rxqs) + base_stats_idx; /* Preprocess the stats report for rx, map queue id to start index */ skip_nic_stats = false; @@ -274,6 +288,10 @@ gve_get_ethtool_stats(struct net_device *netdev, skip_nic_stats = true; break; } + if (queue_id < 0 || queue_id >= priv->rx_cfg.num_queues) { + net_err_ratelimited("Invalid rxq id in NIC stats\n"); + continue; + } rx_qid_to_stats_idx[queue_id] = stats_idx; } /* walk RX rings */ @@ -308,11 +326,11 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = rx->rx_copybreak_pkt; data[i++] = rx->rx_copied_pkt; /* stats from NIC */ - if (skip_nic_stats) { + stats_idx = rx_qid_to_stats_idx[ring]; + if (skip_nic_stats || stats_idx < 0) { /* skip NIC rx stats */ i += NIC_RX_STATS_REPORT_NUM; } else { - stats_idx = rx_qid_to_stats_idx[ring]; for (j = 0; j < NIC_RX_STATS_REPORT_NUM; j++) { u64 value = be64_to_cpu(report_stats[stats_idx + j].value); @@ -338,7 +356,8 @@ gve_get_ethtool_stats(struct net_device *netdev, /* For tx cross-reporting stats, start from nic tx stats in report */ base_stats_idx = max_stats_idx; - max_stats_idx = NIC_TX_STATS_REPORT_NUM * num_tx_queues + + max_stats_idx = NIC_TX_STATS_REPORT_NUM * + (num_tx_queues - num_stopped_txqs) + max_stats_idx; /* Preprocess the stats report for tx, map queue id to start index */ skip_nic_stats = false; @@ -352,6 +371,10 @@ gve_get_ethtool_stats(struct net_device *netdev, skip_nic_stats = true; break; } + if (queue_id < 0 || queue_id >= num_tx_queues) { + net_err_ratelimited("Invalid txq id in NIC stats\n"); + continue; + } tx_qid_to_stats_idx[queue_id] = stats_idx; } /* walk TX rings */ @@ -369,7 +392,9 @@ gve_get_ethtool_stats(struct net_device *netdev, */ data[i++] = 0; data[i++] = 0; - data[i++] = tx->dqo_tx.tail - tx->dqo_tx.head; + data[i++] = + (tx->dqo_tx.tail - tx->dqo_tx.head) & + tx->mask; } do { start = @@ -383,20 +408,18 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = gve_tx_load_event_counter(priv, tx); data[i++] = tx->dma_mapping_error; /* stats from NIC */ - if (skip_nic_stats) { + stats_idx = tx_qid_to_stats_idx[ring]; + if (skip_nic_stats || stats_idx < 0) { /* skip NIC tx stats */ i += NIC_TX_STATS_REPORT_NUM; } else { - stats_idx = tx_qid_to_stats_idx[ring]; for (j = 0; j < NIC_TX_STATS_REPORT_NUM; j++) { u64 value = be64_to_cpu(report_stats[stats_idx + j].value); data[i++] = value; } } - /* XDP xsk counters */ - data[i++] = tx->xdp_xsk_wakeup; - data[i++] = tx->xdp_xsk_done; + /* XDP counters */ do { start = u64_stats_fetch_begin(&priv->tx[ring].statss); data[i] = tx->xdp_xsk_sent; @@ -428,6 +451,11 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = priv->adminq_set_driver_parameter_cnt; data[i++] = priv->adminq_report_stats_cnt; data[i++] = priv->adminq_report_link_speed_cnt; + data[i++] = priv->adminq_get_ptype_map_cnt; + data[i++] = priv->adminq_query_flow_rules_cnt; + data[i++] = priv->adminq_cfg_flow_rule_cnt; + data[i++] = priv->adminq_cfg_rss_cnt; + data[i++] = priv->adminq_query_rss_cnt; } static void gve_get_channels(struct net_device *netdev, @@ -449,11 +477,12 @@ static int gve_set_channels(struct net_device *netdev, struct ethtool_channels *cmd) { struct gve_priv *priv = netdev_priv(netdev); - struct gve_queue_config new_tx_cfg = priv->tx_cfg; - struct gve_queue_config new_rx_cfg = priv->rx_cfg; + struct gve_tx_queue_config new_tx_cfg = 
priv->tx_cfg; + struct gve_rx_queue_config new_rx_cfg = priv->rx_cfg; struct ethtool_channels old_settings; int new_tx = cmd->tx_count; int new_rx = cmd->rx_count; + bool reset_rss = false; gve_get_channels(netdev, &old_settings); @@ -464,22 +493,27 @@ static int gve_set_channels(struct net_device *netdev, if (!new_rx || !new_tx) return -EINVAL; - if (priv->num_xdp_queues && - (new_tx != new_rx || (2 * new_tx > priv->tx_cfg.max_queues))) { - dev_err(&priv->pdev->dev, "XDP load failed: The number of configured RX queues should be equal to the number of configured TX queues and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues"); - return -EINVAL; - } + if (priv->xdp_prog) { + if (new_tx != new_rx || + (2 * new_tx > priv->tx_cfg.max_queues)) { + dev_err(&priv->pdev->dev, "The number of configured RX queues should be equal to the number of configured TX queues and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues when XDP program is installed"); + return -EINVAL; + } - if (!netif_carrier_ok(netdev)) { - priv->tx_cfg.num_queues = new_tx; - priv->rx_cfg.num_queues = new_rx; - return 0; + /* One XDP TX queue per RX queue. */ + new_tx_cfg.num_xdp_queues = new_rx; + } else { + new_tx_cfg.num_xdp_queues = 0; } + if (new_rx != priv->rx_cfg.num_queues && + priv->cache_rss_config && !netif_is_rxfh_configured(netdev)) + reset_rss = true; + new_tx_cfg.num_queues = new_tx; new_rx_cfg.num_queues = new_rx; - return gve_adjust_queues(priv, new_rx_cfg, new_tx_cfg); + return gve_adjust_queues(priv, new_rx_cfg, new_tx_cfg, reset_rss); } static void gve_get_ringparam(struct net_device *netdev, @@ -489,8 +523,8 @@ static void gve_get_ringparam(struct net_device *netdev, { struct gve_priv *priv = netdev_priv(netdev); - cmd->rx_max_pending = priv->rx_desc_cnt; - cmd->tx_max_pending = priv->tx_desc_cnt; + cmd->rx_max_pending = priv->max_rx_desc_cnt; + cmd->tx_max_pending = priv->max_tx_desc_cnt; cmd->rx_pending = priv->rx_desc_cnt; cmd->tx_pending = priv->tx_desc_cnt; @@ -502,20 +536,81 @@ static void gve_get_ringparam(struct net_device *netdev, kernel_cmd->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_DISABLED; } +static int gve_adjust_ring_sizes(struct gve_priv *priv, + u16 new_tx_desc_cnt, + u16 new_rx_desc_cnt) +{ + struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; + struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; + int err; + + /* get current queue configuration */ + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); + + /* copy over the new ring_size from ethtool */ + tx_alloc_cfg.ring_size = new_tx_desc_cnt; + rx_alloc_cfg.ring_size = new_rx_desc_cnt; + + if (netif_running(priv->dev)) { + err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); + if (err) + return err; + } + + /* Set new ring_size for the next up */ + priv->tx_desc_cnt = new_tx_desc_cnt; + priv->rx_desc_cnt = new_rx_desc_cnt; + + return 0; +} + +static int gve_validate_req_ring_size(struct gve_priv *priv, u16 new_tx_desc_cnt, + u16 new_rx_desc_cnt) +{ + /* check for valid range */ + if (new_tx_desc_cnt < priv->min_tx_desc_cnt || + new_tx_desc_cnt > priv->max_tx_desc_cnt || + new_rx_desc_cnt < priv->min_rx_desc_cnt || + new_rx_desc_cnt > priv->max_rx_desc_cnt) { + dev_err(&priv->pdev->dev, "Requested descriptor count out of range\n"); + return -EINVAL; + } + + if (!is_power_of_2(new_tx_desc_cnt) || !is_power_of_2(new_rx_desc_cnt)) { + dev_err(&priv->pdev->dev, "Requested descriptor count has to be a power of 2\n"); 
+ return -EINVAL; + } + return 0; +} + static int gve_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *cmd, struct kernel_ethtool_ringparam *kernel_cmd, struct netlink_ext_ack *extack) { struct gve_priv *priv = netdev_priv(netdev); + u16 new_tx_cnt, new_rx_cnt; + int err; - if (priv->tx_desc_cnt != cmd->tx_pending || - priv->rx_desc_cnt != cmd->rx_pending) { - dev_info(&priv->pdev->dev, "Modify ring size is not supported.\n"); + err = gve_set_hsplit_config(priv, kernel_cmd->tcp_data_split); + if (err) + return err; + + if (cmd->tx_pending == priv->tx_desc_cnt && cmd->rx_pending == priv->rx_desc_cnt) + return 0; + + if (!priv->modify_ring_size_enabled) { + dev_err(&priv->pdev->dev, "Modify ring size is not supported.\n"); return -EOPNOTSUPP; } - return gve_set_hsplit_config(priv, kernel_cmd->tcp_data_split); + new_tx_cnt = cmd->tx_pending; + new_rx_cnt = cmd->rx_pending; + + if (gve_validate_req_ring_size(priv, new_tx_cnt, new_rx_cnt)) + return -EINVAL; + + return gve_adjust_ring_sizes(priv, new_tx_cnt, new_rx_cnt); } static int gve_user_reset(struct net_device *netdev, u32 *flags) @@ -554,8 +649,7 @@ static int gve_set_tunable(struct net_device *netdev, switch (etuna->id) { case ETHTOOL_RX_COPYBREAK: { - u32 max_copybreak = gve_is_gqi(priv) ? - GVE_DEFAULT_RX_BUFFER_SIZE : priv->data_buffer_size_dqo; + u32 max_copybreak = priv->rx_cfg.packet_buffer_size; len = *(u32 *)value; if (len > max_copybreak) @@ -611,7 +705,7 @@ static int gve_set_priv_flags(struct net_device *netdev, u32 flags) memset(priv->stats_report->stats, 0, (tx_stats_num + rx_stats_num) * sizeof(struct stats)); - del_timer_sync(&priv->stats_report_timer); + timer_delete_sync(&priv->stats_report_timer); } return 0; } @@ -689,6 +783,151 @@ static int gve_set_coalesce(struct net_device *netdev, return 0; } +static int gve_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd) +{ + struct gve_priv *priv = netdev_priv(netdev); + int err = 0; + + if (!(netdev->features & NETIF_F_NTUPLE)) + return -EOPNOTSUPP; + + switch (cmd->cmd) { + case ETHTOOL_SRXCLSRLINS: + err = gve_add_flow_rule(priv, cmd); + break; + case ETHTOOL_SRXCLSRLDEL: + err = gve_del_flow_rule(priv, cmd); + break; + case ETHTOOL_SRXFH: + err = -EOPNOTSUPP; + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static int gve_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, u32 *rule_locs) +{ + struct gve_priv *priv = netdev_priv(netdev); + int err = 0; + + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + cmd->data = priv->rx_cfg.num_queues; + break; + case ETHTOOL_GRXCLSRLCNT: + if (!priv->max_flow_rules) + return -EOPNOTSUPP; + + err = gve_adminq_query_flow_rules(priv, GVE_FLOW_RULE_QUERY_STATS, 0); + if (err) + return err; + + cmd->rule_cnt = priv->num_flow_rules; + cmd->data = priv->max_flow_rules; + break; + case ETHTOOL_GRXCLSRULE: + err = gve_get_flow_rule_entry(priv, cmd); + break; + case ETHTOOL_GRXCLSRLALL: + err = gve_get_flow_rule_ids(priv, cmd, (u32 *)rule_locs); + break; + case ETHTOOL_GRXFH: + err = -EOPNOTSUPP; + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static u32 gve_get_rxfh_key_size(struct net_device *netdev) +{ + struct gve_priv *priv = netdev_priv(netdev); + + return priv->rss_key_size; +} + +static u32 gve_get_rxfh_indir_size(struct net_device *netdev) +{ + struct gve_priv *priv = netdev_priv(netdev); + + return priv->rss_lut_size; +} + +static void gve_get_rss_config_cache(struct gve_priv *priv, + struct ethtool_rxfh_param *rxfh) +{ + struct 
gve_rss_config *rss_config = &priv->rss_config; + + rxfh->hfunc = ETH_RSS_HASH_TOP; + + if (rxfh->key) { + rxfh->key_size = priv->rss_key_size; + memcpy(rxfh->key, rss_config->hash_key, priv->rss_key_size); + } + + if (rxfh->indir) { + rxfh->indir_size = priv->rss_lut_size; + memcpy(rxfh->indir, rss_config->hash_lut, + priv->rss_lut_size * sizeof(*rxfh->indir)); + } +} + +static int gve_get_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh) +{ + struct gve_priv *priv = netdev_priv(netdev); + + if (!priv->rss_key_size || !priv->rss_lut_size) + return -EOPNOTSUPP; + + if (priv->cache_rss_config) { + gve_get_rss_config_cache(priv, rxfh); + return 0; + } + + return gve_adminq_query_rss_config(priv, rxfh); +} + +static void gve_set_rss_config_cache(struct gve_priv *priv, + struct ethtool_rxfh_param *rxfh) +{ + struct gve_rss_config *rss_config = &priv->rss_config; + + if (rxfh->key) + memcpy(rss_config->hash_key, rxfh->key, priv->rss_key_size); + + if (rxfh->indir) + memcpy(rss_config->hash_lut, rxfh->indir, + priv->rss_lut_size * sizeof(*rxfh->indir)); +} + +static int gve_set_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) +{ + struct gve_priv *priv = netdev_priv(netdev); + int err; + + if (!priv->rss_key_size || !priv->rss_lut_size) + return -EOPNOTSUPP; + + err = gve_adminq_configure_rss(priv, rxfh); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Fail to configure RSS config"); + return err; + } + + if (priv->cache_rss_config) + gve_set_rss_config_cache(priv, rxfh); + + return 0; +} + const struct ethtool_ops gve_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS, .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT, @@ -700,6 +939,12 @@ const struct ethtool_ops gve_ethtool_ops = { .get_msglevel = gve_get_msglevel, .set_channels = gve_set_channels, .get_channels = gve_get_channels, + .set_rxnfc = gve_set_rxnfc, + .get_rxnfc = gve_get_rxnfc, + .get_rxfh_indir_size = gve_get_rxfh_indir_size, + .get_rxfh_key_size = gve_get_rxfh_key_size, + .get_rxfh = gve_get_rxfh, + .set_rxfh = gve_set_rxfh, .get_link = ethtool_op_get_link, .get_coalesce = gve_get_coalesce, .set_coalesce = gve_set_coalesce, @@ -710,5 +955,6 @@ const struct ethtool_ops gve_ethtool_ops = { .set_tunable = gve_set_tunable, .get_priv_flags = gve_get_priv_flags, .set_priv_flags = gve_set_priv_flags, - .get_link_ksettings = gve_get_link_ksettings + .get_link_ksettings = gve_get_link_ksettings, + .get_ts_info = ethtool_op_get_ts_info, }; diff --git a/drivers/net/ethernet/google/gve/gve_flow_rule.c b/drivers/net/ethernet/google/gve/gve_flow_rule.c new file mode 100644 index 000000000000..0bb8cd1876a3 --- /dev/null +++ b/drivers/net/ethernet/google/gve/gve_flow_rule.c @@ -0,0 +1,298 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Google virtual Ethernet (gve) driver + * + * Copyright (C) 2015-2024 Google LLC + */ + +#include "gve.h" +#include "gve_adminq.h" + +static +int gve_fill_ethtool_flow_spec(struct ethtool_rx_flow_spec *fsp, + struct gve_adminq_queried_flow_rule *rule) +{ + struct gve_adminq_flow_rule *flow_rule = &rule->flow_rule; + static const u16 flow_type_lut[] = { + [GVE_FLOW_TYPE_TCPV4] = TCP_V4_FLOW, + [GVE_FLOW_TYPE_UDPV4] = UDP_V4_FLOW, + [GVE_FLOW_TYPE_SCTPV4] = SCTP_V4_FLOW, + [GVE_FLOW_TYPE_AHV4] = AH_V4_FLOW, + [GVE_FLOW_TYPE_ESPV4] = ESP_V4_FLOW, + [GVE_FLOW_TYPE_TCPV6] = TCP_V6_FLOW, + [GVE_FLOW_TYPE_UDPV6] = UDP_V6_FLOW, + [GVE_FLOW_TYPE_SCTPV6] = SCTP_V6_FLOW, + [GVE_FLOW_TYPE_AHV6] = AH_V6_FLOW, + [GVE_FLOW_TYPE_ESPV6] = 
ESP_V6_FLOW, + }; + + if (be16_to_cpu(flow_rule->flow_type) >= ARRAY_SIZE(flow_type_lut)) + return -EINVAL; + + fsp->flow_type = flow_type_lut[be16_to_cpu(flow_rule->flow_type)]; + + memset(&fsp->h_u, 0, sizeof(fsp->h_u)); + memset(&fsp->h_ext, 0, sizeof(fsp->h_ext)); + memset(&fsp->m_u, 0, sizeof(fsp->m_u)); + memset(&fsp->m_ext, 0, sizeof(fsp->m_ext)); + + switch (fsp->flow_type) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + case SCTP_V4_FLOW: + fsp->h_u.tcp_ip4_spec.ip4src = flow_rule->key.src_ip[0]; + fsp->h_u.tcp_ip4_spec.ip4dst = flow_rule->key.dst_ip[0]; + fsp->h_u.tcp_ip4_spec.psrc = flow_rule->key.src_port; + fsp->h_u.tcp_ip4_spec.pdst = flow_rule->key.dst_port; + fsp->h_u.tcp_ip4_spec.tos = flow_rule->key.tos; + fsp->m_u.tcp_ip4_spec.ip4src = flow_rule->mask.src_ip[0]; + fsp->m_u.tcp_ip4_spec.ip4dst = flow_rule->mask.dst_ip[0]; + fsp->m_u.tcp_ip4_spec.psrc = flow_rule->mask.src_port; + fsp->m_u.tcp_ip4_spec.pdst = flow_rule->mask.dst_port; + fsp->m_u.tcp_ip4_spec.tos = flow_rule->mask.tos; + break; + case AH_V4_FLOW: + case ESP_V4_FLOW: + fsp->h_u.ah_ip4_spec.ip4src = flow_rule->key.src_ip[0]; + fsp->h_u.ah_ip4_spec.ip4dst = flow_rule->key.dst_ip[0]; + fsp->h_u.ah_ip4_spec.spi = flow_rule->key.spi; + fsp->h_u.ah_ip4_spec.tos = flow_rule->key.tos; + fsp->m_u.ah_ip4_spec.ip4src = flow_rule->mask.src_ip[0]; + fsp->m_u.ah_ip4_spec.ip4dst = flow_rule->mask.dst_ip[0]; + fsp->m_u.ah_ip4_spec.spi = flow_rule->mask.spi; + fsp->m_u.ah_ip4_spec.tos = flow_rule->mask.tos; + break; + case TCP_V6_FLOW: + case UDP_V6_FLOW: + case SCTP_V6_FLOW: + memcpy(fsp->h_u.tcp_ip6_spec.ip6src, &flow_rule->key.src_ip, + sizeof(struct in6_addr)); + memcpy(fsp->h_u.tcp_ip6_spec.ip6dst, &flow_rule->key.dst_ip, + sizeof(struct in6_addr)); + fsp->h_u.tcp_ip6_spec.psrc = flow_rule->key.src_port; + fsp->h_u.tcp_ip6_spec.pdst = flow_rule->key.dst_port; + fsp->h_u.tcp_ip6_spec.tclass = flow_rule->key.tclass; + memcpy(fsp->m_u.tcp_ip6_spec.ip6src, &flow_rule->mask.src_ip, + sizeof(struct in6_addr)); + memcpy(fsp->m_u.tcp_ip6_spec.ip6dst, &flow_rule->mask.dst_ip, + sizeof(struct in6_addr)); + fsp->m_u.tcp_ip6_spec.psrc = flow_rule->mask.src_port; + fsp->m_u.tcp_ip6_spec.pdst = flow_rule->mask.dst_port; + fsp->m_u.tcp_ip6_spec.tclass = flow_rule->mask.tclass; + break; + case AH_V6_FLOW: + case ESP_V6_FLOW: + memcpy(fsp->h_u.ah_ip6_spec.ip6src, &flow_rule->key.src_ip, + sizeof(struct in6_addr)); + memcpy(fsp->h_u.ah_ip6_spec.ip6dst, &flow_rule->key.dst_ip, + sizeof(struct in6_addr)); + fsp->h_u.ah_ip6_spec.spi = flow_rule->key.spi; + fsp->h_u.ah_ip6_spec.tclass = flow_rule->key.tclass; + memcpy(fsp->m_u.ah_ip6_spec.ip6src, &flow_rule->mask.src_ip, + sizeof(struct in6_addr)); + memcpy(fsp->m_u.ah_ip6_spec.ip6dst, &flow_rule->mask.dst_ip, + sizeof(struct in6_addr)); + fsp->m_u.ah_ip6_spec.spi = flow_rule->mask.spi; + fsp->m_u.ah_ip6_spec.tclass = flow_rule->mask.tclass; + break; + default: + return -EINVAL; + } + + fsp->ring_cookie = be16_to_cpu(flow_rule->action); + + return 0; +} + +static int gve_generate_flow_rule(struct gve_priv *priv, struct ethtool_rx_flow_spec *fsp, + struct gve_adminq_flow_rule *rule) +{ + static const u16 flow_type_lut[] = { + [TCP_V4_FLOW] = GVE_FLOW_TYPE_TCPV4, + [UDP_V4_FLOW] = GVE_FLOW_TYPE_UDPV4, + [SCTP_V4_FLOW] = GVE_FLOW_TYPE_SCTPV4, + [AH_V4_FLOW] = GVE_FLOW_TYPE_AHV4, + [ESP_V4_FLOW] = GVE_FLOW_TYPE_ESPV4, + [TCP_V6_FLOW] = GVE_FLOW_TYPE_TCPV6, + [UDP_V6_FLOW] = GVE_FLOW_TYPE_UDPV6, + [SCTP_V6_FLOW] = GVE_FLOW_TYPE_SCTPV6, + [AH_V6_FLOW] = GVE_FLOW_TYPE_AHV6, + [ESP_V6_FLOW] = 
GVE_FLOW_TYPE_ESPV6, + }; + u32 flow_type; + + if (fsp->ring_cookie == RX_CLS_FLOW_DISC) + return -EOPNOTSUPP; + + if (fsp->ring_cookie >= priv->rx_cfg.num_queues) + return -EINVAL; + + rule->action = cpu_to_be16(fsp->ring_cookie); + + flow_type = fsp->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS); + if (!flow_type || flow_type >= ARRAY_SIZE(flow_type_lut)) + return -EINVAL; + + rule->flow_type = cpu_to_be16(flow_type_lut[flow_type]); + + switch (flow_type) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + case SCTP_V4_FLOW: + rule->key.src_ip[0] = fsp->h_u.tcp_ip4_spec.ip4src; + rule->key.dst_ip[0] = fsp->h_u.tcp_ip4_spec.ip4dst; + rule->key.src_port = fsp->h_u.tcp_ip4_spec.psrc; + rule->key.dst_port = fsp->h_u.tcp_ip4_spec.pdst; + rule->mask.src_ip[0] = fsp->m_u.tcp_ip4_spec.ip4src; + rule->mask.dst_ip[0] = fsp->m_u.tcp_ip4_spec.ip4dst; + rule->mask.src_port = fsp->m_u.tcp_ip4_spec.psrc; + rule->mask.dst_port = fsp->m_u.tcp_ip4_spec.pdst; + break; + case AH_V4_FLOW: + case ESP_V4_FLOW: + rule->key.src_ip[0] = fsp->h_u.tcp_ip4_spec.ip4src; + rule->key.dst_ip[0] = fsp->h_u.tcp_ip4_spec.ip4dst; + rule->key.spi = fsp->h_u.ah_ip4_spec.spi; + rule->mask.src_ip[0] = fsp->m_u.tcp_ip4_spec.ip4src; + rule->mask.dst_ip[0] = fsp->m_u.tcp_ip4_spec.ip4dst; + rule->mask.spi = fsp->m_u.ah_ip4_spec.spi; + break; + case TCP_V6_FLOW: + case UDP_V6_FLOW: + case SCTP_V6_FLOW: + memcpy(&rule->key.src_ip, fsp->h_u.tcp_ip6_spec.ip6src, + sizeof(struct in6_addr)); + memcpy(&rule->key.dst_ip, fsp->h_u.tcp_ip6_spec.ip6dst, + sizeof(struct in6_addr)); + rule->key.src_port = fsp->h_u.tcp_ip6_spec.psrc; + rule->key.dst_port = fsp->h_u.tcp_ip6_spec.pdst; + memcpy(&rule->mask.src_ip, fsp->m_u.tcp_ip6_spec.ip6src, + sizeof(struct in6_addr)); + memcpy(&rule->mask.dst_ip, fsp->m_u.tcp_ip6_spec.ip6dst, + sizeof(struct in6_addr)); + rule->mask.src_port = fsp->m_u.tcp_ip6_spec.psrc; + rule->mask.dst_port = fsp->m_u.tcp_ip6_spec.pdst; + break; + case AH_V6_FLOW: + case ESP_V6_FLOW: + memcpy(&rule->key.src_ip, fsp->h_u.usr_ip6_spec.ip6src, + sizeof(struct in6_addr)); + memcpy(&rule->key.dst_ip, fsp->h_u.usr_ip6_spec.ip6dst, + sizeof(struct in6_addr)); + rule->key.spi = fsp->h_u.ah_ip6_spec.spi; + memcpy(&rule->mask.src_ip, fsp->m_u.usr_ip6_spec.ip6src, + sizeof(struct in6_addr)); + memcpy(&rule->mask.dst_ip, fsp->m_u.usr_ip6_spec.ip6dst, + sizeof(struct in6_addr)); + rule->key.spi = fsp->h_u.ah_ip6_spec.spi; + break; + default: + /* not doing un-parsed flow types */ + return -EINVAL; + } + + return 0; +} + +int gve_get_flow_rule_entry(struct gve_priv *priv, struct ethtool_rxnfc *cmd) +{ + struct gve_adminq_queried_flow_rule *rules_cache = priv->flow_rules_cache.rules_cache; + struct ethtool_rx_flow_spec *fsp = (struct ethtool_rx_flow_spec *)&cmd->fs; + u32 *cache_num = &priv->flow_rules_cache.rules_cache_num; + struct gve_adminq_queried_flow_rule *rule = NULL; + int err = 0; + u32 i; + + if (!priv->max_flow_rules) + return -EOPNOTSUPP; + + if (!priv->flow_rules_cache.rules_cache_synced || + fsp->location < be32_to_cpu(rules_cache[0].location) || + fsp->location > be32_to_cpu(rules_cache[*cache_num - 1].location)) { + err = gve_adminq_query_flow_rules(priv, GVE_FLOW_RULE_QUERY_RULES, fsp->location); + if (err) + return err; + + priv->flow_rules_cache.rules_cache_synced = true; + } + + for (i = 0; i < *cache_num; i++) { + if (fsp->location == be32_to_cpu(rules_cache[i].location)) { + rule = &rules_cache[i]; + break; + } + } + + if (!rule) + return -EINVAL; + + err = gve_fill_ethtool_flow_spec(fsp, rule); + + return err; +} + +int 
gve_get_flow_rule_ids(struct gve_priv *priv, struct ethtool_rxnfc *cmd, u32 *rule_locs) +{ + __be32 *rule_ids_cache = priv->flow_rules_cache.rule_ids_cache; + u32 *cache_num = &priv->flow_rules_cache.rule_ids_cache_num; + u32 starting_rule_id = 0; + u32 i = 0, j = 0; + int err = 0; + + if (!priv->max_flow_rules) + return -EOPNOTSUPP; + + do { + err = gve_adminq_query_flow_rules(priv, GVE_FLOW_RULE_QUERY_IDS, + starting_rule_id); + if (err) + return err; + + for (i = 0; i < *cache_num; i++) { + if (j >= cmd->rule_cnt) + return -EMSGSIZE; + + rule_locs[j++] = be32_to_cpu(rule_ids_cache[i]); + starting_rule_id = be32_to_cpu(rule_ids_cache[i]) + 1; + } + } while (*cache_num != 0); + cmd->data = priv->max_flow_rules; + + return err; +} + +int gve_add_flow_rule(struct gve_priv *priv, struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fsp = &cmd->fs; + struct gve_adminq_flow_rule *rule = NULL; + int err; + + if (!priv->max_flow_rules) + return -EOPNOTSUPP; + + rule = kvzalloc(sizeof(*rule), GFP_KERNEL); + if (!rule) + return -ENOMEM; + + err = gve_generate_flow_rule(priv, fsp, rule); + if (err) + goto out; + + err = gve_adminq_add_flow_rule(priv, rule, fsp->location); + +out: + kvfree(rule); + if (err) + dev_err(&priv->pdev->dev, "Failed to add the flow rule: %u", fsp->location); + + return err; +} + +int gve_del_flow_rule(struct gve_priv *priv, struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fsp = (struct ethtool_rx_flow_spec *)&cmd->fs; + + if (!priv->max_flow_rules) + return -EOPNOTSUPP; + + return gve_adminq_del_flow_rule(priv, fsp->location); +} diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 166bd827a6d7..e1ffbd561fac 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: (GPL-2.0 OR MIT) /* Google virtual Ethernet (gve) driver * - * Copyright (C) 2015-2021 Google, Inc. 
+ * Copyright (C) 2015-2024 Google LLC */ #include <linux/bpf.h> @@ -9,6 +9,7 @@ #include <linux/etherdevice.h> #include <linux/filter.h> #include <linux/interrupt.h> +#include <linux/irq.h> #include <linux/module.h> #include <linux/pci.h> #include <linux/sched.h> @@ -16,6 +17,7 @@ #include <linux/workqueue.h> #include <linux/utsname.h> #include <linux/version.h> +#include <net/netdev_queues.h> #include <net/sch_generic.h> #include <net/xdp_sock_drv.h> #include "gve.h" @@ -139,6 +141,86 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) } } +static int gve_alloc_flow_rule_caches(struct gve_priv *priv) +{ + struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache; + int err = 0; + + if (!priv->max_flow_rules) + return 0; + + flow_rules_cache->rules_cache = + kvcalloc(GVE_FLOW_RULES_CACHE_SIZE, sizeof(*flow_rules_cache->rules_cache), + GFP_KERNEL); + if (!flow_rules_cache->rules_cache) { + dev_err(&priv->pdev->dev, "Cannot alloc flow rules cache\n"); + return -ENOMEM; + } + + flow_rules_cache->rule_ids_cache = + kvcalloc(GVE_FLOW_RULE_IDS_CACHE_SIZE, sizeof(*flow_rules_cache->rule_ids_cache), + GFP_KERNEL); + if (!flow_rules_cache->rule_ids_cache) { + dev_err(&priv->pdev->dev, "Cannot alloc flow rule ids cache\n"); + err = -ENOMEM; + goto free_rules_cache; + } + + return 0; + +free_rules_cache: + kvfree(flow_rules_cache->rules_cache); + flow_rules_cache->rules_cache = NULL; + return err; +} + +static void gve_free_flow_rule_caches(struct gve_priv *priv) +{ + struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache; + + kvfree(flow_rules_cache->rule_ids_cache); + flow_rules_cache->rule_ids_cache = NULL; + kvfree(flow_rules_cache->rules_cache); + flow_rules_cache->rules_cache = NULL; +} + +static int gve_alloc_rss_config_cache(struct gve_priv *priv) +{ + struct gve_rss_config *rss_config = &priv->rss_config; + + if (!priv->cache_rss_config) + return 0; + + rss_config->hash_key = kcalloc(priv->rss_key_size, + sizeof(rss_config->hash_key[0]), + GFP_KERNEL); + if (!rss_config->hash_key) + return -ENOMEM; + + rss_config->hash_lut = kcalloc(priv->rss_lut_size, + sizeof(rss_config->hash_lut[0]), + GFP_KERNEL); + if (!rss_config->hash_lut) + goto free_rss_key_cache; + + return 0; + +free_rss_key_cache: + kfree(rss_config->hash_key); + rss_config->hash_key = NULL; + return -ENOMEM; +} + +static void gve_free_rss_config_cache(struct gve_priv *priv) +{ + struct gve_rss_config *rss_config = &priv->rss_config; + + kfree(rss_config->hash_key); + kfree(rss_config->hash_lut); + + memset(rss_config, 0, sizeof(*rss_config)); +} + static int gve_alloc_counter_array(struct gve_priv *priv) { priv->counter_array = @@ -220,7 +302,7 @@ static void gve_free_stats_report(struct gve_priv *priv) if (!priv->stats_report) return; - del_timer_sync(&priv->stats_report_timer); + timer_delete_sync(&priv->stats_report_timer); dma_free_coherent(&priv->pdev->dev, priv->stats_report_len, priv->stats_report, priv->stats_report_bus); priv->stats_report = NULL; @@ -253,6 +335,18 @@ static irqreturn_t gve_intr_dqo(int irq, void *arg) return IRQ_HANDLED; } +static int gve_is_napi_on_home_cpu(struct gve_priv *priv, u32 irq) +{ + int cpu_curr = smp_processor_id(); + const struct cpumask *aff_mask; + + aff_mask = irq_get_effective_affinity_mask(irq); + if (unlikely(!aff_mask)) + return 1; + + return cpumask_test_cpu(cpu_curr, aff_mask); +} + int gve_napi_poll(struct napi_struct *napi, int budget) { struct gve_notify_block *block; @@ -276,6 +370,14 @@ int gve_napi_poll(struct 
napi_struct *napi, int budget) if (block->rx) { work_done = gve_rx_poll(block, budget); + + /* Poll XSK TX as part of RX NAPI. Setup re-poll based on max of + * TX and RX work done. + */ + if (priv->xdp_prog) + work_done = max_t(int, work_done, + gve_xsk_tx_poll(block, budget)); + reschedule |= work_done == budget; } @@ -322,8 +424,21 @@ int gve_napi_poll_dqo(struct napi_struct *napi, int budget) reschedule |= work_done == budget; } - if (reschedule) - return budget; + if (reschedule) { + /* Reschedule by returning budget only if already on the correct + * cpu. + */ + if (likely(gve_is_napi_on_home_cpu(priv, block->irq))) + return budget; + + /* If not on the cpu with which this queue's irq has affinity + * with, we avoid rescheduling napi and arm the irq instead so + * that napi gets rescheduled back eventually onto the right + * cpu. + */ + if (work_done == budget) + work_done--; + } if (likely(napi_complete_done(napi, work_done))) { /* Enable interrupts again. @@ -428,6 +543,7 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv) "Failed to receive msix vector %d\n", i); goto abort_with_some_ntfy_blocks; } + block->irq = priv->msix_vectors[msix_idx].vector; irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, get_cpu_mask(i % active_cpus)); block->irq_db_index = &priv->irq_db_indices[i].index; @@ -441,6 +557,7 @@ abort_with_some_ntfy_blocks: irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, NULL); free_irq(priv->msix_vectors[msix_idx].vector, block); + block->irq = 0; } kvfree(priv->ntfy_blocks); priv->ntfy_blocks = NULL; @@ -474,6 +591,7 @@ static void gve_free_notify_blocks(struct gve_priv *priv) irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, NULL); free_irq(priv->msix_vectors[msix_idx].vector, block); + block->irq = 0; } free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); kvfree(priv->ntfy_blocks); @@ -491,9 +609,15 @@ static int gve_setup_device_resources(struct gve_priv *priv) { int err; - err = gve_alloc_counter_array(priv); + err = gve_alloc_flow_rule_caches(priv); if (err) return err; + err = gve_alloc_rss_config_cache(priv); + if (err) + goto abort_with_flow_rule_caches; + err = gve_alloc_counter_array(priv); + if (err) + goto abort_with_rss_config_cache; err = gve_alloc_notify_blocks(priv); if (err) goto abort_with_counter; @@ -527,6 +651,12 @@ static int gve_setup_device_resources(struct gve_priv *priv) } } + err = gve_init_rss_config(priv, priv->rx_cfg.num_queues); + if (err) { + dev_err(&priv->pdev->dev, "Failed to init RSS config"); + goto abort_with_ptype_lut; + } + err = gve_adminq_report_stats(priv, priv->stats_report_len, priv->stats_report_bus, GVE_STATS_REPORT_TIMER_PERIOD); @@ -545,6 +675,10 @@ abort_with_ntfy_blocks: gve_free_notify_blocks(priv); abort_with_counter: gve_free_counter_array(priv); +abort_with_rss_config_cache: + gve_free_rss_config_cache(priv); +abort_with_flow_rule_caches: + gve_free_flow_rule_caches(priv); return err; } @@ -557,6 +691,12 @@ static void gve_teardown_device_resources(struct gve_priv *priv) /* Tell device its resources are being freed */ if (gve_get_device_resources_ok(priv)) { + err = gve_flow_rules_reset(priv); + if (err) { + dev_err(&priv->pdev->dev, + "Failed to reset flow rules: err=%d\n", err); + gve_trigger_reset(priv); + } /* detach the stats report */ err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD); if (err) { @@ -576,43 +716,44 @@ static void gve_teardown_device_resources(struct gve_priv *priv) kvfree(priv->ptype_lut_dqo); priv->ptype_lut_dqo = NULL; + 
gve_free_flow_rule_caches(priv); + gve_free_rss_config_cache(priv); gve_free_counter_array(priv); gve_free_notify_blocks(priv); gve_free_stats_report(priv); gve_clear_device_resources_ok(priv); } -static int gve_unregister_qpl(struct gve_priv *priv, u32 i) +static int gve_unregister_qpl(struct gve_priv *priv, + struct gve_queue_page_list *qpl) { int err; - err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id); + if (!qpl) + return 0; + + err = gve_adminq_unregister_page_list(priv, qpl->id); if (err) { netif_err(priv, drv, priv->dev, "Failed to unregister queue page list %d\n", - priv->qpls[i].id); + qpl->id); return err; } - priv->num_registered_pages -= priv->qpls[i].num_entries; + priv->num_registered_pages -= qpl->num_entries; return 0; } -static int gve_register_qpl(struct gve_priv *priv, u32 i) +static int gve_register_qpl(struct gve_priv *priv, + struct gve_queue_page_list *qpl) { - int num_rx_qpls; int pages; int err; - /* Rx QPLs succeed Tx QPLs in the priv->qpls array. */ - num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); - if (i >= gve_rx_start_qpl_id(&priv->tx_cfg) + num_rx_qpls) { - netif_err(priv, drv, priv->dev, - "Cannot register nonexisting QPL at index %d\n", i); - return -EINVAL; - } + if (!qpl) + return 0; - pages = priv->qpls[i].num_entries; + pages = qpl->num_entries; if (pages + priv->num_registered_pages > priv->max_registered_pages) { netif_err(priv, drv, priv->dev, @@ -622,14 +763,11 @@ static int gve_register_qpl(struct gve_priv *priv, u32 i) return -EINVAL; } - err = gve_adminq_register_page_list(priv, &priv->qpls[i]); + err = gve_adminq_register_page_list(priv, qpl); if (err) { netif_err(priv, drv, priv->dev, "failed to register queue page list %d\n", - priv->qpls[i].id); - /* This failure will trigger a reset - no need to clean - * up - */ + qpl->id); return err; } @@ -637,43 +775,43 @@ static int gve_register_qpl(struct gve_priv *priv, u32 i) return 0; } -static int gve_register_xdp_qpls(struct gve_priv *priv) +static struct gve_queue_page_list *gve_tx_get_qpl(struct gve_priv *priv, int idx) { - int start_id; - int err; - int i; + struct gve_tx_ring *tx = &priv->tx[idx]; - start_id = gve_xdp_tx_start_queue_id(priv); - for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { - err = gve_register_qpl(priv, i); - /* This failure will trigger a reset - no need to clean up */ - if (err) - return err; - } - return 0; + if (gve_is_gqi(priv)) + return tx->tx_fifo.qpl; + else + return tx->dqo.qpl; +} + +static struct gve_queue_page_list *gve_rx_get_qpl(struct gve_priv *priv, int idx) +{ + struct gve_rx_ring *rx = &priv->rx[idx]; + + if (gve_is_gqi(priv)) + return rx->data.qpl; + else + return rx->dqo.qpl; } static int gve_register_qpls(struct gve_priv *priv) { int num_tx_qpls, num_rx_qpls; - int start_id; int err; int i; - num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv), - gve_is_qpl(priv)); + num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv)); num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); for (i = 0; i < num_tx_qpls; i++) { - err = gve_register_qpl(priv, i); + err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i)); if (err) return err; } - /* there might be a gap between the tx and rx qpl ids */ - start_id = gve_rx_start_qpl_id(&priv->tx_cfg); for (i = 0; i < num_rx_qpls; i++) { - err = gve_register_qpl(priv, start_id + i); + err = gve_register_qpl(priv, gve_rx_get_qpl(priv, i)); if (err) return err; } @@ -681,43 +819,24 @@ static int gve_register_qpls(struct gve_priv *priv) return 0; } 
-static int gve_unregister_xdp_qpls(struct gve_priv *priv) -{ - int start_id; - int err; - int i; - - start_id = gve_xdp_tx_start_queue_id(priv); - for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { - err = gve_unregister_qpl(priv, i); - /* This failure will trigger a reset - no need to clean */ - if (err) - return err; - } - return 0; -} - static int gve_unregister_qpls(struct gve_priv *priv) { int num_tx_qpls, num_rx_qpls; - int start_id; int err; int i; - num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv), - gve_is_qpl(priv)); + num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv)); num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); for (i = 0; i < num_tx_qpls; i++) { - err = gve_unregister_qpl(priv, i); + err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i)); /* This failure will trigger a reset - no need to clean */ if (err) return err; } - start_id = gve_rx_start_qpl_id(&priv->tx_cfg); for (i = 0; i < num_rx_qpls; i++) { - err = gve_unregister_qpl(priv, start_id + i); + err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, i)); /* This failure will trigger a reset - no need to clean */ if (err) return err; @@ -725,27 +844,6 @@ static int gve_unregister_qpls(struct gve_priv *priv) return 0; } -static int gve_create_xdp_rings(struct gve_priv *priv) -{ - int err; - - err = gve_adminq_create_tx_queues(priv, - gve_xdp_tx_start_queue_id(priv), - priv->num_xdp_queues); - if (err) { - netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n", - priv->num_xdp_queues); - /* This failure will trigger a reset - no need to clean - * up - */ - return err; - } - netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n", - priv->num_xdp_queues); - - return 0; -} - static int gve_create_rings(struct gve_priv *priv) { int num_tx_queues = gve_num_tx_queues(priv); @@ -801,7 +899,7 @@ static void init_xdp_sync_stats(struct gve_priv *priv) int i; /* Init stats */ - for (i = start_id; i < start_id + priv->num_xdp_queues; i++) { + for (i = start_id; i < start_id + priv->tx_cfg.num_xdp_queues; i++) { int ntfy_idx = gve_tx_idx_to_ntfy(priv, i); u64_stats_init(&priv->tx[i].statss); @@ -828,22 +926,19 @@ static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv, { cfg->qcfg = &priv->tx_cfg; cfg->raw_addressing = !gve_is_qpl(priv); - cfg->qpls = priv->qpls; - cfg->qpl_cfg = &priv->qpl_cfg; cfg->ring_size = priv->tx_desc_cnt; - cfg->start_idx = 0; - cfg->num_rings = gve_num_tx_queues(priv); + cfg->num_xdp_rings = cfg->qcfg->num_xdp_queues; cfg->tx = priv->tx; } -static void gve_tx_stop_rings(struct gve_priv *priv, int start_id, int num_rings) +static void gve_tx_stop_rings(struct gve_priv *priv, int num_rings) { int i; if (!priv->tx) return; - for (i = start_id; i < start_id + num_rings; i++) { + for (i = 0; i < num_rings; i++) { if (gve_is_gqi(priv)) gve_tx_stop_ring_gqi(priv, i); else @@ -851,12 +946,11 @@ static void gve_tx_stop_rings(struct gve_priv *priv, int start_id, int num_rings } } -static void gve_tx_start_rings(struct gve_priv *priv, int start_id, - int num_rings) +static void gve_tx_start_rings(struct gve_priv *priv, int num_rings) { int i; - for (i = start_id; i < start_id + num_rings; i++) { + for (i = 0; i < num_rings; i++) { if (gve_is_gqi(priv)) gve_tx_start_ring_gqi(priv, i); else @@ -864,31 +958,9 @@ static void gve_tx_start_rings(struct gve_priv *priv, int start_id, } } -static int gve_alloc_xdp_rings(struct gve_priv *priv) -{ - struct gve_tx_alloc_rings_cfg cfg = {0}; - int err = 0; - - if (!priv->num_xdp_queues) - return 
0; - - gve_tx_get_curr_alloc_cfg(priv, &cfg); - cfg.start_idx = gve_xdp_tx_start_queue_id(priv); - cfg.num_rings = priv->num_xdp_queues; - - err = gve_tx_alloc_rings_gqi(priv, &cfg); - if (err) - return err; - - gve_tx_start_rings(priv, cfg.start_idx, cfg.num_rings); - init_xdp_sync_stats(priv); - - return 0; -} - -static int gve_alloc_rings(struct gve_priv *priv, - struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, - struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) +static int gve_queues_mem_alloc(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) { int err; @@ -916,26 +988,6 @@ free_tx: return err; } -static int gve_destroy_xdp_rings(struct gve_priv *priv) -{ - int start_id; - int err; - - start_id = gve_xdp_tx_start_queue_id(priv); - err = gve_adminq_destroy_tx_queues(priv, - start_id, - priv->num_xdp_queues); - if (err) { - netif_err(priv, drv, priv->dev, - "failed to destroy XDP queues\n"); - /* This failure will trigger a reset - no need to clean up */ - return err; - } - netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n"); - - return 0; -} - static int gve_destroy_rings(struct gve_priv *priv) { int num_tx_queues = gve_num_tx_queues(priv); @@ -960,23 +1012,9 @@ static int gve_destroy_rings(struct gve_priv *priv) return 0; } -static void gve_free_xdp_rings(struct gve_priv *priv) -{ - struct gve_tx_alloc_rings_cfg cfg = {0}; - - gve_tx_get_curr_alloc_cfg(priv, &cfg); - cfg.start_idx = gve_xdp_tx_start_queue_id(priv); - cfg.num_rings = priv->num_xdp_queues; - - if (priv->tx) { - gve_tx_stop_rings(priv, cfg.start_idx, cfg.num_rings); - gve_tx_free_rings_gqi(priv, &cfg); - } -} - -static void gve_free_rings(struct gve_priv *priv, - struct gve_tx_alloc_rings_cfg *tx_cfg, - struct gve_rx_alloc_rings_cfg *rx_cfg) +static void gve_queues_mem_free(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_cfg, + struct gve_rx_alloc_rings_cfg *rx_cfg) { if (gve_is_gqi(priv)) { gve_tx_free_rings_gqi(priv, tx_cfg); @@ -1005,35 +1043,41 @@ int gve_alloc_page(struct gve_priv *priv, struct device *dev, return 0; } -static int gve_alloc_queue_page_list(struct gve_priv *priv, - struct gve_queue_page_list *qpl, - u32 id, int pages) +struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv, + u32 id, int pages) { + struct gve_queue_page_list *qpl; int err; int i; + qpl = kvzalloc(sizeof(*qpl), GFP_KERNEL); + if (!qpl) + return NULL; + qpl->id = id; qpl->num_entries = 0; qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL); - /* caller handles clean up */ if (!qpl->pages) - return -ENOMEM; + goto abort; + qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL); - /* caller handles clean up */ if (!qpl->page_buses) - return -ENOMEM; + goto abort; for (i = 0; i < pages; i++) { err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i], &qpl->page_buses[i], gve_qpl_dma_dir(priv, id), GFP_KERNEL); - /* caller handles clean up */ if (err) - return -ENOMEM; + goto abort; qpl->num_entries++; } - return 0; + return qpl; + +abort: + gve_free_queue_page_list(priv, qpl, id); + return NULL; } void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, @@ -1045,14 +1089,16 @@ void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, put_page(page); } -static void gve_free_queue_page_list(struct gve_priv *priv, - struct gve_queue_page_list *qpl, - int id) +void gve_free_queue_page_list(struct gve_priv *priv, + struct gve_queue_page_list *qpl, + u32 id) { int i; - if (!qpl->pages) + 
if (!qpl) return; + if (!qpl->pages) + goto free_qpl; if (!qpl->page_buses) goto free_pages; @@ -1065,120 +1111,8 @@ static void gve_free_queue_page_list(struct gve_priv *priv, free_pages: kvfree(qpl->pages); qpl->pages = NULL; -} - -static void gve_free_n_qpls(struct gve_priv *priv, - struct gve_queue_page_list *qpls, - int start_id, - int num_qpls) -{ - int i; - - for (i = start_id; i < start_id + num_qpls; i++) - gve_free_queue_page_list(priv, &qpls[i], i); -} - -static int gve_alloc_n_qpls(struct gve_priv *priv, - struct gve_queue_page_list *qpls, - int page_count, - int start_id, - int num_qpls) -{ - int err; - int i; - - for (i = start_id; i < start_id + num_qpls; i++) { - err = gve_alloc_queue_page_list(priv, &qpls[i], i, page_count); - if (err) - goto free_qpls; - } - - return 0; - -free_qpls: - /* Must include the failing QPL too for gve_alloc_queue_page_list fails - * without cleaning up. - */ - gve_free_n_qpls(priv, qpls, start_id, i - start_id + 1); - return err; -} - -static int gve_alloc_qpls(struct gve_priv *priv, - struct gve_qpls_alloc_cfg *cfg) -{ - int max_queues = cfg->tx_cfg->max_queues + cfg->rx_cfg->max_queues; - int rx_start_id, tx_num_qpls, rx_num_qpls; - struct gve_queue_page_list *qpls; - int page_count; - int err; - - if (cfg->raw_addressing) - return 0; - - qpls = kvcalloc(max_queues, sizeof(*qpls), GFP_KERNEL); - if (!qpls) - return -ENOMEM; - - cfg->qpl_cfg->qpl_map_size = BITS_TO_LONGS(max_queues) * - sizeof(unsigned long) * BITS_PER_BYTE; - cfg->qpl_cfg->qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues), - sizeof(unsigned long), GFP_KERNEL); - if (!cfg->qpl_cfg->qpl_id_map) { - err = -ENOMEM; - goto free_qpl_array; - } - - /* Allocate TX QPLs */ - page_count = priv->tx_pages_per_qpl; - tx_num_qpls = gve_num_tx_qpls(cfg->tx_cfg, cfg->num_xdp_queues, - gve_is_qpl(priv)); - err = gve_alloc_n_qpls(priv, qpls, page_count, 0, tx_num_qpls); - if (err) - goto free_qpl_map; - - /* Allocate RX QPLs */ - rx_start_id = gve_rx_start_qpl_id(cfg->tx_cfg); - /* For GQI_QPL number of pages allocated have 1:1 relationship with - * number of descriptors. For DQO, number of pages required are - * more than descriptors (because of out of order completions). - */ - page_count = cfg->is_gqi ? 
priv->rx_data_slot_cnt : priv->rx_pages_per_qpl; - rx_num_qpls = gve_num_rx_qpls(cfg->rx_cfg, gve_is_qpl(priv)); - err = gve_alloc_n_qpls(priv, qpls, page_count, rx_start_id, rx_num_qpls); - if (err) - goto free_tx_qpls; - - cfg->qpls = qpls; - return 0; - -free_tx_qpls: - gve_free_n_qpls(priv, qpls, 0, tx_num_qpls); -free_qpl_map: - kvfree(cfg->qpl_cfg->qpl_id_map); - cfg->qpl_cfg->qpl_id_map = NULL; -free_qpl_array: - kvfree(qpls); - return err; -} - -static void gve_free_qpls(struct gve_priv *priv, - struct gve_qpls_alloc_cfg *cfg) -{ - int max_queues = cfg->tx_cfg->max_queues + cfg->rx_cfg->max_queues; - struct gve_queue_page_list *qpls = cfg->qpls; - int i; - - if (!qpls) - return; - - kvfree(cfg->qpl_cfg->qpl_id_map); - cfg->qpl_cfg->qpl_id_map = NULL; - - for (i = 0; i < max_queues; i++) - gve_free_queue_page_list(priv, &qpls[i], i); - - kvfree(qpls); - cfg->qpls = NULL; +free_qpl: + kvfree(qpl); } /* Use this to schedule a reset when the device is capable of continuing @@ -1204,7 +1138,7 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) int i, j; u32 tx_qid; - if (!priv->num_xdp_queues) + if (!priv->tx_cfg.num_xdp_queues) return 0; for (i = 0; i < priv->rx_cfg.num_queues; i++) { @@ -1215,8 +1149,14 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) napi->napi_id); if (err) goto err; - err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, - MEM_TYPE_PAGE_SHARED, NULL); + if (gve_is_qpl(priv)) + err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, + MEM_TYPE_PAGE_SHARED, + NULL); + else + err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, + MEM_TYPE_PAGE_POOL, + rx->dqo.page_pool); if (err) goto err; rx->xsk_pool = xsk_get_pool_from_qid(dev, i); @@ -1234,7 +1174,7 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) } } - for (i = 0; i < priv->num_xdp_queues; i++) { + for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) { tx_qid = gve_xdp_tx_queue_id(priv, i); priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i); } @@ -1255,7 +1195,7 @@ static void gve_unreg_xdp_info(struct gve_priv *priv) { int i, tx_qid; - if (!priv->num_xdp_queues) + if (!priv->tx_cfg.num_xdp_queues || !priv->rx || !priv->tx) return; for (i = 0; i < priv->rx_cfg.num_queues; i++) { @@ -1268,7 +1208,7 @@ static void gve_unreg_xdp_info(struct gve_priv *priv) } } - for (i = 0; i < priv->num_xdp_queues; i++) { + for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) { tx_qid = gve_xdp_tx_queue_id(priv, i); priv->tx[tx_qid].xsk_pool = NULL; } @@ -1282,118 +1222,69 @@ static void gve_drain_page_cache(struct gve_priv *priv) page_frag_cache_drain(&priv->rx[i].page_cache); } -static void gve_qpls_get_curr_alloc_cfg(struct gve_priv *priv, - struct gve_qpls_alloc_cfg *cfg) -{ - cfg->raw_addressing = !gve_is_qpl(priv); - cfg->is_gqi = gve_is_gqi(priv); - cfg->num_xdp_queues = priv->num_xdp_queues; - cfg->qpl_cfg = &priv->qpl_cfg; - cfg->tx_cfg = &priv->tx_cfg; - cfg->rx_cfg = &priv->rx_cfg; - cfg->qpls = priv->qpls; -} - static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv, struct gve_rx_alloc_rings_cfg *cfg) { - cfg->qcfg = &priv->rx_cfg; + cfg->qcfg_rx = &priv->rx_cfg; cfg->qcfg_tx = &priv->tx_cfg; cfg->raw_addressing = !gve_is_qpl(priv); cfg->enable_header_split = priv->header_split_enabled; - cfg->qpls = priv->qpls; - cfg->qpl_cfg = &priv->qpl_cfg; cfg->ring_size = priv->rx_desc_cnt; - cfg->packet_buffer_size = gve_is_gqi(priv) ? 
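/*
 * Illustrative sketch, not part of the patch: the gve_reg_xdp_info() hunk
 * above picks the XDP memory model per ring -- MEM_TYPE_PAGE_POOL when the
 * ring owns a page pool, MEM_TYPE_PAGE_SHARED otherwise.  A hedged helper
 * showing that branch in isolation (the function name is invented):
 */
#include <net/xdp.h>
#include <net/page_pool/helpers.h>

static int demo_reg_xdp_mem_model(struct xdp_rxq_info *rxq,
				  struct page_pool *pool)
{
	if (pool)
		return xdp_rxq_info_reg_mem_model(rxq, MEM_TYPE_PAGE_POOL,
						  pool);
	return xdp_rxq_info_reg_mem_model(rxq, MEM_TYPE_PAGE_SHARED, NULL);
}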
- GVE_DEFAULT_RX_BUFFER_SIZE : - priv->data_buffer_size_dqo; + cfg->packet_buffer_size = priv->rx_cfg.packet_buffer_size; cfg->rx = priv->rx; + cfg->xdp = !!cfg->qcfg_tx->num_xdp_queues; } -static void gve_get_curr_alloc_cfgs(struct gve_priv *priv, - struct gve_qpls_alloc_cfg *qpls_alloc_cfg, - struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, - struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) +void gve_get_curr_alloc_cfgs(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) { - gve_qpls_get_curr_alloc_cfg(priv, qpls_alloc_cfg); gve_tx_get_curr_alloc_cfg(priv, tx_alloc_cfg); gve_rx_get_curr_alloc_cfg(priv, rx_alloc_cfg); } -static void gve_rx_start_rings(struct gve_priv *priv, int num_rings) +static void gve_rx_start_ring(struct gve_priv *priv, int i) { - int i; - - for (i = 0; i < num_rings; i++) { - if (gve_is_gqi(priv)) - gve_rx_start_ring_gqi(priv, i); - else - gve_rx_start_ring_dqo(priv, i); - } + if (gve_is_gqi(priv)) + gve_rx_start_ring_gqi(priv, i); + else + gve_rx_start_ring_dqo(priv, i); } -static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings) +static void gve_rx_start_rings(struct gve_priv *priv, int num_rings) { int i; - if (!priv->rx) - return; - - for (i = 0; i < num_rings; i++) { - if (gve_is_gqi(priv)) - gve_rx_stop_ring_gqi(priv, i); - else - gve_rx_stop_ring_dqo(priv, i); - } + for (i = 0; i < num_rings; i++) + gve_rx_start_ring(priv, i); } -static void gve_queues_mem_free(struct gve_priv *priv, - struct gve_qpls_alloc_cfg *qpls_alloc_cfg, - struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, - struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) +static void gve_rx_stop_ring(struct gve_priv *priv, int i) { - gve_free_rings(priv, tx_alloc_cfg, rx_alloc_cfg); - gve_free_qpls(priv, qpls_alloc_cfg); + if (gve_is_gqi(priv)) + gve_rx_stop_ring_gqi(priv, i); + else + gve_rx_stop_ring_dqo(priv, i); } -static int gve_queues_mem_alloc(struct gve_priv *priv, - struct gve_qpls_alloc_cfg *qpls_alloc_cfg, - struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, - struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) +static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings) { - int err; - - err = gve_alloc_qpls(priv, qpls_alloc_cfg); - if (err) { - netif_err(priv, drv, priv->dev, "Failed to alloc QPLs\n"); - return err; - } - tx_alloc_cfg->qpls = qpls_alloc_cfg->qpls; - rx_alloc_cfg->qpls = qpls_alloc_cfg->qpls; - err = gve_alloc_rings(priv, tx_alloc_cfg, rx_alloc_cfg); - if (err) { - netif_err(priv, drv, priv->dev, "Failed to alloc rings\n"); - goto free_qpls; - } + int i; - return 0; + if (!priv->rx) + return; -free_qpls: - gve_free_qpls(priv, qpls_alloc_cfg); - return err; + for (i = 0; i < num_rings; i++) + gve_rx_stop_ring(priv, i); } static void gve_queues_mem_remove(struct gve_priv *priv) { struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; - struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0}; - gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg, - &tx_alloc_cfg, &rx_alloc_cfg); - gve_queues_mem_free(priv, &qpls_alloc_cfg, - &tx_alloc_cfg, &rx_alloc_cfg); - priv->qpls = NULL; + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); + gve_queues_mem_free(priv, &tx_alloc_cfg, &rx_alloc_cfg); priv->tx = NULL; priv->rx = NULL; } @@ -1402,7 +1293,6 @@ static void gve_queues_mem_remove(struct gve_priv *priv) * No memory is allocated. Passed-in memory is freed on errors. 
*/ static int gve_queues_start(struct gve_priv *priv, - struct gve_qpls_alloc_cfg *qpls_alloc_cfg, struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) { @@ -1410,24 +1300,18 @@ static int gve_queues_start(struct gve_priv *priv, int err; /* Record new resources into priv */ - priv->qpls = qpls_alloc_cfg->qpls; priv->tx = tx_alloc_cfg->tx; priv->rx = rx_alloc_cfg->rx; /* Record new configs into priv */ - priv->qpl_cfg = *qpls_alloc_cfg->qpl_cfg; priv->tx_cfg = *tx_alloc_cfg->qcfg; - priv->rx_cfg = *rx_alloc_cfg->qcfg; + priv->tx_cfg.num_xdp_queues = tx_alloc_cfg->num_xdp_rings; + priv->rx_cfg = *rx_alloc_cfg->qcfg_rx; priv->tx_desc_cnt = tx_alloc_cfg->ring_size; priv->rx_desc_cnt = rx_alloc_cfg->ring_size; - if (priv->xdp_prog) - priv->num_xdp_queues = priv->rx_cfg.num_queues; - else - priv->num_xdp_queues = 0; - - gve_tx_start_rings(priv, 0, tx_alloc_cfg->num_rings); - gve_rx_start_rings(priv, rx_alloc_cfg->qcfg->num_queues); + gve_tx_start_rings(priv, gve_num_tx_queues(priv)); + gve_rx_start_rings(priv, rx_alloc_cfg->qcfg_rx->num_queues); gve_init_sync_stats(priv); err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues); @@ -1441,12 +1325,18 @@ static int gve_queues_start(struct gve_priv *priv, if (err) goto stop_and_free_rings; + if (rx_alloc_cfg->reset_rss) { + err = gve_init_rss_config(priv, priv->rx_cfg.num_queues); + if (err) + goto reset; + } + err = gve_register_qpls(priv); if (err) goto reset; priv->header_split_enabled = rx_alloc_cfg->enable_header_split; - priv->data_buffer_size_dqo = rx_alloc_cfg->packet_buffer_size; + priv->rx_cfg.packet_buffer_size = rx_alloc_cfg->packet_buffer_size; err = gve_create_rings(priv); if (err) @@ -1473,7 +1363,7 @@ reset: /* return the original error */ return err; stop_and_free_rings: - gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv)); + gve_tx_stop_rings(priv, gve_num_tx_queues(priv)); gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); gve_queues_mem_remove(priv); return err; @@ -1483,23 +1373,19 @@ static int gve_open(struct net_device *dev) { struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; - struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0}; struct gve_priv *priv = netdev_priv(dev); int err; - gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg, - &tx_alloc_cfg, &rx_alloc_cfg); + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); - err = gve_queues_mem_alloc(priv, &qpls_alloc_cfg, - &tx_alloc_cfg, &rx_alloc_cfg); + err = gve_queues_mem_alloc(priv, &tx_alloc_cfg, &rx_alloc_cfg); if (err) return err; /* No need to free on error: ownership of resources is lost after * calling gve_queues_start. 
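/*
 * Illustrative sketch, not part of the patch: gve_queues_start() above
 * publishes the new ring counts to the stack with the two (fallible)
 * netif_set_real_num_*_queues() calls before the hardware queues are
 * created.  Minimal wrapper with an invented name:
 */
#include <linux/netdevice.h>

static int demo_publish_queue_counts(struct net_device *dev,
				     unsigned int num_tx, unsigned int num_rx)
{
	int err;

	err = netif_set_real_num_tx_queues(dev, num_tx);
	if (err)
		return err;
	return netif_set_real_num_rx_queues(dev, num_rx);
}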
*/ - err = gve_queues_start(priv, &qpls_alloc_cfg, - &tx_alloc_cfg, &rx_alloc_cfg); + err = gve_queues_start(priv, &tx_alloc_cfg, &rx_alloc_cfg); if (err) return err; @@ -1522,11 +1408,11 @@ static int gve_queues_stop(struct gve_priv *priv) goto err; gve_clear_device_rings_ok(priv); } - del_timer_sync(&priv->stats_report_timer); + timer_delete_sync(&priv->stats_report_timer); gve_unreg_xdp_info(priv); - gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv)); + gve_tx_stop_rings(priv, gve_num_tx_queues(priv)); gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); priv->interface_down_cnt++; @@ -1556,69 +1442,6 @@ static int gve_close(struct net_device *dev) return 0; } -static int gve_remove_xdp_queues(struct gve_priv *priv) -{ - int qpl_start_id; - int err; - - qpl_start_id = gve_xdp_tx_start_queue_id(priv); - - err = gve_destroy_xdp_rings(priv); - if (err) - return err; - - err = gve_unregister_xdp_qpls(priv); - if (err) - return err; - - gve_unreg_xdp_info(priv); - gve_free_xdp_rings(priv); - - gve_free_n_qpls(priv, priv->qpls, qpl_start_id, gve_num_xdp_qpls(priv)); - priv->num_xdp_queues = 0; - return 0; -} - -static int gve_add_xdp_queues(struct gve_priv *priv) -{ - int start_id; - int err; - - priv->num_xdp_queues = priv->rx_cfg.num_queues; - - start_id = gve_xdp_tx_start_queue_id(priv); - err = gve_alloc_n_qpls(priv, priv->qpls, priv->tx_pages_per_qpl, - start_id, gve_num_xdp_qpls(priv)); - if (err) - goto err; - - err = gve_alloc_xdp_rings(priv); - if (err) - goto free_xdp_qpls; - - err = gve_reg_xdp_info(priv, priv->dev); - if (err) - goto free_xdp_rings; - - err = gve_register_xdp_qpls(priv); - if (err) - goto free_xdp_rings; - - err = gve_create_xdp_rings(priv); - if (err) - goto free_xdp_rings; - - return 0; - -free_xdp_rings: - gve_free_xdp_rings(priv); -free_xdp_qpls: - gve_free_n_qpls(priv, priv->qpls, start_id, gve_num_xdp_qpls(priv)); -err: - priv->num_xdp_queues = 0; - return err; -} - static void gve_handle_link_status(struct gve_priv *priv, bool link_status) { if (!gve_get_napi_enabled(priv)) @@ -1636,6 +1459,19 @@ static void gve_handle_link_status(struct gve_priv *priv, bool link_status) } } +static int gve_configure_rings_xdp(struct gve_priv *priv, + u16 num_xdp_rings) +{ + struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; + struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; + + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); + tx_alloc_cfg.num_xdp_rings = num_xdp_rings; + + rx_alloc_cfg.xdp = !!num_xdp_rings; + return gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); +} + static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, struct netlink_ext_ack *extack) { @@ -1644,33 +1480,30 @@ static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, u32 status; old_prog = READ_ONCE(priv->xdp_prog); - if (!netif_carrier_ok(priv->dev)) { + if (!netif_running(priv->dev)) { WRITE_ONCE(priv->xdp_prog, prog); if (old_prog) bpf_prog_put(old_prog); + + /* Update priv XDP queue configuration */ + priv->tx_cfg.num_xdp_queues = priv->xdp_prog ? 
+ priv->rx_cfg.num_queues : 0; return 0; } - gve_turndown(priv); - if (!old_prog && prog) { - // Allocate XDP TX queues if an XDP program is - // being installed - err = gve_add_xdp_queues(priv); - if (err) - goto out; - } else if (old_prog && !prog) { - // Remove XDP TX queues if an XDP program is - // being uninstalled - err = gve_remove_xdp_queues(priv); - if (err) - goto out; - } + if (!old_prog && prog) + err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues); + else if (old_prog && !prog) + err = gve_configure_rings_xdp(priv, 0); + + if (err) + goto out; + WRITE_ONCE(priv->xdp_prog, prog); if (old_prog) bpf_prog_put(old_prog); out: - gve_turnup(priv); status = ioread32be(&priv->reg_bar0->device_status); gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); return err; @@ -1701,8 +1534,8 @@ static int gve_xsk_pool_enable(struct net_device *dev, if (err) return err; - /* If XDP prog is not installed, return */ - if (!priv->xdp_prog) + /* If XDP prog is not installed or interface is down, return. */ + if (!priv->xdp_prog || !netif_running(dev)) return 0; rx = &priv->rx[qid]; @@ -1747,21 +1580,16 @@ static int gve_xsk_pool_disable(struct net_device *dev, if (qid >= priv->rx_cfg.num_queues) return -EINVAL; - /* If XDP prog is not installed, unmap DMA and return */ - if (!priv->xdp_prog) - goto done; - - tx_qid = gve_xdp_tx_queue_id(priv, qid); - if (!netif_running(dev)) { - priv->rx[qid].xsk_pool = NULL; - xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq); - priv->tx[tx_qid].xsk_pool = NULL; + /* If XDP prog is not installed or interface is down, unmap DMA and + * return. + */ + if (!priv->xdp_prog || !netif_running(dev)) goto done; - } napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi; napi_disable(napi_rx); /* make sure current rx poll is done */ + tx_qid = gve_xdp_tx_queue_id(priv, qid); napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi; napi_disable(napi_tx); /* make sure current tx poll is done */ @@ -1787,24 +1615,20 @@ done: static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) { struct gve_priv *priv = netdev_priv(dev); - int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id); + struct napi_struct *napi; + + if (!gve_get_napi_enabled(priv)) + return -ENETDOWN; if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog) return -EINVAL; - if (flags & XDP_WAKEUP_TX) { - struct gve_tx_ring *tx = &priv->tx[tx_queue_id]; - struct napi_struct *napi = - &priv->ntfy_blocks[tx->ntfy_id].napi; - - if (!napi_if_scheduled_mark_missed(napi)) { - /* Call local_bh_enable to trigger SoftIRQ processing */ - local_bh_disable(); - napi_schedule(napi); - local_bh_enable(); - } - - tx->xdp_xsk_wakeup++; + napi = &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_id)].napi; + if (!napi_if_scheduled_mark_missed(napi)) { + /* Call local_bh_enable to trigger SoftIRQ processing */ + local_bh_disable(); + napi_schedule(napi); + local_bh_enable(); } return 0; @@ -1813,6 +1637,7 @@ static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) static int verify_xdp_configuration(struct net_device *dev) { struct gve_priv *priv = netdev_priv(dev); + u16 max_xdp_mtu; if (dev->features & NETIF_F_LRO) { netdev_warn(dev, "XDP is not supported when LRO is on.\n"); @@ -1825,7 +1650,11 @@ static int verify_xdp_configuration(struct net_device *dev) return -EOPNOTSUPP; } - if (dev->mtu > GVE_DEFAULT_RX_BUFFER_SIZE - sizeof(struct ethhdr) - GVE_RX_PAD) { + max_xdp_mtu = priv->rx_cfg.packet_buffer_size - sizeof(struct ethhdr); + if (priv->queue_format == 
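/*
 * Illustrative sketch, not part of the patch: the gve_xsk_wakeup() hunk
 * above uses the standard idiom for kicking NAPI from process context --
 * if the poller is already scheduled, only mark a missed run; otherwise
 * schedule it under local_bh_disable() so the softirq runs right away.
 * Helper name is invented:
 */
#include <linux/netdevice.h>
#include <net/busy_poll.h>

static void demo_kick_napi(struct napi_struct *napi)
{
	if (!napi_if_scheduled_mark_missed(napi)) {
		local_bh_disable();
		napi_schedule(napi);
		local_bh_enable();
	}
}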
GVE_GQI_QPL_FORMAT) + max_xdp_mtu -= GVE_RX_PAD; + + if (dev->mtu > max_xdp_mtu) { netdev_warn(dev, "XDP is not supported for mtu %d.\n", dev->mtu); return -EOPNOTSUPP; @@ -1863,16 +1692,42 @@ static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp) } } -static int gve_adjust_config(struct gve_priv *priv, - struct gve_qpls_alloc_cfg *qpls_alloc_cfg, - struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, - struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) +int gve_init_rss_config(struct gve_priv *priv, u16 num_queues) +{ + struct gve_rss_config *rss_config = &priv->rss_config; + struct ethtool_rxfh_param rxfh = {0}; + u16 i; + + if (!priv->cache_rss_config) + return 0; + + for (i = 0; i < priv->rss_lut_size; i++) + rss_config->hash_lut[i] = + ethtool_rxfh_indir_default(i, num_queues); + + netdev_rss_key_fill(rss_config->hash_key, priv->rss_key_size); + + rxfh.hfunc = ETH_RSS_HASH_TOP; + + return gve_adminq_configure_rss(priv, &rxfh); +} + +int gve_flow_rules_reset(struct gve_priv *priv) +{ + if (!priv->max_flow_rules) + return 0; + + return gve_adminq_reset_flow_rules(priv); +} + +int gve_adjust_config(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) { int err; /* Allocate resources for the new confiugration */ - err = gve_queues_mem_alloc(priv, qpls_alloc_cfg, - tx_alloc_cfg, rx_alloc_cfg); + err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg); if (err) { netif_err(priv, drv, priv->dev, "Adjust config failed to alloc new queues"); @@ -1884,14 +1739,12 @@ static int gve_adjust_config(struct gve_priv *priv, if (err) { netif_err(priv, drv, priv->dev, "Adjust config failed to close old queues"); - gve_queues_mem_free(priv, qpls_alloc_cfg, - tx_alloc_cfg, rx_alloc_cfg); + gve_queues_mem_free(priv, tx_alloc_cfg, rx_alloc_cfg); return err; } /* Bring the device back up again with the new resources. */ - err = gve_queues_start(priv, qpls_alloc_cfg, - tx_alloc_cfg, rx_alloc_cfg); + err = gve_queues_start(priv, tx_alloc_cfg, rx_alloc_cfg); if (err) { netif_err(priv, drv, priv->dev, "Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n"); @@ -1906,40 +1759,32 @@ static int gve_adjust_config(struct gve_priv *priv, } int gve_adjust_queues(struct gve_priv *priv, - struct gve_queue_config new_rx_config, - struct gve_queue_config new_tx_config) + struct gve_rx_queue_config new_rx_config, + struct gve_tx_queue_config new_tx_config, + bool reset_rss) { struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; - struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0}; - struct gve_qpl_config new_qpl_cfg; int err; - gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg, - &tx_alloc_cfg, &rx_alloc_cfg); - - /* qpl_cfg is not read-only, it contains a map that gets updated as - * rings are allocated, which is why we cannot use the yet unreleased - * one in priv. 
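/*
 * Illustrative sketch, not part of the patch: gve_init_rss_config() above
 * fills the indirection table with ethtool_rxfh_indir_default(), which
 * reduces to index % num_queues.  The standalone program below (demo
 * names, 128 entries as in GVE_RSS_INDIR_SIZE) shows the resulting
 * round-robin spread of hash buckets over queues.
 */
#include <stdio.h>

#define DEMO_RSS_INDIR_SIZE 128

int main(void)
{
	unsigned char lut[DEMO_RSS_INDIR_SIZE];
	unsigned int num_queues = 8;
	unsigned int i;

	for (i = 0; i < DEMO_RSS_INDIR_SIZE; i++)
		lut[i] = i % num_queues;	/* ethtool_rxfh_indir_default() */

	for (i = 0; i < 16; i++)		/* every queue gets an equal share */
		printf("lut[%u] = %u\n", i, lut[i]);
	return 0;
}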
- */ - qpls_alloc_cfg.qpl_cfg = &new_qpl_cfg; - tx_alloc_cfg.qpl_cfg = &new_qpl_cfg; - rx_alloc_cfg.qpl_cfg = &new_qpl_cfg; + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); /* Relay the new config from ethtool */ - qpls_alloc_cfg.tx_cfg = &new_tx_config; tx_alloc_cfg.qcfg = &new_tx_config; rx_alloc_cfg.qcfg_tx = &new_tx_config; - qpls_alloc_cfg.rx_cfg = &new_rx_config; - rx_alloc_cfg.qcfg = &new_rx_config; - tx_alloc_cfg.num_rings = new_tx_config.num_queues; + rx_alloc_cfg.qcfg_rx = &new_rx_config; + rx_alloc_cfg.reset_rss = reset_rss; - if (netif_carrier_ok(priv->dev)) { - err = gve_adjust_config(priv, &qpls_alloc_cfg, - &tx_alloc_cfg, &rx_alloc_cfg); + if (netif_running(priv->dev)) { + err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); return err; } /* Set the config for the next up. */ + if (reset_rss) { + err = gve_init_rss_config(priv, new_rx_config.num_queues); + if (err) + return err; + } priv->tx_cfg = new_tx_config; priv->rx_cfg = new_rx_config; @@ -1961,20 +1806,37 @@ static void gve_turndown(struct gve_priv *priv) int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; - napi_disable(&block->napi); + if (!gve_tx_was_added_to_block(priv, idx)) + continue; + + if (idx < priv->tx_cfg.num_queues) + netif_queue_set_napi(priv->dev, idx, + NETDEV_QUEUE_TYPE_TX, NULL); + + napi_disable_locked(&block->napi); } for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; - napi_disable(&block->napi); + if (!gve_rx_was_added_to_block(priv, idx)) + continue; + + netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, + NULL); + napi_disable_locked(&block->napi); } /* Stop tx queues */ netif_tx_disable(priv->dev); + xdp_features_clear_redirect_target_locked(priv->dev); + gve_clear_napi_enabled(priv); gve_clear_report_stats(priv); + + /* Make sure that all traffic is finished processing. */ + synchronize_net(); } static void gve_turnup(struct gve_priv *priv) @@ -1989,30 +1851,71 @@ static void gve_turnup(struct gve_priv *priv) int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; - napi_enable(&block->napi); + if (!gve_tx_was_added_to_block(priv, idx)) + continue; + + napi_enable_locked(&block->napi); + + if (idx < priv->tx_cfg.num_queues) + netif_queue_set_napi(priv->dev, idx, + NETDEV_QUEUE_TYPE_TX, + &block->napi); + if (gve_is_gqi(priv)) { iowrite32be(0, gve_irq_doorbell(priv, block)); } else { gve_set_itr_coalesce_usecs_dqo(priv, block, priv->tx_coalesce_usecs); } + + /* Any descs written by the NIC before this barrier will be + * handled by the one-off napi schedule below. Whereas any + * descs after the barrier will generate interrupts. + */ + mb(); + napi_schedule(&block->napi); } for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; - napi_enable(&block->napi); + if (!gve_rx_was_added_to_block(priv, idx)) + continue; + + napi_enable_locked(&block->napi); + netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, + &block->napi); + if (gve_is_gqi(priv)) { iowrite32be(0, gve_irq_doorbell(priv, block)); } else { gve_set_itr_coalesce_usecs_dqo(priv, block, priv->rx_coalesce_usecs); } + + /* Any descs written by the NIC before this barrier will be + * handled by the one-off napi schedule below. 
Whereas any + * descs after the barrier will generate interrupts. + */ + mb(); + napi_schedule(&block->napi); } + if (priv->tx_cfg.num_xdp_queues && gve_supports_xdp_xmit(priv)) + xdp_features_set_redirect_target_locked(priv->dev, false); + gve_set_napi_enabled(priv); } +static void gve_turnup_and_check_status(struct gve_priv *priv) +{ + u32 status; + + gve_turnup(priv); + status = ioread32be(&priv->reg_bar0->device_status); + gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); +} + static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct gve_notify_block *block; @@ -2077,7 +1980,6 @@ int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split) { struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; - struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0}; bool enable_hdr_split; int err = 0; @@ -2097,15 +1999,13 @@ int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split) if (enable_hdr_split == priv->header_split_enabled) return 0; - gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg, - &tx_alloc_cfg, &rx_alloc_cfg); + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); rx_alloc_cfg.enable_header_split = enable_hdr_split; rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split); if (netif_running(priv->dev)) - err = gve_adjust_config(priv, &qpls_alloc_cfg, - &tx_alloc_cfg, &rx_alloc_cfg); + err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); return err; } @@ -2115,35 +2015,30 @@ static int gve_set_features(struct net_device *netdev, const netdev_features_t orig_features = netdev->features; struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; - struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0}; struct gve_priv *priv = netdev_priv(netdev); - struct gve_qpl_config new_qpl_cfg; int err; - gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg, - &tx_alloc_cfg, &rx_alloc_cfg); - /* qpl_cfg is not read-only, it contains a map that gets updated as - * rings are allocated, which is why we cannot use the yet unreleased - * one in priv. - */ - qpls_alloc_cfg.qpl_cfg = &new_qpl_cfg; - tx_alloc_cfg.qpl_cfg = &new_qpl_cfg; - rx_alloc_cfg.qpl_cfg = &new_qpl_cfg; + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) { netdev->features ^= NETIF_F_LRO; - if (netif_carrier_ok(netdev)) { - err = gve_adjust_config(priv, &qpls_alloc_cfg, - &tx_alloc_cfg, &rx_alloc_cfg); - if (err) { - /* Revert the change on error. 
*/ - netdev->features = orig_features; - return err; - } + if (netif_running(netdev)) { + err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); + if (err) + goto revert_features; } } + if ((netdev->features & NETIF_F_NTUPLE) && !(features & NETIF_F_NTUPLE)) { + err = gve_flow_rules_reset(priv); + if (err) + goto revert_features; + } return 0; + +revert_features: + netdev->features = orig_features; + return err; } static const struct net_device_ops gve_netdev_ops = { @@ -2182,7 +2077,9 @@ static void gve_handle_reset(struct gve_priv *priv) if (gve_get_do_reset(priv)) { rtnl_lock(); + netdev_lock(priv->dev); gve_reset(priv, false); + netdev_unlock(priv->dev); rtnl_unlock(); } } @@ -2278,14 +2175,17 @@ static void gve_service_task(struct work_struct *work) static void gve_set_netdev_xdp_features(struct gve_priv *priv) { + xdp_features_t xdp_features; + if (priv->queue_format == GVE_GQI_QPL_FORMAT) { - priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC; - priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT; - priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT; - priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; + xdp_features = NETDEV_XDP_ACT_BASIC; + xdp_features |= NETDEV_XDP_ACT_REDIRECT; + xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; } else { - priv->dev->xdp_features = 0; + xdp_features = 0; } + + xdp_set_features_flag_locked(priv->dev, xdp_features); } static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) @@ -2359,6 +2259,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues, priv->rx_cfg.num_queues); } + priv->tx_cfg.num_xdp_queues = 0; dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n", priv->tx_cfg.num_queues, priv->rx_cfg.num_queues); @@ -2422,7 +2323,7 @@ err: int gve_reset(struct gve_priv *priv, bool attempt_teardown) { - bool was_up = netif_carrier_ok(priv->dev); + bool was_up = netif_running(priv->dev); int err; dev_info(&priv->pdev->dev, "Performing reset\n"); @@ -2473,6 +2374,188 @@ static void gve_write_version(u8 __iomem *driver_version_register) writeb('\n', driver_version_register); } +static int gve_rx_queue_stop(struct net_device *dev, void *per_q_mem, int idx) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_rx_ring *gve_per_q_mem; + int err; + + if (!priv->rx) + return -EAGAIN; + + /* Destroying queue 0 while other queues exist is not supported in DQO */ + if (!gve_is_gqi(priv) && idx == 0) + return -ERANGE; + + /* Single-queue destruction requires quiescence on all queues */ + gve_turndown(priv); + + /* This failure will trigger a reset - no need to clean up */ + err = gve_adminq_destroy_single_rx_queue(priv, idx); + if (err) + return err; + + if (gve_is_qpl(priv)) { + /* This failure will trigger a reset - no need to clean up */ + err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, idx)); + if (err) + return err; + } + + gve_rx_stop_ring(priv, idx); + + /* Turn the unstopped queues back up */ + gve_turnup_and_check_status(priv); + + gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; + *gve_per_q_mem = priv->rx[idx]; + memset(&priv->rx[idx], 0, sizeof(priv->rx[idx])); + return 0; +} + +static void gve_rx_queue_mem_free(struct net_device *dev, void *per_q_mem) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_rx_alloc_rings_cfg cfg = {0}; + struct gve_rx_ring *gve_per_q_mem; + + gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; + gve_rx_get_curr_alloc_cfg(priv, &cfg); + + if (gve_is_gqi(priv)) + gve_rx_free_ring_gqi(priv, 
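/*
 * Illustrative sketch, not part of the patch: the reset path above (and
 * the shutdown/suspend/resume paths later in the patch) take the netdev
 * instance lock inside the RTNL section, always in the same order.
 * Hedged outline with an invented name:
 */
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static void demo_locked_reconfig(struct net_device *dev)
{
	rtnl_lock();
	netdev_lock(dev);
	/* teardown / bring-up that needs both locks goes here */
	netdev_unlock(dev);
	rtnl_unlock();
}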
gve_per_q_mem, &cfg); + else + gve_rx_free_ring_dqo(priv, gve_per_q_mem, &cfg); +} + +static int gve_rx_queue_mem_alloc(struct net_device *dev, void *per_q_mem, + int idx) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_rx_alloc_rings_cfg cfg = {0}; + struct gve_rx_ring *gve_per_q_mem; + int err; + + if (!priv->rx) + return -EAGAIN; + + gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; + gve_rx_get_curr_alloc_cfg(priv, &cfg); + + if (gve_is_gqi(priv)) + err = gve_rx_alloc_ring_gqi(priv, &cfg, gve_per_q_mem, idx); + else + err = gve_rx_alloc_ring_dqo(priv, &cfg, gve_per_q_mem, idx); + + return err; +} + +static int gve_rx_queue_start(struct net_device *dev, void *per_q_mem, int idx) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_rx_ring *gve_per_q_mem; + int err; + + if (!priv->rx) + return -EAGAIN; + + gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; + priv->rx[idx] = *gve_per_q_mem; + + /* Single-queue creation requires quiescence on all queues */ + gve_turndown(priv); + + gve_rx_start_ring(priv, idx); + + if (gve_is_qpl(priv)) { + /* This failure will trigger a reset - no need to clean up */ + err = gve_register_qpl(priv, gve_rx_get_qpl(priv, idx)); + if (err) + goto abort; + } + + /* This failure will trigger a reset - no need to clean up */ + err = gve_adminq_create_single_rx_queue(priv, idx); + if (err) + goto abort; + + if (gve_is_gqi(priv)) + gve_rx_write_doorbell(priv, &priv->rx[idx]); + else + gve_rx_post_buffers_dqo(&priv->rx[idx]); + + /* Turn the unstopped queues back up */ + gve_turnup_and_check_status(priv); + return 0; + +abort: + gve_rx_stop_ring(priv, idx); + + /* All failures in this func result in a reset, by clearing the struct + * at idx, we prevent a double free when that reset runs. The reset, + * which needs the rtnl lock, will not run till this func returns and + * its caller gives up the lock. 
+ */ + memset(&priv->rx[idx], 0, sizeof(priv->rx[idx])); + return err; +} + +static const struct netdev_queue_mgmt_ops gve_queue_mgmt_ops = { + .ndo_queue_mem_size = sizeof(struct gve_rx_ring), + .ndo_queue_mem_alloc = gve_rx_queue_mem_alloc, + .ndo_queue_mem_free = gve_rx_queue_mem_free, + .ndo_queue_start = gve_rx_queue_start, + .ndo_queue_stop = gve_rx_queue_stop, +}; + +static void gve_get_rx_queue_stats(struct net_device *dev, int idx, + struct netdev_queue_stats_rx *rx_stats) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_rx_ring *rx = &priv->rx[idx]; + unsigned int start; + + do { + start = u64_stats_fetch_begin(&rx->statss); + rx_stats->packets = rx->rpackets; + rx_stats->bytes = rx->rbytes; + rx_stats->alloc_fail = rx->rx_skb_alloc_fail + + rx->rx_buf_alloc_fail; + } while (u64_stats_fetch_retry(&rx->statss, start)); +} + +static void gve_get_tx_queue_stats(struct net_device *dev, int idx, + struct netdev_queue_stats_tx *tx_stats) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_tx_ring *tx = &priv->tx[idx]; + unsigned int start; + + do { + start = u64_stats_fetch_begin(&tx->statss); + tx_stats->packets = tx->pkt_done; + tx_stats->bytes = tx->bytes_done; + } while (u64_stats_fetch_retry(&tx->statss, start)); +} + +static void gve_get_base_stats(struct net_device *dev, + struct netdev_queue_stats_rx *rx, + struct netdev_queue_stats_tx *tx) +{ + rx->packets = 0; + rx->bytes = 0; + rx->alloc_fail = 0; + + tx->packets = 0; + tx->bytes = 0; +} + +static const struct netdev_stat_ops gve_stat_ops = { + .get_queue_stats_rx = gve_get_rx_queue_stats, + .get_queue_stats_tx = gve_get_tx_queue_stats, + .get_base_stats = gve_get_base_stats, +}; + static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { int max_tx_queues, max_rx_queues; @@ -2527,6 +2610,8 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, dev); dev->ethtool_ops = &gve_ethtool_ops; dev->netdev_ops = &gve_netdev_ops; + dev->queue_mgmt_ops = &gve_queue_mgmt_ops; + dev->stat_ops = &gve_stat_ops; /* Set default and supported features. 
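/*
 * Illustrative sketch, not part of the patch: the queue-stats callbacks
 * above use the usual u64_stats_sync retry-read pattern -- writers wrap
 * updates in u64_stats_update_begin()/end(), readers loop until they
 * observe a consistent snapshot.  Minimal pair of helpers with invented
 * names:
 */
#include <linux/u64_stats_sync.h>

struct demo_ring_stats {
	u64 packets;
	u64 bytes;
	struct u64_stats_sync syncp;
};

static void demo_stats_account(struct demo_ring_stats *s, unsigned int len)
{
	u64_stats_update_begin(&s->syncp);
	s->packets++;
	s->bytes += len;
	u64_stats_update_end(&s->syncp);
}

static u64 demo_stats_read_packets(const struct demo_ring_stats *s)
{
	unsigned int start;
	u64 packets;

	do {
		start = u64_stats_fetch_begin(&s->syncp);
		packets = s->packets;
	} while (u64_stats_fetch_retry(&s->syncp, start));

	return packets;
}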
* @@ -2555,7 +2640,7 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) priv->service_task_flags = 0x0; priv->state_flags = 0x0; priv->ethtool_flags = 0x0; - priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE; + priv->rx_cfg.packet_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; gve_set_probe_in_progress(priv); @@ -2574,6 +2659,9 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto abort_with_wq; + if (!gve_is_gqi(priv) && !gve_is_qpl(priv)) + dev->netmem_tx = true; + err = register_netdev(dev); if (err) goto abort_with_gve_init; @@ -2628,9 +2716,10 @@ static void gve_shutdown(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct gve_priv *priv = netdev_priv(netdev); - bool was_up = netif_carrier_ok(priv->dev); + bool was_up = netif_running(priv->dev); rtnl_lock(); + netdev_lock(netdev); if (was_up && gve_close(priv->dev)) { /* If the dev was up, attempt to close, if close fails, reset */ gve_reset_and_teardown(priv, was_up); @@ -2638,6 +2727,7 @@ static void gve_shutdown(struct pci_dev *pdev) /* If the dev wasn't up or close worked, finish tearing down */ gve_teardown_priv_resources(priv); } + netdev_unlock(netdev); rtnl_unlock(); } @@ -2646,10 +2736,11 @@ static int gve_suspend(struct pci_dev *pdev, pm_message_t state) { struct net_device *netdev = pci_get_drvdata(pdev); struct gve_priv *priv = netdev_priv(netdev); - bool was_up = netif_carrier_ok(priv->dev); + bool was_up = netif_running(priv->dev); priv->suspend_cnt++; rtnl_lock(); + netdev_lock(netdev); if (was_up && gve_close(priv->dev)) { /* If the dev was up, attempt to close, if close fails, reset */ gve_reset_and_teardown(priv, was_up); @@ -2658,6 +2749,7 @@ static int gve_suspend(struct pci_dev *pdev, pm_message_t state) gve_teardown_priv_resources(priv); } priv->up_before_suspend = was_up; + netdev_unlock(netdev); rtnl_unlock(); return 0; } @@ -2670,7 +2762,9 @@ static int gve_resume(struct pci_dev *pdev) priv->resume_cnt++; rtnl_lock(); + netdev_lock(netdev); err = gve_reset_recovery(priv, priv->up_before_suspend); + netdev_unlock(netdev); rtnl_unlock(); return err; } diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 20f5a9e7fae9..90e875c1832f 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -30,6 +30,9 @@ static void gve_rx_unfill_pages(struct gve_priv *priv, u32 slots = rx->mask + 1; int i; + if (!rx->data.page_info) + return; + if (rx->data.raw_addressing) { for (i = 0; i < slots; i++) gve_rx_free_buffer(&priv->pdev->dev, &rx->data.page_info[i], @@ -38,8 +41,6 @@ static void gve_rx_unfill_pages(struct gve_priv *priv, for (i = 0; i < slots; i++) page_ref_sub(rx->data.page_info[i].page, rx->data.page_info[i].pagecnt_bias - 1); - gve_unassign_qpl(cfg->qpl_cfg, rx->data.qpl->id); - rx->data.qpl = NULL; for (i = 0; i < rx->qpl_copy_pool_mask + 1; i++) { page_ref_sub(rx->qpl_copy_pool[i].page, @@ -51,6 +52,41 @@ static void gve_rx_unfill_pages(struct gve_priv *priv, rx->data.page_info = NULL; } +static void gve_rx_ctx_clear(struct gve_rx_ctx *ctx) +{ + ctx->skb_head = NULL; + ctx->skb_tail = NULL; + ctx->total_size = 0; + ctx->frag_cnt = 0; + ctx->drop_pkt = false; +} + +static void gve_rx_init_ring_state_gqi(struct gve_rx_ring *rx) +{ + rx->desc.seqno = 1; + rx->cnt = 0; + gve_rx_ctx_clear(&rx->ctx); +} + +static void gve_rx_reset_ring_gqi(struct gve_priv *priv, int idx) +{ + 
struct gve_rx_ring *rx = &priv->rx[idx]; + const u32 slots = priv->rx_desc_cnt; + size_t size; + + /* Reset desc ring */ + if (rx->desc.desc_ring) { + size = slots * sizeof(rx->desc.desc_ring[0]); + memset(rx->desc.desc_ring, 0, size); + } + + /* Reset q_resources */ + if (rx->q_resources) + memset(rx->q_resources, 0, sizeof(*rx->q_resources)); + + gve_rx_init_ring_state_gqi(rx); +} + void gve_rx_stop_ring_gqi(struct gve_priv *priv, int idx) { int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); @@ -60,43 +96,60 @@ void gve_rx_stop_ring_gqi(struct gve_priv *priv, int idx) gve_remove_napi(priv, ntfy_idx); gve_rx_remove_from_block(priv, idx); + gve_rx_reset_ring_gqi(priv, idx); } -static void gve_rx_free_ring_gqi(struct gve_priv *priv, struct gve_rx_ring *rx, - struct gve_rx_alloc_rings_cfg *cfg) +void gve_rx_free_ring_gqi(struct gve_priv *priv, struct gve_rx_ring *rx, + struct gve_rx_alloc_rings_cfg *cfg) { struct device *dev = &priv->pdev->dev; u32 slots = rx->mask + 1; int idx = rx->q_num; size_t bytes; + u32 qpl_id; - bytes = sizeof(struct gve_rx_desc) * cfg->ring_size; - dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus); - rx->desc.desc_ring = NULL; + if (rx->desc.desc_ring) { + bytes = sizeof(struct gve_rx_desc) * cfg->ring_size; + dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus); + rx->desc.desc_ring = NULL; + } - dma_free_coherent(dev, sizeof(*rx->q_resources), - rx->q_resources, rx->q_resources_bus); - rx->q_resources = NULL; + if (rx->q_resources) { + dma_free_coherent(dev, sizeof(*rx->q_resources), + rx->q_resources, rx->q_resources_bus); + rx->q_resources = NULL; + } gve_rx_unfill_pages(priv, rx, cfg); - bytes = sizeof(*rx->data.data_ring) * slots; - dma_free_coherent(dev, bytes, rx->data.data_ring, - rx->data.data_bus); - rx->data.data_ring = NULL; + if (rx->data.data_ring) { + bytes = sizeof(*rx->data.data_ring) * slots; + dma_free_coherent(dev, bytes, rx->data.data_ring, + rx->data.data_bus); + rx->data.data_ring = NULL; + } kvfree(rx->qpl_copy_pool); rx->qpl_copy_pool = NULL; + if (rx->data.qpl) { + qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, idx); + gve_free_queue_page_list(priv, rx->data.qpl, qpl_id); + rx->data.qpl = NULL; + } + netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx); } -static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info, - dma_addr_t addr, struct page *page, __be64 *slot_addr) +static void gve_setup_rx_buffer(struct gve_rx_ring *rx, + struct gve_rx_slot_page_info *page_info, + dma_addr_t addr, struct page *page, + __be64 *slot_addr) { page_info->page = page; page_info->page_offset = 0; page_info->page_address = page_address(page); + page_info->buf_size = rx->packet_buffer_size; *slot_addr = cpu_to_be64(addr); /* The page already has 1 ref */ page_ref_add(page, INT_MAX - 1); @@ -121,7 +174,7 @@ static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev, return err; } - gve_setup_rx_buffer(page_info, dma, page, &data_slot->addr); + gve_setup_rx_buffer(rx, page_info, dma, page, &data_slot->addr); return 0; } @@ -144,20 +197,13 @@ static int gve_rx_prefill_pages(struct gve_rx_ring *rx, if (!rx->data.page_info) return -ENOMEM; - if (!rx->data.raw_addressing) { - rx->data.qpl = gve_assign_rx_qpl(cfg, rx->q_num); - if (!rx->data.qpl) { - kvfree(rx->data.page_info); - rx->data.page_info = NULL; - return -ENOMEM; - } - } for (i = 0; i < slots; i++) { if (!rx->data.raw_addressing) { struct page *page = rx->data.qpl->pages[i]; dma_addr_t addr = i * PAGE_SIZE; - gve_setup_rx_buffer(&rx->data.page_info[i], addr, 
page, + gve_setup_rx_buffer(rx, &rx->data.page_info[i], addr, + page, &rx->data.data_ring[i].qpl_offset); continue; } @@ -180,6 +226,7 @@ static int gve_rx_prefill_pages(struct gve_rx_ring *rx, rx->qpl_copy_pool[j].page = page; rx->qpl_copy_pool[j].page_offset = 0; rx->qpl_copy_pool[j].page_address = page_address(page); + rx->qpl_copy_pool[j].buf_size = rx->packet_buffer_size; /* The page already has 1 ref. */ page_ref_add(page, INT_MAX - 1); @@ -204,9 +251,6 @@ alloc_err_qpl: page_ref_sub(rx->data.page_info[i].page, rx->data.page_info[i].pagecnt_bias - 1); - gve_unassign_qpl(cfg->qpl_cfg, rx->data.qpl->id); - rx->data.qpl = NULL; - return err; alloc_err_rda: @@ -217,15 +261,6 @@ alloc_err_rda: return err; } -static void gve_rx_ctx_clear(struct gve_rx_ctx *ctx) -{ - ctx->skb_head = NULL; - ctx->skb_tail = NULL; - ctx->total_size = 0; - ctx->frag_cnt = 0; - ctx->drop_pkt = false; -} - void gve_rx_start_ring_gqi(struct gve_priv *priv, int idx) { int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); @@ -234,14 +269,16 @@ void gve_rx_start_ring_gqi(struct gve_priv *priv, int idx) gve_add_napi(priv, ntfy_idx, gve_napi_poll); } -static int gve_rx_alloc_ring_gqi(struct gve_priv *priv, - struct gve_rx_alloc_rings_cfg *cfg, - struct gve_rx_ring *rx, - int idx) +int gve_rx_alloc_ring_gqi(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg, + struct gve_rx_ring *rx, + int idx) { struct device *hdev = &priv->pdev->dev; - u32 slots = priv->rx_data_slot_cnt; + u32 slots = cfg->ring_size; int filled_pages; + int qpl_page_cnt; + u32 qpl_id = 0; size_t bytes; int err; @@ -251,6 +288,7 @@ static int gve_rx_alloc_ring_gqi(struct gve_priv *priv, rx->gve = priv; rx->q_num = idx; + rx->packet_buffer_size = cfg->packet_buffer_size; rx->mask = slots - 1; rx->data.raw_addressing = cfg->raw_addressing; @@ -274,10 +312,22 @@ static int gve_rx_alloc_ring_gqi(struct gve_priv *priv, goto abort_with_slots; } + if (!rx->data.raw_addressing) { + qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num); + qpl_page_cnt = cfg->ring_size; + + rx->data.qpl = gve_alloc_queue_page_list(priv, qpl_id, + qpl_page_cnt); + if (!rx->data.qpl) { + err = -ENOMEM; + goto abort_with_copy_pool; + } + } + filled_pages = gve_rx_prefill_pages(rx, cfg); if (filled_pages < 0) { err = -ENOMEM; - goto abort_with_copy_pool; + goto abort_with_qpl; } rx->fill_cnt = filled_pages; /* Ensure data ring slots (packet buffers) are visible. 
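/*
 * Illustrative sketch, not part of the patch: gve_setup_rx_buffer() and
 * the qpl_copy_pool setup above take INT_MAX page references up front and
 * track a pagecnt_bias, so handing a fragment to the stack is only a
 * local decrement; the page is exclusively owned again once the real
 * refcount drops back to the bias.  Standalone userspace model of that
 * accounting (all demo_* names are invented):
 */
#include <limits.h>
#include <stdio.h>

struct demo_page {
	int refcount;			/* stand-in for page_count() */
};

struct demo_page_info {
	struct demo_page *page;
	int pagecnt_bias;		/* references still held by the driver */
};

static void demo_setup(struct demo_page_info *pi, struct demo_page *page)
{
	page->refcount = 1;		/* fresh page */
	page->refcount += INT_MAX - 1;	/* page_ref_add(page, INT_MAX - 1) */
	pi->page = page;
	pi->pagecnt_bias = INT_MAX;
}

static void demo_hand_to_stack(struct demo_page_info *pi)
{
	pi->pagecnt_bias--;		/* the skb now owns one of our refs */
}

static int demo_can_recycle(const struct demo_page_info *pi)
{
	return pi->page->refcount == pi->pagecnt_bias;
}

int main(void)
{
	struct demo_page page;
	struct demo_page_info pi;

	demo_setup(&pi, &page);
	demo_hand_to_stack(&pi);
	printf("recyclable: %d\n", demo_can_recycle(&pi));	/* 0: stack holds a ref */
	page.refcount--;					/* stack dropped the page */
	printf("recyclable: %d\n", demo_can_recycle(&pi));	/* 1 */
	return 0;
}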
*/ @@ -304,11 +354,9 @@ static int gve_rx_alloc_ring_gqi(struct gve_priv *priv, err = -ENOMEM; goto abort_with_q_resources; } - rx->cnt = 0; rx->db_threshold = slots / 2; - rx->desc.seqno = 1; + gve_rx_init_ring_state_gqi(rx); - rx->packet_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; gve_rx_ctx_clear(&rx->ctx); return 0; @@ -319,6 +367,11 @@ abort_with_q_resources: rx->q_resources = NULL; abort_filled: gve_rx_unfill_pages(priv, rx, cfg); +abort_with_qpl: + if (!rx->data.raw_addressing) { + gve_free_queue_page_list(priv, rx->data.qpl, qpl_id); + rx->data.qpl = NULL; + } abort_with_copy_pool: kvfree(rx->qpl_copy_pool); rx->qpl_copy_pool = NULL; @@ -337,18 +390,12 @@ int gve_rx_alloc_rings_gqi(struct gve_priv *priv, int err = 0; int i, j; - if (!cfg->raw_addressing && !cfg->qpls) { - netif_err(priv, drv, priv->dev, - "Cannot alloc QPL ring before allocing QPLs\n"); - return -EINVAL; - } - - rx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_rx_ring), + rx = kvcalloc(cfg->qcfg_rx->max_queues, sizeof(struct gve_rx_ring), GFP_KERNEL); if (!rx) return -ENOMEM; - for (i = 0; i < cfg->qcfg->num_queues; i++) { + for (i = 0; i < cfg->qcfg_rx->num_queues; i++) { err = gve_rx_alloc_ring_gqi(priv, cfg, &rx[i], i); if (err) { netif_err(priv, drv, priv->dev, @@ -377,7 +424,7 @@ void gve_rx_free_rings_gqi(struct gve_priv *priv, if (!rx) return; - for (i = 0; i < cfg->qcfg->num_queues; i++) + for (i = 0; i < cfg->qcfg_rx->num_queues; i++) gve_rx_free_ring_gqi(priv, &rx[i], cfg); kvfree(rx); @@ -548,7 +595,7 @@ static struct sk_buff *gve_rx_copy_to_pool(struct gve_rx_ring *rx, copy_page_info->pad = page_info->pad; skb = gve_rx_add_frags(napi, copy_page_info, - rx->packet_buffer_size, len, ctx); + copy_page_info->buf_size, len, ctx); if (unlikely(!skb)) return NULL; @@ -588,7 +635,8 @@ gve_rx_qpl(struct device *dev, struct net_device *netdev, * device. */ if (page_info->can_flip) { - skb = gve_rx_add_frags(napi, page_info, rx->packet_buffer_size, len, ctx); + skb = gve_rx_add_frags(napi, page_info, page_info->buf_size, + len, ctx); /* No point in recycling if we didn't get the skb */ if (skb) { /* Make sure that the page isn't freed. 
*/ @@ -638,7 +686,7 @@ static struct sk_buff *gve_rx_skb(struct gve_priv *priv, struct gve_rx_ring *rx, skb = gve_rx_raw_addressing(&priv->pdev->dev, netdev, page_info, len, napi, data_slot, - rx->packet_buffer_size, ctx); + page_info->buf_size, ctx); } else { skb = gve_rx_qpl(&priv->pdev->dev, netdev, rx, page_info, len, napi, data_slot); @@ -813,7 +861,7 @@ static void gve_rx(struct gve_rx_ring *rx, netdev_features_t feat, void *old_data; int xdp_act; - xdp_init_buff(&xdp, rx->packet_buffer_size, &rx->xdp_rxq); + xdp_init_buff(&xdp, page_info->buf_size, &rx->xdp_rxq); xdp_prepare_buff(&xdp, page_info->page_address + page_info->page_offset, GVE_RX_PAD, len, false); diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index 8e8071308aeb..dcb0545baa50 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -16,220 +16,120 @@ #include <net/ipv6.h> #include <net/tcp.h> -static int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs) -{ - return page_count(bs->page_info.page) - bs->page_info.pagecnt_bias; -} - -static void gve_free_page_dqo(struct gve_priv *priv, - struct gve_rx_buf_state_dqo *bs, - bool free_page) -{ - page_ref_sub(bs->page_info.page, bs->page_info.pagecnt_bias - 1); - if (free_page) - gve_free_page(&priv->pdev->dev, bs->page_info.page, bs->addr, - DMA_FROM_DEVICE); - bs->page_info.page = NULL; -} - -static struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx) -{ - struct gve_rx_buf_state_dqo *buf_state; - s16 buffer_id; - - buffer_id = rx->dqo.free_buf_states; - if (unlikely(buffer_id == -1)) - return NULL; - - buf_state = &rx->dqo.buf_states[buffer_id]; - - /* Remove buf_state from free list */ - rx->dqo.free_buf_states = buf_state->next; - - /* Point buf_state to itself to mark it as allocated */ - buf_state->next = buffer_id; - - return buf_state; -} - -static bool gve_buf_state_is_allocated(struct gve_rx_ring *rx, - struct gve_rx_buf_state_dqo *buf_state) -{ - s16 buffer_id = buf_state - rx->dqo.buf_states; - - return buf_state->next == buffer_id; -} - -static void gve_free_buf_state(struct gve_rx_ring *rx, - struct gve_rx_buf_state_dqo *buf_state) +static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx) { - s16 buffer_id = buf_state - rx->dqo.buf_states; + struct device *hdev = &priv->pdev->dev; + int buf_count = rx->dqo.bufq.mask + 1; - buf_state->next = rx->dqo.free_buf_states; - rx->dqo.free_buf_states = buffer_id; + if (rx->dqo.hdr_bufs.data) { + dma_free_coherent(hdev, priv->header_buf_size * buf_count, + rx->dqo.hdr_bufs.data, rx->dqo.hdr_bufs.addr); + rx->dqo.hdr_bufs.data = NULL; + } } -static struct gve_rx_buf_state_dqo * -gve_dequeue_buf_state(struct gve_rx_ring *rx, struct gve_index_list *list) +static void gve_rx_init_ring_state_dqo(struct gve_rx_ring *rx, + const u32 buffer_queue_slots, + const u32 completion_queue_slots) { - struct gve_rx_buf_state_dqo *buf_state; - s16 buffer_id; - - buffer_id = list->head; - if (unlikely(buffer_id == -1)) - return NULL; - - buf_state = &rx->dqo.buf_states[buffer_id]; - - /* Remove buf_state from list */ - list->head = buf_state->next; - if (buf_state->next == -1) - list->tail = -1; - - /* Point buf_state to itself to mark it as allocated */ - buf_state->next = buffer_id; - - return buf_state; -} + int i; -static void gve_enqueue_buf_state(struct gve_rx_ring *rx, - struct gve_index_list *list, - struct gve_rx_buf_state_dqo *buf_state) -{ - s16 buffer_id = buf_state - 
rx->dqo.buf_states; + /* Set buffer queue state */ + rx->dqo.bufq.mask = buffer_queue_slots - 1; + rx->dqo.bufq.head = 0; + rx->dqo.bufq.tail = 0; - buf_state->next = -1; + /* Set completion queue state */ + rx->dqo.complq.num_free_slots = completion_queue_slots; + rx->dqo.complq.mask = completion_queue_slots - 1; + rx->dqo.complq.cur_gen_bit = 0; + rx->dqo.complq.head = 0; - if (list->head == -1) { - list->head = buffer_id; - list->tail = buffer_id; - } else { - int tail = list->tail; + /* Set RX SKB context */ + rx->ctx.skb_head = NULL; + rx->ctx.skb_tail = NULL; - rx->dqo.buf_states[tail].next = buffer_id; - list->tail = buffer_id; + /* Set up linked list of buffer IDs */ + if (rx->dqo.buf_states) { + for (i = 0; i < rx->dqo.num_buf_states - 1; i++) + rx->dqo.buf_states[i].next = i + 1; + rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1; } + + rx->dqo.free_buf_states = 0; + rx->dqo.recycled_buf_states.head = -1; + rx->dqo.recycled_buf_states.tail = -1; + rx->dqo.used_buf_states.head = -1; + rx->dqo.used_buf_states.tail = -1; } -static struct gve_rx_buf_state_dqo * -gve_get_recycled_buf_state(struct gve_rx_ring *rx) +static void gve_rx_reset_ring_dqo(struct gve_priv *priv, int idx) { - struct gve_rx_buf_state_dqo *buf_state; + struct gve_rx_ring *rx = &priv->rx[idx]; + size_t size; int i; - /* Recycled buf states are immediately usable. */ - buf_state = gve_dequeue_buf_state(rx, &rx->dqo.recycled_buf_states); - if (likely(buf_state)) - return buf_state; - - if (unlikely(rx->dqo.used_buf_states.head == -1)) - return NULL; - - /* Used buf states are only usable when ref count reaches 0, which means - * no SKBs refer to them. - * - * Search a limited number before giving up. - */ - for (i = 0; i < 5; i++) { - buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states); - if (gve_buf_ref_cnt(buf_state) == 0) { - rx->dqo.used_buf_states_cnt--; - return buf_state; - } + const u32 buffer_queue_slots = priv->rx_desc_cnt; + const u32 completion_queue_slots = priv->rx_desc_cnt; - gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state); + /* Reset buffer queue */ + if (rx->dqo.bufq.desc_ring) { + size = sizeof(rx->dqo.bufq.desc_ring[0]) * + buffer_queue_slots; + memset(rx->dqo.bufq.desc_ring, 0, size); } - /* For QPL, we cannot allocate any new buffers and must - * wait for the existing ones to be available. - */ - if (rx->dqo.qpl) - return NULL; - - /* If there are no free buf states discard an entry from - * `used_buf_states` so it can be used. 
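/*
 * Illustrative sketch, not part of the patch: the DQO buffer-state code in
 * this hunk threads free states together by array index rather than by
 * pointer -- .next stores the index of the next free state, -1 terminates
 * the list, and an allocated state points at itself.  Standalone model of
 * that free list (demo_* names are invented):
 */
#include <stdio.h>

#define DEMO_NUM_STATES 8

struct demo_buf_state {
	short next;			/* next free index, -1 = end of list */
};

static struct demo_buf_state states[DEMO_NUM_STATES];
static short free_head;

static void demo_init(void)
{
	int i;

	for (i = 0; i < DEMO_NUM_STATES - 1; i++)
		states[i].next = i + 1;
	states[DEMO_NUM_STATES - 1].next = -1;
	free_head = 0;
}

static short demo_alloc(void)
{
	short id = free_head;

	if (id == -1)
		return -1;		/* no free states left */
	free_head = states[id].next;
	states[id].next = id;		/* self-reference marks "allocated" */
	return id;
}

static void demo_free(short id)
{
	states[id].next = free_head;
	free_head = id;
}

int main(void)
{
	short a, b;

	demo_init();
	a = demo_alloc();
	b = demo_alloc();
	printf("allocated %d and %d\n", a, b);
	demo_free(a);
	printf("allocated %d after freeing %d\n", demo_alloc(), a);
	return 0;
}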
- */ - if (unlikely(rx->dqo.free_buf_states == -1)) { - buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states); - if (gve_buf_ref_cnt(buf_state) == 0) - return buf_state; - - gve_free_page_dqo(rx->gve, buf_state, true); - gve_free_buf_state(rx, buf_state); + /* Reset completion queue */ + if (rx->dqo.complq.desc_ring) { + size = sizeof(rx->dqo.complq.desc_ring[0]) * + completion_queue_slots; + memset(rx->dqo.complq.desc_ring, 0, size); } - return NULL; -} - -static int gve_alloc_page_dqo(struct gve_rx_ring *rx, - struct gve_rx_buf_state_dqo *buf_state) -{ - struct gve_priv *priv = rx->gve; - u32 idx; + /* Reset q_resources */ + if (rx->q_resources) + memset(rx->q_resources, 0, sizeof(*rx->q_resources)); - if (!rx->dqo.qpl) { - int err; + /* Reset buf states */ + if (rx->dqo.buf_states) { + for (i = 0; i < rx->dqo.num_buf_states; i++) { + struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i]; - err = gve_alloc_page(priv, &priv->pdev->dev, - &buf_state->page_info.page, - &buf_state->addr, - DMA_FROM_DEVICE, GFP_ATOMIC); - if (err) - return err; - } else { - idx = rx->dqo.next_qpl_page_idx; - if (idx >= priv->rx_pages_per_qpl) { - net_err_ratelimited("%s: Out of QPL pages\n", - priv->dev->name); - return -ENOMEM; + if (rx->dqo.page_pool) + gve_free_to_page_pool(rx, bs, false); + else + gve_free_qpl_page_dqo(bs); } - buf_state->page_info.page = rx->dqo.qpl->pages[idx]; - buf_state->addr = rx->dqo.qpl->page_buses[idx]; - rx->dqo.next_qpl_page_idx++; } - buf_state->page_info.page_offset = 0; - buf_state->page_info.page_address = - page_address(buf_state->page_info.page); - buf_state->last_single_ref_offset = 0; - - /* The page already has 1 ref. */ - page_ref_add(buf_state->page_info.page, INT_MAX - 1); - buf_state->page_info.pagecnt_bias = INT_MAX; - return 0; -} - -static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx) -{ - struct device *hdev = &priv->pdev->dev; - int buf_count = rx->dqo.bufq.mask + 1; - - if (rx->dqo.hdr_bufs.data) { - dma_free_coherent(hdev, priv->header_buf_size * buf_count, - rx->dqo.hdr_bufs.data, rx->dqo.hdr_bufs.addr); - rx->dqo.hdr_bufs.data = NULL; - } + gve_rx_init_ring_state_dqo(rx, buffer_queue_slots, + completion_queue_slots); } void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx) { int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); + struct gve_rx_ring *rx = &priv->rx[idx]; if (!gve_rx_was_added_to_block(priv, idx)) return; + if (rx->dqo.page_pool) + page_pool_disable_direct_recycling(rx->dqo.page_pool); gve_remove_napi(priv, ntfy_idx); gve_rx_remove_from_block(priv, idx); + gve_rx_reset_ring_dqo(priv, idx); } -static void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, - struct gve_rx_alloc_rings_cfg *cfg) +void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, + struct gve_rx_alloc_rings_cfg *cfg) { struct device *hdev = &priv->pdev->dev; size_t completion_queue_slots; size_t buffer_queue_slots; int idx = rx->q_num; size_t size; + u32 qpl_id; int i; completion_queue_slots = rx->dqo.complq.mask + 1; @@ -243,12 +143,16 @@ static void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, for (i = 0; i < rx->dqo.num_buf_states; i++) { struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i]; - /* Only free page for RDA. QPL pages are freed in gve_main. 
*/ - if (bs->page_info.page) - gve_free_page_dqo(priv, bs, !rx->dqo.qpl); + + if (rx->dqo.page_pool) + gve_free_to_page_pool(rx, bs, false); + else + gve_free_qpl_page_dqo(bs); } + if (rx->dqo.qpl) { - gve_unassign_qpl(cfg->qpl_cfg, rx->dqo.qpl->id); + qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num); + gve_free_queue_page_list(priv, rx->dqo.qpl, qpl_id); rx->dqo.qpl = NULL; } @@ -270,15 +174,20 @@ static void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, kvfree(rx->dqo.buf_states); rx->dqo.buf_states = NULL; + if (rx->dqo.page_pool) { + page_pool_destroy(rx->dqo.page_pool); + rx->dqo.page_pool = NULL; + } + gve_rx_free_hdr_bufs(priv, rx); netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx); } -static int gve_rx_alloc_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx) +static int gve_rx_alloc_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx, + const u32 buf_count) { struct device *hdev = &priv->pdev->dev; - int buf_count = rx->dqo.bufq.mask + 1; rx->dqo.hdr_bufs.data = dma_alloc_coherent(hdev, priv->header_buf_size * buf_count, &rx->dqo.hdr_bufs.addr, GFP_KERNEL); @@ -296,17 +205,18 @@ void gve_rx_start_ring_dqo(struct gve_priv *priv, int idx) gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo); } -static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, - struct gve_rx_alloc_rings_cfg *cfg, - struct gve_rx_ring *rx, - int idx) +int gve_rx_alloc_ring_dqo(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg, + struct gve_rx_ring *rx, + int idx) { struct device *hdev = &priv->pdev->dev; + struct page_pool *pool; + int qpl_page_cnt; size_t size; - int i; + u32 qpl_id; - const u32 buffer_queue_slots = cfg->raw_addressing ? - priv->options_dqo_rda.rx_buff_ring_entries : cfg->ring_size; + const u32 buffer_queue_slots = cfg->ring_size; const u32 completion_queue_slots = cfg->ring_size; netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n"); @@ -314,15 +224,18 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, memset(rx, 0, sizeof(*rx)); rx->gve = priv; rx->q_num = idx; - rx->dqo.bufq.mask = buffer_queue_slots - 1; - rx->dqo.complq.num_free_slots = completion_queue_slots; - rx->dqo.complq.mask = completion_queue_slots - 1; - rx->ctx.skb_head = NULL; - rx->ctx.skb_tail = NULL; + rx->packet_buffer_size = cfg->packet_buffer_size; + + if (cfg->xdp) { + rx->packet_buffer_truesize = GVE_XDP_RX_BUFFER_SIZE_DQO; + rx->rx_headroom = XDP_PACKET_HEADROOM; + } else { + rx->packet_buffer_truesize = rx->packet_buffer_size; + rx->rx_headroom = 0; + } - rx->dqo.num_buf_states = cfg->raw_addressing ? - min_t(s16, S16_MAX, buffer_queue_slots * 4) : - priv->rx_pages_per_qpl; + rx->dqo.num_buf_states = cfg->raw_addressing ? 
buffer_queue_slots : + gve_get_rx_pages_per_qpl_dqo(cfg->ring_size); rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states, sizeof(rx->dqo.buf_states[0]), GFP_KERNEL); @@ -331,19 +244,9 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, /* Allocate header buffers for header-split */ if (cfg->enable_header_split) - if (gve_rx_alloc_hdr_bufs(priv, rx)) + if (gve_rx_alloc_hdr_bufs(priv, rx, buffer_queue_slots)) goto err; - /* Set up linked list of buffer IDs */ - for (i = 0; i < rx->dqo.num_buf_states - 1; i++) - rx->dqo.buf_states[i].next = i + 1; - - rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1; - rx->dqo.recycled_buf_states.head = -1; - rx->dqo.recycled_buf_states.tail = -1; - rx->dqo.used_buf_states.head = -1; - rx->dqo.used_buf_states.tail = -1; - /* Allocate RX completion queue */ size = sizeof(rx->dqo.complq.desc_ring[0]) * completion_queue_slots; @@ -359,8 +262,18 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, if (!rx->dqo.bufq.desc_ring) goto err; - if (!cfg->raw_addressing) { - rx->dqo.qpl = gve_assign_rx_qpl(cfg, rx->q_num); + if (cfg->raw_addressing) { + pool = gve_rx_create_page_pool(priv, rx, cfg->xdp); + if (IS_ERR(pool)) + goto err; + + rx->dqo.page_pool = pool; + } else { + qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num); + qpl_page_cnt = gve_get_rx_pages_per_qpl_dqo(cfg->ring_size); + + rx->dqo.qpl = gve_alloc_queue_page_list(priv, qpl_id, + qpl_page_cnt); if (!rx->dqo.qpl) goto err; rx->dqo.next_qpl_page_idx = 0; @@ -371,6 +284,9 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, if (!rx->q_resources) goto err; + gve_rx_init_ring_state_dqo(rx, buffer_queue_slots, + completion_queue_slots); + return 0; err: @@ -393,18 +309,12 @@ int gve_rx_alloc_rings_dqo(struct gve_priv *priv, int err; int i; - if (!cfg->raw_addressing && !cfg->qpls) { - netif_err(priv, drv, priv->dev, - "Cannot alloc QPL ring before allocing QPLs\n"); - return -EINVAL; - } - - rx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_rx_ring), + rx = kvcalloc(cfg->qcfg_rx->max_queues, sizeof(struct gve_rx_ring), GFP_KERNEL); if (!rx) return -ENOMEM; - for (i = 0; i < cfg->qcfg->num_queues; i++) { + for (i = 0; i < cfg->qcfg_rx->num_queues; i++) { err = gve_rx_alloc_ring_dqo(priv, cfg, &rx[i], i); if (err) { netif_err(priv, drv, priv->dev, @@ -433,7 +343,7 @@ void gve_rx_free_rings_dqo(struct gve_priv *priv, if (!rx) return; - for (i = 0; i < cfg->qcfg->num_queues; i++) + for (i = 0; i < cfg->qcfg_rx->num_queues; i++) gve_rx_free_ring_dqo(priv, &rx[i], cfg); kvfree(rx); @@ -455,26 +365,14 @@ void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx) num_avail_slots = min_t(u32, num_avail_slots, complq->num_free_slots); while (num_posted < num_avail_slots) { struct gve_rx_desc_dqo *desc = &bufq->desc_ring[bufq->tail]; - struct gve_rx_buf_state_dqo *buf_state; - - buf_state = gve_get_recycled_buf_state(rx); - if (unlikely(!buf_state)) { - buf_state = gve_alloc_buf_state(rx); - if (unlikely(!buf_state)) - break; - - if (unlikely(gve_alloc_page_dqo(rx, buf_state))) { - u64_stats_update_begin(&rx->statss); - rx->rx_buf_alloc_fail++; - u64_stats_update_end(&rx->statss); - gve_free_buf_state(rx, buf_state); - break; - } + + if (unlikely(gve_alloc_buffer(rx, desc))) { + u64_stats_update_begin(&rx->statss); + rx->rx_buf_alloc_fail++; + u64_stats_update_end(&rx->statss); + break; } - desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states); - desc->buf_addr = cpu_to_le64(buf_state->addr + - buf_state->page_info.page_offset); if (rx->dqo.hdr_bufs.data) desc->header_buf_addr = 
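/*
 * Illustrative sketch, not part of the patch: for raw-addressing DQO the
 * hunk above attaches a page pool to each RX ring via
 * gve_rx_create_page_pool(), which is not defined in this hunk.  A hedged
 * guess at what such a DMA-mapping pool setup generally looks like -- the
 * sizing multiplier and flags are assumptions, not the driver's exact
 * values:
 */
#include <linux/dma-mapping.h>
#include <net/page_pool/helpers.h>

static struct page_pool *demo_create_rx_page_pool(struct device *dev,
						  unsigned int ring_size)
{
	struct page_pool_params pp = {
		.flags		= PP_FLAG_DMA_MAP,
		.order		= 0,
		.pool_size	= ring_size * 4,	/* room for in-flight pages */
		.nid		= NUMA_NO_NODE,
		.dev		= dev,
		.dma_dir	= DMA_FROM_DEVICE,
	};

	return page_pool_create(&pp);
}

/*
 * On teardown the patch first calls page_pool_disable_direct_recycling()
 * when the ring is stopped and only destroys the pool after every buffer
 * state has been returned (see gve_rx_free_ring_dqo() above).
 */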
cpu_to_le64(rx->dqo.hdr_bufs.addr + @@ -491,48 +389,6 @@ void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx) rx->fill_cnt += num_posted; } -static void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx, - struct gve_rx_buf_state_dqo *buf_state) -{ - const u16 data_buffer_size = priv->data_buffer_size_dqo; - int pagecount; - - /* Can't reuse if we only fit one buffer per page */ - if (data_buffer_size * 2 > PAGE_SIZE) - goto mark_used; - - pagecount = gve_buf_ref_cnt(buf_state); - - /* Record the offset when we have a single remaining reference. - * - * When this happens, we know all of the other offsets of the page are - * usable. - */ - if (pagecount == 1) { - buf_state->last_single_ref_offset = - buf_state->page_info.page_offset; - } - - /* Use the next buffer sized chunk in the page. */ - buf_state->page_info.page_offset += data_buffer_size; - buf_state->page_info.page_offset &= (PAGE_SIZE - 1); - - /* If we wrap around to the same offset without ever dropping to 1 - * reference, then we don't know if this offset was ever freed. - */ - if (buf_state->page_info.page_offset == - buf_state->last_single_ref_offset) { - goto mark_used; - } - - gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state); - return; - -mark_used: - gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state); - rx->dqo.used_buf_states_cnt++; -} - static void gve_rx_skb_csum(struct sk_buff *skb, const struct gve_rx_compl_desc_dqo *desc, struct gve_ptype ptype) @@ -581,11 +437,13 @@ static void gve_rx_skb_hash(struct sk_buff *skb, skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type); } -static void gve_rx_free_skb(struct gve_rx_ring *rx) +static void gve_rx_free_skb(struct napi_struct *napi, struct gve_rx_ring *rx) { if (!rx->ctx.skb_head) return; + if (rx->ctx.skb_head == napi->skb) + napi->skb = NULL; dev_kfree_skb_any(rx->ctx.skb_head); rx->ctx.skb_head = NULL; rx->ctx.skb_tail = NULL; @@ -628,6 +486,25 @@ static int gve_rx_copy_ondemand(struct gve_rx_ring *rx, return 0; } +static void gve_skb_add_rx_frag(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state, + int num_frags, u16 buf_len) +{ + if (rx->dqo.page_pool) { + skb_add_rx_frag_netmem(rx->ctx.skb_tail, num_frags, + buf_state->page_info.netmem, + buf_state->page_info.page_offset + + buf_state->page_info.pad, buf_len, + buf_state->page_info.buf_size); + } else { + skb_add_rx_frag(rx->ctx.skb_tail, num_frags, + buf_state->page_info.page, + buf_state->page_info.page_offset + + buf_state->page_info.pad, buf_len, + buf_state->page_info.buf_size); + } +} + /* Chains multi skbs for single rx packet. * Returns 0 if buffer is appended, -1 otherwise. 
*/ @@ -645,6 +522,9 @@ static int gve_rx_append_frags(struct napi_struct *napi, if (!skb) return -1; + if (rx->dqo.page_pool) + skb_mark_for_recycle(skb); + if (rx->ctx.skb_tail == rx->ctx.skb_head) skb_shinfo(rx->ctx.skb_head)->frag_list = skb; else @@ -655,26 +535,41 @@ static int gve_rx_append_frags(struct napi_struct *napi, if (rx->ctx.skb_tail != rx->ctx.skb_head) { rx->ctx.skb_head->len += buf_len; rx->ctx.skb_head->data_len += buf_len; - rx->ctx.skb_head->truesize += priv->data_buffer_size_dqo; + rx->ctx.skb_head->truesize += buf_state->page_info.buf_size; } /* Trigger ondemand page allocation if we are running low on buffers */ if (gve_rx_should_trigger_copy_ondemand(rx)) return gve_rx_copy_ondemand(rx, buf_state, buf_len); - skb_add_rx_frag(rx->ctx.skb_tail, num_frags, - buf_state->page_info.page, - buf_state->page_info.page_offset, - buf_len, priv->data_buffer_size_dqo); - gve_dec_pagecnt_bias(&buf_state->page_info); - - /* Advances buffer page-offset if page is partially used. - * Marks buffer as used if page is full. - */ - gve_try_recycle_buf(priv, rx, buf_state); + gve_skb_add_rx_frag(rx, buf_state, num_frags, buf_len); + gve_reuse_buffer(rx, buf_state); return 0; } +static void gve_xdp_done_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, + struct xdp_buff *xdp, struct bpf_prog *xprog, + int xdp_act, + struct gve_rx_buf_state_dqo *buf_state) +{ + u64_stats_update_begin(&rx->statss); + switch (xdp_act) { + case XDP_ABORTED: + case XDP_DROP: + default: + rx->xdp_actions[xdp_act]++; + break; + case XDP_TX: + rx->xdp_tx_errors++; + break; + case XDP_REDIRECT: + rx->xdp_redirect_errors++; + break; + } + u64_stats_update_end(&rx->statss); + gve_free_buffer(rx, buf_state); +} + /* Returns 0 if descriptor is completed successfully. * Returns -EINVAL if descriptor is invalid. * Returns -ENOMEM if data cannot be copied to skb. @@ -689,6 +584,7 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, const bool hsplit = compl_desc->split_header; struct gve_rx_buf_state_dqo *buf_state; struct gve_priv *priv = rx->gve; + struct bpf_prog *xprog; u16 buf_len; u16 hdr_len; @@ -705,8 +601,7 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, } if (unlikely(compl_desc->rx_error)) { - gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, - buf_state); + gve_free_buffer(rx, buf_state); return -EINVAL; } @@ -716,7 +611,12 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, /* Page might have not been used for awhile and was likely last written * by a different thread. */ - prefetch(buf_state->page_info.page); + if (rx->dqo.page_pool) { + if (!netmem_is_net_iov(buf_state->page_info.netmem)) + prefetch(netmem_to_page(buf_state->page_info.netmem)); + } else { + prefetch(buf_state->page_info.page); + } /* Copy the header into the skb in the case of header split */ if (hsplit) { @@ -730,6 +630,9 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, if (unlikely(!rx->ctx.skb_head)) goto error; rx->ctx.skb_tail = rx->ctx.skb_head; + + if (rx->dqo.page_pool) + skb_mark_for_recycle(rx->ctx.skb_head); } else { unsplit = 1; } @@ -742,7 +645,8 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, /* Sync the portion of dma buffer for CPU to read. */ dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr, - buf_state->page_info.page_offset, + buf_state->page_info.page_offset + + buf_state->page_info.pad, buf_len, DMA_FROM_DEVICE); /* Append to current skb if one exists. 
*/ @@ -754,6 +658,34 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, return 0; } + xprog = READ_ONCE(priv->xdp_prog); + if (xprog) { + struct xdp_buff xdp; + void *old_data; + int xdp_act; + + xdp_init_buff(&xdp, buf_state->page_info.buf_size, + &rx->xdp_rxq); + xdp_prepare_buff(&xdp, + buf_state->page_info.page_address + + buf_state->page_info.page_offset, + buf_state->page_info.pad, + buf_len, false); + old_data = xdp.data; + xdp_act = bpf_prog_run_xdp(xprog, &xdp); + buf_state->page_info.pad += xdp.data - old_data; + buf_len = xdp.data_end - xdp.data; + if (xdp_act != XDP_PASS) { + gve_xdp_done_dqo(priv, rx, &xdp, xprog, xdp_act, + buf_state); + return 0; + } + + u64_stats_update_begin(&rx->statss); + rx->xdp_actions[XDP_PASS]++; + u64_stats_update_end(&rx->statss); + } + if (eop && buf_len <= priv->rx_copybreak) { rx->ctx.skb_head = gve_rx_copy(priv->dev, napi, &buf_state->page_info, buf_len); @@ -766,8 +698,7 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, rx->rx_copybreak_pkt++; u64_stats_update_end(&rx->statss); - gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, - buf_state); + gve_free_buffer(rx, buf_state); return 0; } @@ -782,16 +713,15 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, return 0; } - skb_add_rx_frag(rx->ctx.skb_head, 0, buf_state->page_info.page, - buf_state->page_info.page_offset, buf_len, - priv->data_buffer_size_dqo); - gve_dec_pagecnt_bias(&buf_state->page_info); + if (rx->dqo.page_pool) + skb_mark_for_recycle(rx->ctx.skb_head); - gve_try_recycle_buf(priv, rx, buf_state); + gve_skb_add_rx_frag(rx, buf_state, 0, buf_len); + gve_reuse_buffer(rx, buf_state); return 0; error: - gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state); + gve_free_buffer(rx, buf_state); return -ENOMEM; } @@ -884,7 +814,7 @@ int gve_rx_poll_dqo(struct gve_notify_block *block, int budget) err = gve_rx_dqo(napi, rx, compl_desc, complq->head, rx->q_num); if (err < 0) { - gve_rx_free_skb(rx); + gve_rx_free_skb(napi, rx); u64_stats_update_begin(&rx->statss); if (err == -ENOMEM) rx->rx_skb_alloc_fail++; @@ -927,7 +857,7 @@ int gve_rx_poll_dqo(struct gve_notify_block *block, int budget) /* gve_rx_complete_skb() will consume skb if successful */ if (gve_rx_complete_skb(rx, napi, compl_desc, feat) != 0) { - gve_rx_free_skb(rx); + gve_rx_free_skb(napi, rx); u64_stats_update_begin(&rx->statss); rx->rx_desc_err_dropped_pkt++; u64_stats_update_end(&rx->statss); diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index 4b9853adc113..1b40bf0c811a 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -158,15 +158,16 @@ static int gve_clean_xdp_done(struct gve_priv *priv, struct gve_tx_ring *tx, u32 to_do) { struct gve_tx_buffer_state *info; - u32 clean_end = tx->done + to_do; u64 pkts = 0, bytes = 0; size_t space_freed = 0; u32 xsk_complete = 0; u32 idx; + int i; - for (; tx->done < clean_end; tx->done++) { + for (i = 0; i < to_do; i++) { idx = tx->done & tx->mask; info = &tx->info[idx]; + tx->done++; if (unlikely(!info->xdp.size)) continue; @@ -205,7 +206,10 @@ void gve_tx_stop_ring_gqi(struct gve_priv *priv, int idx) return; gve_remove_napi(priv, ntfy_idx); - gve_clean_tx_done(priv, tx, priv->tx_desc_cnt, false); + if (tx->q_num < priv->tx_cfg.num_queues) + gve_clean_tx_done(priv, tx, priv->tx_desc_cnt, false); + else + gve_clean_xdp_done(priv, tx, priv->tx_desc_cnt); netdev_tx_reset_queue(tx->netdev_txq); 
gve_tx_remove_from_block(priv, idx); } @@ -216,6 +220,7 @@ static void gve_tx_free_ring_gqi(struct gve_priv *priv, struct gve_tx_ring *tx, struct device *hdev = &priv->pdev->dev; int idx = tx->q_num; size_t bytes; + u32 qpl_id; u32 slots; slots = tx->mask + 1; @@ -223,9 +228,12 @@ static void gve_tx_free_ring_gqi(struct gve_priv *priv, struct gve_tx_ring *tx, tx->q_resources, tx->q_resources_bus); tx->q_resources = NULL; - if (!tx->raw_addressing) { - gve_tx_fifo_release(priv, &tx->tx_fifo); - gve_unassign_qpl(cfg->qpl_cfg, tx->tx_fifo.qpl->id); + if (tx->tx_fifo.qpl) { + if (tx->tx_fifo.base) + gve_tx_fifo_release(priv, &tx->tx_fifo); + + qpl_id = gve_tx_qpl_id(priv, tx->q_num); + gve_free_queue_page_list(priv, tx->tx_fifo.qpl, qpl_id); tx->tx_fifo.qpl = NULL; } @@ -256,6 +264,8 @@ static int gve_tx_alloc_ring_gqi(struct gve_priv *priv, int idx) { struct device *hdev = &priv->pdev->dev; + int qpl_page_cnt; + u32 qpl_id = 0; size_t bytes; /* Make sure everything is zeroed to start */ @@ -280,9 +290,14 @@ static int gve_tx_alloc_ring_gqi(struct gve_priv *priv, tx->raw_addressing = cfg->raw_addressing; tx->dev = hdev; if (!tx->raw_addressing) { - tx->tx_fifo.qpl = gve_assign_tx_qpl(cfg, idx); + qpl_id = gve_tx_qpl_id(priv, tx->q_num); + qpl_page_cnt = priv->tx_pages_per_qpl; + + tx->tx_fifo.qpl = gve_alloc_queue_page_list(priv, qpl_id, + qpl_page_cnt); if (!tx->tx_fifo.qpl) goto abort_with_desc; + /* map Tx FIFO */ if (gve_tx_fifo_init(priv, &tx->tx_fifo)) goto abort_with_qpl; @@ -302,8 +317,10 @@ abort_with_fifo: if (!tx->raw_addressing) gve_tx_fifo_release(priv, &tx->tx_fifo); abort_with_qpl: - if (!tx->raw_addressing) - gve_unassign_qpl(cfg->qpl_cfg, tx->tx_fifo.qpl->id); + if (!tx->raw_addressing) { + gve_free_queue_page_list(priv, tx->tx_fifo.qpl, qpl_id); + tx->tx_fifo.qpl = NULL; + } abort_with_desc: dma_free_coherent(hdev, bytes, tx->desc, tx->bus); tx->desc = NULL; @@ -317,33 +334,23 @@ int gve_tx_alloc_rings_gqi(struct gve_priv *priv, struct gve_tx_alloc_rings_cfg *cfg) { struct gve_tx_ring *tx = cfg->tx; + int total_queues; int err = 0; int i, j; - if (!cfg->raw_addressing && !cfg->qpls) { - netif_err(priv, drv, priv->dev, - "Cannot alloc QPL ring before allocing QPLs\n"); - return -EINVAL; - } - - if (cfg->start_idx + cfg->num_rings > cfg->qcfg->max_queues) { + total_queues = cfg->qcfg->num_queues + cfg->num_xdp_rings; + if (total_queues > cfg->qcfg->max_queues) { netif_err(priv, drv, priv->dev, "Cannot alloc more than the max num of Tx rings\n"); return -EINVAL; } - if (cfg->start_idx == 0) { - tx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_tx_ring), - GFP_KERNEL); - if (!tx) - return -ENOMEM; - } else if (!tx) { - netif_err(priv, drv, priv->dev, - "Cannot alloc tx rings from a nonzero start idx without tx array\n"); - return -EINVAL; - } + tx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_tx_ring), + GFP_KERNEL); + if (!tx) + return -ENOMEM; - for (i = cfg->start_idx; i < cfg->start_idx + cfg->num_rings; i++) { + for (i = 0; i < total_queues; i++) { err = gve_tx_alloc_ring_gqi(priv, cfg, &tx[i], i); if (err) { netif_err(priv, drv, priv->dev, @@ -359,8 +366,7 @@ int gve_tx_alloc_rings_gqi(struct gve_priv *priv, cleanup: for (j = 0; j < i; j++) gve_tx_free_ring_gqi(priv, &tx[j], cfg); - if (cfg->start_idx == 0) - kvfree(tx); + kvfree(tx); return err; } @@ -373,13 +379,11 @@ void gve_tx_free_rings_gqi(struct gve_priv *priv, if (!tx) return; - for (i = cfg->start_idx; i < cfg->start_idx + cfg->num_rings; i++) + for (i = 0; i < cfg->qcfg->num_queues + 
cfg->qcfg->num_xdp_queues; i++) gve_tx_free_ring_gqi(priv, &tx[i], cfg); - if (cfg->start_idx == 0) { - kvfree(tx); - cfg->tx = NULL; - } + kvfree(tx); + cfg->tx = NULL; } /* gve_tx_avail - Calculates the number of slots available in the ring @@ -826,11 +830,14 @@ int gve_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, struct gve_tx_ring *tx; int i, err = 0, qid; - if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK) || !priv->xdp_prog) return -EINVAL; + if (!gve_get_napi_enabled(priv)) + return -ENETDOWN; + qid = gve_xdp_tx_queue_id(priv, - smp_processor_id() % priv->num_xdp_queues); + smp_processor_id() % priv->tx_cfg.num_xdp_queues); tx = &priv->tx[qid]; @@ -945,14 +952,10 @@ static int gve_xsk_tx(struct gve_priv *priv, struct gve_tx_ring *tx, spin_lock(&tx->xdp_lock); while (sent < budget) { - if (!gve_can_tx(tx, GVE_TX_START_THRESH)) + if (!gve_can_tx(tx, GVE_TX_START_THRESH) || + !xsk_tx_peek_desc(tx->xsk_pool, &desc)) goto out; - if (!xsk_tx_peek_desc(tx->xsk_pool, &desc)) { - tx->xdp_xsk_done = tx->xdp_xsk_wakeup; - goto out; - } - data = xsk_buff_raw_get_data(tx->xsk_pool, desc.addr); nsegs = gve_tx_fill_xdp(priv, tx, data, desc.len, NULL, true); tx->req += nsegs; @@ -967,33 +970,41 @@ out: return sent; } +int gve_xsk_tx_poll(struct gve_notify_block *rx_block, int budget) +{ + struct gve_rx_ring *rx = rx_block->rx; + struct gve_priv *priv = rx->gve; + struct gve_tx_ring *tx; + int sent = 0; + + tx = &priv->tx[gve_xdp_tx_queue_id(priv, rx->q_num)]; + if (tx->xsk_pool) { + sent = gve_xsk_tx(priv, tx, budget); + + u64_stats_update_begin(&tx->statss); + tx->xdp_xsk_sent += sent; + u64_stats_update_end(&tx->statss); + if (xsk_uses_need_wakeup(tx->xsk_pool)) + xsk_set_tx_need_wakeup(tx->xsk_pool); + } + + return sent; +} + bool gve_xdp_poll(struct gve_notify_block *block, int budget) { struct gve_priv *priv = block->priv; struct gve_tx_ring *tx = block->tx; u32 nic_done; - bool repoll; u32 to_do; /* Find out how much work there is to be done */ nic_done = gve_tx_load_event_counter(priv, tx); to_do = min_t(u32, (nic_done - tx->done), budget); gve_clean_xdp_done(priv, tx, to_do); - repoll = nic_done != tx->done; - - if (tx->xsk_pool) { - int sent = gve_xsk_tx(priv, tx, budget); - - u64_stats_update_begin(&tx->statss); - tx->xdp_xsk_sent += sent; - u64_stats_update_end(&tx->statss); - repoll |= (sent == budget); - if (xsk_uses_need_wakeup(tx->xsk_pool)) - xsk_set_tx_need_wakeup(tx->xsk_pool); - } /* If we still have work we want to repoll */ - return repoll; + return nic_done != tx->done; } bool gve_tx_poll(struct gve_notify_block *block, int budget) diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c index bc34b6cd3a3e..a27f1574a733 100644 --- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c @@ -209,6 +209,7 @@ static void gve_tx_free_ring_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, struct device *hdev = &priv->pdev->dev; int idx = tx->q_num; size_t bytes; + u32 qpl_id; if (tx->q_resources) { dma_free_coherent(hdev, sizeof(*tx->q_resources), @@ -237,7 +238,8 @@ static void gve_tx_free_ring_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, tx->dqo.tx_qpl_buf_next = NULL; if (tx->dqo.qpl) { - gve_unassign_qpl(cfg->qpl_cfg, tx->dqo.qpl->id); + qpl_id = gve_tx_qpl_id(priv, tx->q_num); + gve_free_queue_page_list(priv, tx->dqo.qpl, qpl_id); tx->dqo.qpl = NULL; } @@ -285,7 +287,9 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv 
*priv, { struct device *hdev = &priv->pdev->dev; int num_pending_packets; + int qpl_page_cnt; size_t bytes; + u32 qpl_id; int i; memset(tx, 0, sizeof(*tx)); @@ -295,9 +299,7 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, /* Queue sizes must be a power of 2 */ tx->mask = cfg->ring_size - 1; - tx->dqo.complq_mask = priv->queue_format == GVE_DQO_RDA_FORMAT ? - priv->options_dqo_rda.tx_comp_ring_entries - 1 : - tx->mask; + tx->dqo.complq_mask = tx->mask; /* The max number of pending packets determines the maximum number of * descriptors which maybe written to the completion queue. @@ -354,7 +356,11 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, goto err; if (!cfg->raw_addressing) { - tx->dqo.qpl = gve_assign_tx_qpl(cfg, idx); + qpl_id = gve_tx_qpl_id(priv, tx->q_num); + qpl_page_cnt = priv->tx_pages_per_qpl; + + tx->dqo.qpl = gve_alloc_queue_page_list(priv, qpl_id, + qpl_page_cnt); if (!tx->dqo.qpl) goto err; @@ -373,33 +379,23 @@ int gve_tx_alloc_rings_dqo(struct gve_priv *priv, struct gve_tx_alloc_rings_cfg *cfg) { struct gve_tx_ring *tx = cfg->tx; + int total_queues; int err = 0; int i, j; - if (!cfg->raw_addressing && !cfg->qpls) { - netif_err(priv, drv, priv->dev, - "Cannot alloc QPL ring before allocing QPLs\n"); - return -EINVAL; - } - - if (cfg->start_idx + cfg->num_rings > cfg->qcfg->max_queues) { + total_queues = cfg->qcfg->num_queues + cfg->num_xdp_rings; + if (total_queues > cfg->qcfg->max_queues) { netif_err(priv, drv, priv->dev, "Cannot alloc more than the max num of Tx rings\n"); return -EINVAL; } - if (cfg->start_idx == 0) { - tx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_tx_ring), - GFP_KERNEL); - if (!tx) - return -ENOMEM; - } else if (!tx) { - netif_err(priv, drv, priv->dev, - "Cannot alloc tx rings from a nonzero start idx without tx array\n"); - return -EINVAL; - } + tx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_tx_ring), + GFP_KERNEL); + if (!tx) + return -ENOMEM; - for (i = cfg->start_idx; i < cfg->start_idx + cfg->num_rings; i++) { + for (i = 0; i < total_queues; i++) { err = gve_tx_alloc_ring_dqo(priv, cfg, &tx[i], i); if (err) { netif_err(priv, drv, priv->dev, @@ -415,8 +411,7 @@ int gve_tx_alloc_rings_dqo(struct gve_priv *priv, err: for (j = 0; j < i; j++) gve_tx_free_ring_dqo(priv, &tx[j], cfg); - if (cfg->start_idx == 0) - kvfree(tx); + kvfree(tx); return err; } @@ -429,13 +424,11 @@ void gve_tx_free_rings_dqo(struct gve_priv *priv, if (!tx) return; - for (i = cfg->start_idx; i < cfg->start_idx + cfg->num_rings; i++) + for (i = 0; i < cfg->qcfg->num_queues + cfg->qcfg->num_xdp_queues; i++) gve_tx_free_ring_dqo(priv, &tx[i], cfg); - if (cfg->start_idx == 0) { - kvfree(tx); - cfg->tx = NULL; - } + kvfree(tx); + cfg->tx = NULL; } /* Returns the number of slots available in the ring */ @@ -555,28 +548,18 @@ static int gve_prep_tso(struct sk_buff *skb) if (unlikely(skb_shinfo(skb)->gso_size < GVE_TX_MIN_TSO_MSS_DQO)) return -1; + if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) + return -EINVAL; + /* Needed because we will modify header. */ err = skb_cow_head(skb, 0); if (err < 0) return err; tcp = tcp_hdr(skb); - - /* Remove payload length from checksum. */ paylen = skb->len - skb_transport_offset(skb); - - switch (skb_shinfo(skb)->gso_type) { - case SKB_GSO_TCPV4: - case SKB_GSO_TCPV6: - csum_replace_by_diff(&tcp->check, - (__force __wsum)htonl(paylen)); - - /* Compute length of segmentation header. 
*/ - header_len = skb_tcp_all_headers(skb); - break; - default: - return -EINVAL; - } + csum_replace_by_diff(&tcp->check, (__force __wsum)htonl(paylen)); + header_len = skb_tcp_all_headers(skb); if (unlikely(header_len > GVE_TX_MAX_HDR_SIZE_DQO)) return -EINVAL; @@ -677,7 +660,8 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx, goto err; dma_unmap_len_set(pkt, len[pkt->num_bufs], len); - dma_unmap_addr_set(pkt, dma[pkt->num_bufs], addr); + netmem_dma_unmap_addr_set(skb_frag_netmem(frag), pkt, + dma[pkt->num_bufs], addr); ++pkt->num_bufs; gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb, len, addr, @@ -876,22 +860,42 @@ static bool gve_can_send_tso(const struct sk_buff *skb) const int header_len = skb_tcp_all_headers(skb); const int gso_size = shinfo->gso_size; int cur_seg_num_bufs; + int prev_frag_size; int cur_seg_size; int i; cur_seg_size = skb_headlen(skb) - header_len; + prev_frag_size = skb_headlen(skb); cur_seg_num_bufs = cur_seg_size > 0; for (i = 0; i < shinfo->nr_frags; i++) { if (cur_seg_size >= gso_size) { cur_seg_size %= gso_size; cur_seg_num_bufs = cur_seg_size > 0; + + if (prev_frag_size > GVE_TX_MAX_BUF_SIZE_DQO) { + int prev_frag_remain = prev_frag_size % + GVE_TX_MAX_BUF_SIZE_DQO; + + /* If the last descriptor of the previous frag + * is less than cur_seg_size, the segment will + * span two descriptors in the previous frag. + * Since max gso size (9728) is less than + * GVE_TX_MAX_BUF_SIZE_DQO, it is impossible + * for the segment to span more than two + * descriptors. + */ + if (prev_frag_remain && + cur_seg_size > prev_frag_remain) + cur_seg_num_bufs++; + } } if (unlikely(++cur_seg_num_bufs > max_bufs_per_seg)) return false; - cur_seg_size += skb_frag_size(&shinfo->frags[i]); + prev_frag_size = skb_frag_size(&shinfo->frags[i]); + cur_seg_size += prev_frag_size; } return true; @@ -1035,8 +1039,9 @@ static void gve_unmap_packet(struct device *dev, dma_unmap_single(dev, dma_unmap_addr(pkt, dma[0]), dma_unmap_len(pkt, len[0]), DMA_TO_DEVICE); for (i = 1; i < pkt->num_bufs; i++) { - dma_unmap_page(dev, dma_unmap_addr(pkt, dma[i]), - dma_unmap_len(pkt, len[i]), DMA_TO_DEVICE); + netmem_dma_unmap_page_attrs(dev, dma_unmap_addr(pkt, dma[i]), + dma_unmap_len(pkt, len[i]), + DMA_TO_DEVICE, 0); } pkt->num_bufs = 0; } @@ -1136,8 +1141,7 @@ static void gve_handle_miss_completion(struct gve_priv *priv, /* jiffies can wraparound but time comparisons can handle overflows. */ pending_packet->timeout_jiffies = jiffies + - msecs_to_jiffies(GVE_REINJECT_COMPL_TIMEOUT * - MSEC_PER_SEC); + secs_to_jiffies(GVE_REINJECT_COMPL_TIMEOUT); add_to_list(tx, &tx->dqo_compl.miss_completions, pending_packet); *bytes += pending_packet->skb->len; @@ -1181,8 +1185,7 @@ static void remove_miss_completions(struct gve_priv *priv, pending_packet->state = GVE_PACKET_STATE_TIMED_OUT_COMPL; pending_packet->timeout_jiffies = jiffies + - msecs_to_jiffies(GVE_DEALLOCATE_COMPL_TIMEOUT * - MSEC_PER_SEC); + secs_to_jiffies(GVE_DEALLOCATE_COMPL_TIMEOUT); /* Maintain pending packet in another list so the packet can be * unallocated at a later time. 
*/ diff --git a/drivers/net/ethernet/google/gve/gve_utils.c b/drivers/net/ethernet/google/gve/gve_utils.c index 2349750075a5..ace9b8698021 100644 --- a/drivers/net/ethernet/google/gve/gve_utils.c +++ b/drivers/net/ethernet/google/gve/gve_utils.c @@ -110,12 +110,13 @@ void gve_add_napi(struct gve_priv *priv, int ntfy_idx, { struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; - netif_napi_add(priv->dev, &block->napi, gve_poll); + netif_napi_add_locked(priv->dev, &block->napi, gve_poll); + netif_napi_set_irq_locked(&block->napi, block->irq); } void gve_remove_napi(struct gve_priv *priv, int ntfy_idx) { struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; - netif_napi_del(&block->napi); + netif_napi_del_locked(&block->napi); }
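
The DQO receive changes above all operate on the same bookkeeping structure: buffer states kept in a flat array and chained by s16 indices, with -1 terminating a list. The open-coded setup removed from gve_rx_alloc_ring_dqo() (next = i + 1, recycled/used heads and tails set to -1) and the new gve_alloc_buffer()/gve_free_buffer()/gve_reuse_buffer() helpers both work on lists of that shape. The standalone userspace sketch below models such an index-linked free list; the type names, the list size, and the main() driver are illustrative and are not the driver's code.

#include <stdio.h>

#define NUM_BUF_STATES 8

struct buf_state {
        short next;                     /* next index, -1 ends the list */
};

struct buf_list {
        short head;
        short tail;
};

static struct buf_state states[NUM_BUF_STATES];

static void list_init(struct buf_list *list)
{
        list->head = -1;
        list->tail = -1;
}

static void enqueue(struct buf_list *list, short idx)
{
        states[idx].next = -1;
        if (list->head == -1) {
                list->head = idx;
                list->tail = idx;
        } else {
                states[list->tail].next = idx;
                list->tail = idx;
        }
}

static short dequeue(struct buf_list *list)
{
        short idx = list->head;

        if (idx == -1)
                return -1;              /* nothing available */
        list->head = states[idx].next;
        if (list->head == -1)
                list->tail = -1;
        return idx;
}

int main(void)
{
        struct buf_list free_states;
        short i, idx;

        list_init(&free_states);
        /* Seed every state onto the free list, as ring init does. */
        for (i = 0; i < NUM_BUF_STATES; i++)
                enqueue(&free_states, i);

        idx = dequeue(&free_states);    /* posting a buffer takes a state */
        printf("took buffer state %d\n", idx);
        enqueue(&free_states, idx);     /* a completion returns it */
        printf("next allocation gets state %d\n", dequeue(&free_states));
        return 0;
}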
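
In gve_rx_dqo(), the new XDP hook re-derives the buffer's pad and length after the program runs: whatever distance xdp.data moved is folded back into page_info.pad, and the remaining payload is xdp.data_end - xdp.data. The userspace model below shows only that arithmetic, using a stub "program" that moves the data pointer forward by 14 bytes, similar in effect to a bpf_xdp_adjust_head() call with a positive offset; the struct, buffer sizes, and names are made up for illustration.

#include <stdio.h>
#include <string.h>

struct fake_xdp_buff {
        unsigned char *data_hard_start;
        unsigned char *data;
        unsigned char *data_end;
};

/* Stand-in for running the BPF program: pull the L2 header, pass the rest. */
static int run_fake_prog(struct fake_xdp_buff *xdp)
{
        xdp->data += 14;
        return 2;                       /* "pass" */
}

int main(void)
{
        unsigned char frame[2048];
        unsigned int pad = 256;         /* headroom ahead of the packet */
        unsigned int buf_len = 128;     /* bytes written by the device */
        struct fake_xdp_buff xdp;
        unsigned char *old_data;

        memset(frame, 0, sizeof(frame));
        xdp.data_hard_start = frame;
        xdp.data = frame + pad;
        xdp.data_end = xdp.data + buf_len;

        old_data = xdp.data;
        run_fake_prog(&xdp);

        /* Same arithmetic the driver applies after the program returns. */
        pad += xdp.data - old_data;
        buf_len = xdp.data_end - xdp.data;

        printf("new pad %u, new length %u\n", pad, buf_len); /* 270, 114 */
        return 0;
}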
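
The reworked gve_clean_xdp_done() walks completions by iterating a plain count and deriving each slot index from the free-running done counter masked with the ring mask. One property of that form, modeled below in userspace with arbitrary values, is that it keeps working when the 32-bit done counter wraps around, whereas a loop bounded by a precomputed done + to_do compares incorrectly across the wrap.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define RING_SIZE 256u                  /* ring sizes are powers of two */

int main(void)
{
        uint32_t done = UINT32_MAX - 2; /* free-running counter about to wrap */
        uint32_t mask = RING_SIZE - 1;
        uint32_t to_do = 5;             /* completions reported by the device */
        uint32_t i;

        for (i = 0; i < to_do; i++) {
                uint32_t idx = done & mask;

                /* Here the driver would release the buffer at tx->info[idx]. */
                printf("clean slot %" PRIu32 " (done counter %" PRIu32 ")\n",
                       idx, done);
                done++;
        }
        return 0;
}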
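
The new check in gve_can_send_tso() charges an extra descriptor when a TSO segment crosses the short tail descriptor left over by a previous fragment that exceeded the per-descriptor buffer limit. The standalone sketch below reproduces that counting rule outside the skb machinery; the buffer-size constant stands in for GVE_TX_MAX_BUF_SIZE_DQO, and the fragment sizes, MSS, and per-segment descriptor budget are example values only.

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for GVE_TX_MAX_BUF_SIZE_DQO; example value only. */
#define MAX_BUF_SIZE_PER_DESC (16 * 1024)

static bool can_send_tso(const int *frag_sizes, int nr_frags, int headlen,
                         int header_len, int gso_size, int max_bufs_per_seg)
{
        int cur_seg_size = headlen - header_len;
        int prev_frag_size = headlen;
        int cur_seg_num_bufs = cur_seg_size > 0;
        int i;

        for (i = 0; i < nr_frags; i++) {
                if (cur_seg_size >= gso_size) {
                        cur_seg_size %= gso_size;
                        cur_seg_num_bufs = cur_seg_size > 0;

                        if (prev_frag_size > MAX_BUF_SIZE_PER_DESC) {
                                int remain = prev_frag_size %
                                             MAX_BUF_SIZE_PER_DESC;

                                /* The segment spans the previous frag's short
                                 * tail descriptor plus part of this frag, so
                                 * it costs one more buffer.
                                 */
                                if (remain && cur_seg_size > remain)
                                        cur_seg_num_bufs++;
                        }
                }

                if (++cur_seg_num_bufs > max_bufs_per_seg)
                        return false;

                prev_frag_size = frag_sizes[i];
                cur_seg_size += prev_frag_size;
        }
        return true;
}

int main(void)
{
        /* A 17000-byte frag is split into a 16384-byte descriptor and a
         * 616-byte tail; with an MSS of 9000 the second segment covers both.
         */
        int frags[] = { 17000, 4000, 3000 };

        printf("3 buffers per segment: %s\n",
               can_send_tso(frags, 3, 200, 200, 9000, 3) ? "ok" : "too many");
        printf("2 buffers per segment: %s\n",
               can_send_tso(frags, 3, 200, 200, 9000, 2) ? "ok" : "too many");
        return 0;
}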