diff options
Diffstat (limited to 'drivers/net/ethernet/google')
-rw-r--r-- | drivers/net/ethernet/google/gve/gve.h | 186 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_adminq.c | 263 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_adminq.h | 64 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_dqo.h | 24 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_ethtool.c | 216 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_main.c | 1053 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_rx.c | 231 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_rx_dqo.c | 255 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_tx.c | 141 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_tx_dqo.c | 114 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_utils.c | 48 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_utils.h | 8 |
12 files changed, 1786 insertions, 817 deletions
diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index b80349154604..ae1e21c9b0a5 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -9,6 +9,7 @@ #include <linux/dma-mapping.h> #include <linux/dmapool.h> +#include <linux/ethtool_netlink.h> #include <linux/netdevice.h> #include <linux/pci.h> #include <linux/u64_stats_sync.h> @@ -49,16 +50,23 @@ /* PTYPEs are always 10 bits. */ #define GVE_NUM_PTYPES 1024 +/* Default minimum ring size */ +#define GVE_DEFAULT_MIN_TX_RING_SIZE 256 +#define GVE_DEFAULT_MIN_RX_RING_SIZE 512 + #define GVE_DEFAULT_RX_BUFFER_SIZE 2048 +#define GVE_MAX_RX_BUFFER_SIZE 4096 + #define GVE_DEFAULT_RX_BUFFER_OFFSET 2048 #define GVE_XDP_ACTIONS 5 #define GVE_GQ_TX_MIN_PKT_DESC_BYTES 182 +#define GVE_DEFAULT_HEADER_BUFFER_SIZE 128 + #define DQO_QPL_DEFAULT_TX_PAGES 512 -#define DQO_QPL_DEFAULT_RX_PAGES 2048 /* Maximum TSO size supported on DQO */ #define GVE_DQO_TX_MAX 0x3FFFF @@ -150,6 +158,11 @@ struct gve_rx_compl_queue_dqo { u32 mask; /* Mask for indices to the size of the ring */ }; +struct gve_header_buf { + u8 *data; + dma_addr_t addr; +}; + /* Stores state for tracking buffers posted to HW */ struct gve_rx_buf_state_dqo { /* The page posted to HW. */ @@ -252,19 +265,26 @@ struct gve_rx_ring { /* track number of used buffers */ u16 used_buf_states_cnt; + + /* Address info of the buffers for header-split */ + struct gve_header_buf hdr_bufs; } dqo; }; u64 rbytes; /* free-running bytes received */ + u64 rx_hsplit_bytes; /* free-running header bytes received */ u64 rpackets; /* free-running packets received */ u32 cnt; /* free-running total number of completed packets */ u32 fill_cnt; /* free-running total number of descs and buffs posted */ u32 mask; /* masks the cnt and fill_cnt to the size of the ring */ + u64 rx_hsplit_pkt; /* free-running packets with headers split */ u64 rx_copybreak_pkt; /* free-running count of copybreak packets */ u64 rx_copied_pkt; /* free-running total number of copied packets */ u64 rx_skb_alloc_fail; /* free-running count of skb alloc fails */ u64 rx_buf_alloc_fail; /* free-running count of buffer alloc fails */ u64 rx_desc_err_dropped_pkt; /* free-running count of packets dropped by descriptor error */ + /* free-running count of unsplit packets due to header buffer overflow or hdr_len is 0 */ + u64 rx_hsplit_unsplit_pkt; u64 rx_cont_packet_cnt; /* free-running multi-fragment packets received */ u64 rx_frag_flip_cnt; /* free-running count of rx segments where page_flip was used */ u64 rx_frag_copy_cnt; /* free-running count of rx segments copied */ @@ -590,6 +610,7 @@ struct gve_notify_block { struct gve_priv *priv; struct gve_tx_ring *tx; /* tx rings on this block */ struct gve_rx_ring *rx; /* rx rings on this block */ + u32 irq; }; /* Tracks allowed and current queue settings */ @@ -604,11 +625,6 @@ struct gve_qpl_config { unsigned long *qpl_id_map; /* bitmap of used qpl ids */ }; -struct gve_options_dqo_rda { - u16 tx_comp_ring_entries; /* number of tx_comp descriptors */ - u16 rx_buff_ring_entries; /* number of rx_buff descriptors */ -}; - struct gve_irq_db { __be32 index; } ____cacheline_aligned; @@ -622,6 +638,34 @@ struct gve_ptype_lut { struct gve_ptype ptypes[GVE_NUM_PTYPES]; }; +/* Parameters for allocating resources for tx queues */ +struct gve_tx_alloc_rings_cfg { + struct gve_queue_config *qcfg; + + u16 ring_size; + u16 start_idx; + u16 num_rings; + bool raw_addressing; + + /* Allocated resources are returned here */ + struct gve_tx_ring *tx; +}; + +/* Parameters for allocating resources for rx queues */ +struct gve_rx_alloc_rings_cfg { + /* tx config is also needed to determine QPL ids */ + struct gve_queue_config *qcfg; + struct gve_queue_config *qcfg_tx; + + u16 ring_size; + u16 packet_buffer_size; + bool raw_addressing; + bool enable_header_split; + + /* Allocated resources are returned here */ + struct gve_rx_ring *rx; +}; + /* GVE_QUEUE_FORMAT_UNSPECIFIED must be zero since 0 is the default value * when the entire configure_device_resources command is zeroed out and the * queue_format is not specified. @@ -638,7 +682,6 @@ struct gve_priv { struct net_device *dev; struct gve_tx_ring *tx; /* array of tx_cfg.num_queues */ struct gve_rx_ring *rx; /* array of rx_cfg.num_queues */ - struct gve_queue_page_list *qpls; /* array of num qpls */ struct gve_notify_block *ntfy_blocks; /* array of num_ntfy_blks */ struct gve_irq_db *irq_db_indices; /* array of num_ntfy_blks */ dma_addr_t irq_db_indices_bus; @@ -651,9 +694,13 @@ struct gve_priv { u16 num_event_counters; u16 tx_desc_cnt; /* num desc per ring */ u16 rx_desc_cnt; /* num desc per ring */ + u16 max_tx_desc_cnt; + u16 max_rx_desc_cnt; + u16 min_tx_desc_cnt; + u16 min_rx_desc_cnt; + bool modify_ring_size_enabled; + bool default_min_ring_size; u16 tx_pages_per_qpl; /* Suggested number of pages per qpl for TX queues by NIC */ - u16 rx_pages_per_qpl; /* Suggested number of pages per qpl for RX queues by NIC */ - u16 rx_data_slot_cnt; /* rx buffer length */ u64 max_registered_pages; u64 num_registered_pages; /* num pages registered with NIC */ struct bpf_prog *xdp_prog; /* XDP BPF program */ @@ -663,7 +710,6 @@ struct gve_priv { u16 num_xdp_queues; struct gve_queue_config tx_cfg; struct gve_queue_config rx_cfg; - struct gve_qpl_config qpl_cfg; /* map used QPL ids */ u32 num_ntfy_blks; /* spilt between TX and RX so must be even */ struct gve_registers __iomem *reg_bar0; /* see gve_register.h */ @@ -725,17 +771,20 @@ struct gve_priv { u64 link_speed; bool up_before_suspend; /* True if dev was up before suspend */ - struct gve_options_dqo_rda options_dqo_rda; struct gve_ptype_lut *ptype_lut_dqo; /* Must be a power of two. */ - int data_buffer_size_dqo; + u16 data_buffer_size_dqo; + u16 max_rx_buffer_size; /* device limit */ enum gve_queue_format queue_format; /* Interrupt coalescing settings */ u32 tx_coalesce_usecs; u32 rx_coalesce_usecs; + + u16 header_buf_size; /* device configured, header-split supported if non-zero */ + bool header_split_enabled; /* True if the header split is enabled by the user */ }; enum gve_service_task_flags_bit { @@ -917,14 +966,14 @@ static inline bool gve_is_qpl(struct gve_priv *priv) priv->queue_format == GVE_DQO_QPL_FORMAT; } -/* Returns the number of tx queue page lists - */ -static inline u32 gve_num_tx_qpls(struct gve_priv *priv) +/* Returns the number of tx queue page lists */ +static inline u32 gve_num_tx_qpls(const struct gve_queue_config *tx_cfg, + int num_xdp_queues, + bool is_qpl) { - if (!gve_is_qpl(priv)) + if (!is_qpl) return 0; - - return priv->tx_cfg.num_queues + priv->num_xdp_queues; + return tx_cfg->num_queues + num_xdp_queues; } /* Returns the number of XDP tx queue page lists @@ -937,14 +986,13 @@ static inline u32 gve_num_xdp_qpls(struct gve_priv *priv) return priv->num_xdp_queues; } -/* Returns the number of rx queue page lists - */ -static inline u32 gve_num_rx_qpls(struct gve_priv *priv) +/* Returns the number of rx queue page lists */ +static inline u32 gve_num_rx_qpls(const struct gve_queue_config *rx_cfg, + bool is_qpl) { - if (!gve_is_qpl(priv)) + if (!is_qpl) return 0; - - return priv->rx_cfg.num_queues; + return rx_cfg->num_queues; } static inline u32 gve_tx_qpl_id(struct gve_priv *priv, int tx_qid) @@ -957,59 +1005,34 @@ static inline u32 gve_rx_qpl_id(struct gve_priv *priv, int rx_qid) return priv->tx_cfg.max_queues + rx_qid; } -static inline u32 gve_tx_start_qpl_id(struct gve_priv *priv) -{ - return gve_tx_qpl_id(priv, 0); -} - -static inline u32 gve_rx_start_qpl_id(struct gve_priv *priv) +static inline u32 gve_get_rx_qpl_id(const struct gve_queue_config *tx_cfg, int rx_qid) { - return gve_rx_qpl_id(priv, 0); + return tx_cfg->max_queues + rx_qid; } -/* Returns a pointer to the next available tx qpl in the list of qpls - */ -static inline -struct gve_queue_page_list *gve_assign_tx_qpl(struct gve_priv *priv, int tx_qid) +static inline u32 gve_tx_start_qpl_id(struct gve_priv *priv) { - int id = gve_tx_qpl_id(priv, tx_qid); - - /* QPL already in use */ - if (test_bit(id, priv->qpl_cfg.qpl_id_map)) - return NULL; - - set_bit(id, priv->qpl_cfg.qpl_id_map); - return &priv->qpls[id]; + return gve_tx_qpl_id(priv, 0); } -/* Returns a pointer to the next available rx qpl in the list of qpls - */ -static inline -struct gve_queue_page_list *gve_assign_rx_qpl(struct gve_priv *priv, int rx_qid) +static inline u32 gve_rx_start_qpl_id(const struct gve_queue_config *tx_cfg) { - int id = gve_rx_qpl_id(priv, rx_qid); - - /* QPL already in use */ - if (test_bit(id, priv->qpl_cfg.qpl_id_map)) - return NULL; - - set_bit(id, priv->qpl_cfg.qpl_id_map); - return &priv->qpls[id]; + return gve_get_rx_qpl_id(tx_cfg, 0); } -/* Unassigns the qpl with the given id - */ -static inline void gve_unassign_qpl(struct gve_priv *priv, int id) +static inline u32 gve_get_rx_pages_per_qpl_dqo(u32 rx_desc_cnt) { - clear_bit(id, priv->qpl_cfg.qpl_id_map); + /* For DQO, page count should be more than ring size for + * out-of-order completions. Set it to two times of ring size. + */ + return 2 * rx_desc_cnt; } -/* Returns the correct dma direction for tx and rx qpls - */ +/* Returns the correct dma direction for tx and rx qpls */ static inline enum dma_data_direction gve_qpl_dma_dir(struct gve_priv *priv, int id) { - if (id < gve_rx_start_qpl_id(priv)) + if (id < gve_rx_start_qpl_id(&priv->tx_cfg)) return DMA_TO_DEVICE; else return DMA_FROM_DEVICE; @@ -1036,12 +1059,21 @@ static inline u32 gve_xdp_tx_start_queue_id(struct gve_priv *priv) return gve_xdp_tx_queue_id(priv, 0); } +/* gqi napi handler defined in gve_main.c */ +int gve_napi_poll(struct napi_struct *napi, int budget); + /* buffers */ int gve_alloc_page(struct gve_priv *priv, struct device *dev, struct page **page, dma_addr_t *dma, enum dma_data_direction, gfp_t gfp_flags); void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, enum dma_data_direction); +/* qpls */ +struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv, + u32 id, int pages); +void gve_free_queue_page_list(struct gve_priv *priv, + struct gve_queue_page_list *qpl, + u32 id); /* tx handling */ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev); int gve_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, @@ -1051,8 +1083,12 @@ int gve_xdp_xmit_one(struct gve_priv *priv, struct gve_tx_ring *tx, void gve_xdp_tx_flush(struct gve_priv *priv, u32 xdp_qid); bool gve_tx_poll(struct gve_notify_block *block, int budget); bool gve_xdp_poll(struct gve_notify_block *block, int budget); -int gve_tx_alloc_rings(struct gve_priv *priv, int start_id, int num_rings); -void gve_tx_free_rings_gqi(struct gve_priv *priv, int start_id, int num_rings); +int gve_tx_alloc_rings_gqi(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *cfg); +void gve_tx_free_rings_gqi(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *cfg); +void gve_tx_start_ring_gqi(struct gve_priv *priv, int idx); +void gve_tx_stop_ring_gqi(struct gve_priv *priv, int idx); u32 gve_tx_load_event_counter(struct gve_priv *priv, struct gve_tx_ring *tx); bool gve_tx_clean_pending(struct gve_priv *priv, struct gve_tx_ring *tx); @@ -1060,11 +1096,31 @@ bool gve_tx_clean_pending(struct gve_priv *priv, struct gve_tx_ring *tx); void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx); int gve_rx_poll(struct gve_notify_block *block, int budget); bool gve_rx_work_pending(struct gve_rx_ring *rx); +int gve_rx_alloc_ring_gqi(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg, + struct gve_rx_ring *rx, + int idx); +void gve_rx_free_ring_gqi(struct gve_priv *priv, struct gve_rx_ring *rx, + struct gve_rx_alloc_rings_cfg *cfg); int gve_rx_alloc_rings(struct gve_priv *priv); -void gve_rx_free_rings_gqi(struct gve_priv *priv); +int gve_rx_alloc_rings_gqi(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg); +void gve_rx_free_rings_gqi(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg); +void gve_rx_start_ring_gqi(struct gve_priv *priv, int idx); +void gve_rx_stop_ring_gqi(struct gve_priv *priv, int idx); +u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hplit); +bool gve_header_split_supported(const struct gve_priv *priv); +int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split); /* Reset */ void gve_schedule_reset(struct gve_priv *priv); int gve_reset(struct gve_priv *priv, bool attempt_teardown); +void gve_get_curr_alloc_cfgs(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg); +int gve_adjust_config(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg); int gve_adjust_queues(struct gve_priv *priv, struct gve_queue_config new_rx_config, struct gve_queue_config new_tx_config); diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index 12fbd723ecc6..8ca0def176ef 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -32,6 +32,8 @@ struct gve_device_option *gve_get_next_option(struct gve_device_descriptor *desc return option_end > descriptor_end ? NULL : (struct gve_device_option *)option_end; } +#define GVE_DEVICE_OPTION_NO_MIN_RING_SIZE 8 + static void gve_parse_device_option(struct gve_priv *priv, struct gve_device_descriptor *device_descriptor, @@ -40,7 +42,9 @@ void gve_parse_device_option(struct gve_priv *priv, struct gve_device_option_gqi_qpl **dev_op_gqi_qpl, struct gve_device_option_dqo_rda **dev_op_dqo_rda, struct gve_device_option_jumbo_frames **dev_op_jumbo_frames, - struct gve_device_option_dqo_qpl **dev_op_dqo_qpl) + struct gve_device_option_dqo_qpl **dev_op_dqo_qpl, + struct gve_device_option_buffer_sizes **dev_op_buffer_sizes, + struct gve_device_option_modify_ring **dev_op_modify_ring) { u32 req_feat_mask = be32_to_cpu(option->required_features_mask); u16 option_length = be16_to_cpu(option->option_length); @@ -147,6 +151,44 @@ void gve_parse_device_option(struct gve_priv *priv, } *dev_op_jumbo_frames = (void *)(option + 1); break; + case GVE_DEV_OPT_ID_BUFFER_SIZES: + if (option_length < sizeof(**dev_op_buffer_sizes) || + req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_BUFFER_SIZES) { + dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT, + "Buffer Sizes", + (int)sizeof(**dev_op_buffer_sizes), + GVE_DEV_OPT_REQ_FEAT_MASK_BUFFER_SIZES, + option_length, req_feat_mask); + break; + } + + if (option_length > sizeof(**dev_op_buffer_sizes)) + dev_warn(&priv->pdev->dev, + GVE_DEVICE_OPTION_TOO_BIG_FMT, + "Buffer Sizes"); + *dev_op_buffer_sizes = (void *)(option + 1); + break; + case GVE_DEV_OPT_ID_MODIFY_RING: + if (option_length < GVE_DEVICE_OPTION_NO_MIN_RING_SIZE || + req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING) { + dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT, + "Modify Ring", (int)sizeof(**dev_op_modify_ring), + GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING, + option_length, req_feat_mask); + break; + } + + if (option_length > sizeof(**dev_op_modify_ring)) { + dev_warn(&priv->pdev->dev, + GVE_DEVICE_OPTION_TOO_BIG_FMT, "Modify Ring"); + } + + *dev_op_modify_ring = (void *)(option + 1); + + /* device has not provided min ring size */ + if (option_length == GVE_DEVICE_OPTION_NO_MIN_RING_SIZE) + priv->default_min_ring_size = true; + break; default: /* If we don't recognize the option just continue * without doing anything. @@ -164,7 +206,9 @@ gve_process_device_options(struct gve_priv *priv, struct gve_device_option_gqi_qpl **dev_op_gqi_qpl, struct gve_device_option_dqo_rda **dev_op_dqo_rda, struct gve_device_option_jumbo_frames **dev_op_jumbo_frames, - struct gve_device_option_dqo_qpl **dev_op_dqo_qpl) + struct gve_device_option_dqo_qpl **dev_op_dqo_qpl, + struct gve_device_option_buffer_sizes **dev_op_buffer_sizes, + struct gve_device_option_modify_ring **dev_op_modify_ring) { const int num_options = be16_to_cpu(descriptor->num_device_options); struct gve_device_option *dev_opt; @@ -185,7 +229,8 @@ gve_process_device_options(struct gve_priv *priv, gve_parse_device_option(priv, descriptor, dev_opt, dev_op_gqi_rda, dev_op_gqi_qpl, dev_op_dqo_rda, dev_op_jumbo_frames, - dev_op_dqo_qpl); + dev_op_dqo_qpl, dev_op_buffer_sizes, + dev_op_modify_ring); dev_opt = next_opt; } @@ -546,6 +591,7 @@ static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index) cpu_to_be64(tx->q_resources_bus), .tx_ring_addr = cpu_to_be64(tx->bus), .ntfy_id = cpu_to_be32(tx->ntfy_id), + .tx_ring_size = cpu_to_be16(priv->tx_desc_cnt), }; if (gve_is_gqi(priv)) { @@ -554,24 +600,17 @@ static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index) cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id); } else { - u16 comp_ring_size; u32 qpl_id = 0; - if (priv->queue_format == GVE_DQO_RDA_FORMAT) { + if (priv->queue_format == GVE_DQO_RDA_FORMAT) qpl_id = GVE_RAW_ADDRESSING_QPL_ID; - comp_ring_size = - priv->options_dqo_rda.tx_comp_ring_entries; - } else { + else qpl_id = tx->dqo.qpl->id; - comp_ring_size = priv->tx_desc_cnt; - } cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id); - cmd.create_tx_queue.tx_ring_size = - cpu_to_be16(priv->tx_desc_cnt); cmd.create_tx_queue.tx_comp_ring_addr = cpu_to_be64(tx->complq_bus_dqo); cmd.create_tx_queue.tx_comp_ring_size = - cpu_to_be16(comp_ring_size); + cpu_to_be16(priv->tx_desc_cnt); } return gve_adminq_issue_cmd(priv, &cmd); @@ -591,60 +630,73 @@ int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_que return gve_adminq_kick_and_wait(priv); } -static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index) +static void gve_adminq_get_create_rx_queue_cmd(struct gve_priv *priv, + union gve_adminq_command *cmd, + u32 queue_index) { struct gve_rx_ring *rx = &priv->rx[queue_index]; - union gve_adminq_command cmd; - memset(&cmd, 0, sizeof(cmd)); - cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_RX_QUEUE); - cmd.create_rx_queue = (struct gve_adminq_create_rx_queue) { + memset(cmd, 0, sizeof(*cmd)); + cmd->opcode = cpu_to_be32(GVE_ADMINQ_CREATE_RX_QUEUE); + cmd->create_rx_queue = (struct gve_adminq_create_rx_queue) { .queue_id = cpu_to_be32(queue_index), .ntfy_id = cpu_to_be32(rx->ntfy_id), .queue_resources_addr = cpu_to_be64(rx->q_resources_bus), + .rx_ring_size = cpu_to_be16(priv->rx_desc_cnt), }; if (gve_is_gqi(priv)) { u32 qpl_id = priv->queue_format == GVE_GQI_RDA_FORMAT ? GVE_RAW_ADDRESSING_QPL_ID : rx->data.qpl->id; - cmd.create_rx_queue.rx_desc_ring_addr = - cpu_to_be64(rx->desc.bus), - cmd.create_rx_queue.rx_data_ring_addr = - cpu_to_be64(rx->data.data_bus), - cmd.create_rx_queue.index = cpu_to_be32(queue_index); - cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id); - cmd.create_rx_queue.packet_buffer_size = cpu_to_be16(rx->packet_buffer_size); + cmd->create_rx_queue.rx_desc_ring_addr = + cpu_to_be64(rx->desc.bus); + cmd->create_rx_queue.rx_data_ring_addr = + cpu_to_be64(rx->data.data_bus); + cmd->create_rx_queue.index = cpu_to_be32(queue_index); + cmd->create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id); + cmd->create_rx_queue.packet_buffer_size = cpu_to_be16(rx->packet_buffer_size); } else { - u16 rx_buff_ring_entries; u32 qpl_id = 0; - if (priv->queue_format == GVE_DQO_RDA_FORMAT) { + if (priv->queue_format == GVE_DQO_RDA_FORMAT) qpl_id = GVE_RAW_ADDRESSING_QPL_ID; - rx_buff_ring_entries = - priv->options_dqo_rda.rx_buff_ring_entries; - } else { + else qpl_id = rx->dqo.qpl->id; - rx_buff_ring_entries = priv->rx_desc_cnt; - } - cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id); - cmd.create_rx_queue.rx_ring_size = - cpu_to_be16(priv->rx_desc_cnt); - cmd.create_rx_queue.rx_desc_ring_addr = + cmd->create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id); + cmd->create_rx_queue.rx_desc_ring_addr = cpu_to_be64(rx->dqo.complq.bus); - cmd.create_rx_queue.rx_data_ring_addr = + cmd->create_rx_queue.rx_data_ring_addr = cpu_to_be64(rx->dqo.bufq.bus); - cmd.create_rx_queue.packet_buffer_size = + cmd->create_rx_queue.packet_buffer_size = cpu_to_be16(priv->data_buffer_size_dqo); - cmd.create_rx_queue.rx_buff_ring_size = - cpu_to_be16(rx_buff_ring_entries); - cmd.create_rx_queue.enable_rsc = + cmd->create_rx_queue.rx_buff_ring_size = + cpu_to_be16(priv->rx_desc_cnt); + cmd->create_rx_queue.enable_rsc = !!(priv->dev->features & NETIF_F_LRO); + if (priv->header_split_enabled) + cmd->create_rx_queue.header_buffer_size = + cpu_to_be16(priv->header_buf_size); } +} + +static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index) +{ + union gve_adminq_command cmd; + gve_adminq_get_create_rx_queue_cmd(priv, &cmd, queue_index); return gve_adminq_issue_cmd(priv, &cmd); } +/* Unlike gve_adminq_create_rx_queue, this actually rings the doorbell */ +int gve_adminq_create_single_rx_queue(struct gve_priv *priv, u32 queue_index) +{ + union gve_adminq_command cmd; + + gve_adminq_get_create_rx_queue_cmd(priv, &cmd, queue_index); + return gve_adminq_execute_cmd(priv, &cmd); +} + int gve_adminq_create_rx_queues(struct gve_priv *priv, u32 num_queues) { int err; @@ -691,22 +743,31 @@ int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_qu return gve_adminq_kick_and_wait(priv); } +static void gve_adminq_make_destroy_rx_queue_cmd(union gve_adminq_command *cmd, + u32 queue_index) +{ + memset(cmd, 0, sizeof(*cmd)); + cmd->opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_RX_QUEUE); + cmd->destroy_rx_queue = (struct gve_adminq_destroy_rx_queue) { + .queue_id = cpu_to_be32(queue_index), + }; +} + static int gve_adminq_destroy_rx_queue(struct gve_priv *priv, u32 queue_index) { union gve_adminq_command cmd; - int err; - memset(&cmd, 0, sizeof(cmd)); - cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_RX_QUEUE); - cmd.destroy_rx_queue = (struct gve_adminq_destroy_rx_queue) { - .queue_id = cpu_to_be32(queue_index), - }; + gve_adminq_make_destroy_rx_queue_cmd(&cmd, queue_index); + return gve_adminq_issue_cmd(priv, &cmd); +} - err = gve_adminq_issue_cmd(priv, &cmd); - if (err) - return err; +/* Unlike gve_adminq_destroy_rx_queue, this actually rings the doorbell */ +int gve_adminq_destroy_single_rx_queue(struct gve_priv *priv, u32 queue_index) +{ + union gve_adminq_command cmd; - return 0; + gve_adminq_make_destroy_rx_queue_cmd(&cmd, queue_index); + return gve_adminq_execute_cmd(priv, &cmd); } int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 num_queues) @@ -723,31 +784,17 @@ int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 num_queues) return gve_adminq_kick_and_wait(priv); } -static int gve_set_desc_cnt(struct gve_priv *priv, - struct gve_device_descriptor *descriptor) -{ - priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries); - priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries); - return 0; -} - -static int -gve_set_desc_cnt_dqo(struct gve_priv *priv, - const struct gve_device_descriptor *descriptor, - const struct gve_device_option_dqo_rda *dev_op_dqo_rda) +static void gve_set_default_desc_cnt(struct gve_priv *priv, + const struct gve_device_descriptor *descriptor) { priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries); priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries); - if (priv->queue_format == GVE_DQO_QPL_FORMAT) - return 0; - - priv->options_dqo_rda.tx_comp_ring_entries = - be16_to_cpu(dev_op_dqo_rda->tx_comp_ring_entries); - priv->options_dqo_rda.rx_buff_ring_entries = - be16_to_cpu(dev_op_dqo_rda->rx_buff_ring_entries); - - return 0; + /* set default ranges */ + priv->max_tx_desc_cnt = priv->tx_desc_cnt; + priv->max_rx_desc_cnt = priv->rx_desc_cnt; + priv->min_tx_desc_cnt = priv->tx_desc_cnt; + priv->min_rx_desc_cnt = priv->rx_desc_cnt; } static void gve_enable_supported_features(struct gve_priv *priv, @@ -755,7 +802,11 @@ static void gve_enable_supported_features(struct gve_priv *priv, const struct gve_device_option_jumbo_frames *dev_op_jumbo_frames, const struct gve_device_option_dqo_qpl - *dev_op_dqo_qpl) + *dev_op_dqo_qpl, + const struct gve_device_option_buffer_sizes + *dev_op_buffer_sizes, + const struct gve_device_option_modify_ring + *dev_op_modify_ring) { /* Before control reaches this point, the page-size-capped max MTU from * the gve_device_descriptor field has already been stored in @@ -772,18 +823,47 @@ static void gve_enable_supported_features(struct gve_priv *priv, if (dev_op_dqo_qpl) { priv->tx_pages_per_qpl = be16_to_cpu(dev_op_dqo_qpl->tx_pages_per_qpl); - priv->rx_pages_per_qpl = - be16_to_cpu(dev_op_dqo_qpl->rx_pages_per_qpl); if (priv->tx_pages_per_qpl == 0) priv->tx_pages_per_qpl = DQO_QPL_DEFAULT_TX_PAGES; - if (priv->rx_pages_per_qpl == 0) - priv->rx_pages_per_qpl = DQO_QPL_DEFAULT_RX_PAGES; + } + + if (dev_op_buffer_sizes && + (supported_features_mask & GVE_SUP_BUFFER_SIZES_MASK)) { + priv->max_rx_buffer_size = + be16_to_cpu(dev_op_buffer_sizes->packet_buffer_size); + priv->header_buf_size = + be16_to_cpu(dev_op_buffer_sizes->header_buffer_size); + dev_info(&priv->pdev->dev, + "BUFFER SIZES device option enabled with max_rx_buffer_size of %u, header_buf_size of %u.\n", + priv->max_rx_buffer_size, priv->header_buf_size); + } + + /* Read and store ring size ranges given by device */ + if (dev_op_modify_ring && + (supported_features_mask & GVE_SUP_MODIFY_RING_MASK)) { + priv->modify_ring_size_enabled = true; + + /* max ring size for DQO QPL should not be overwritten because of device limit */ + if (priv->queue_format != GVE_DQO_QPL_FORMAT) { + priv->max_rx_desc_cnt = be16_to_cpu(dev_op_modify_ring->max_rx_ring_size); + priv->max_tx_desc_cnt = be16_to_cpu(dev_op_modify_ring->max_tx_ring_size); + } + if (priv->default_min_ring_size) { + /* If device hasn't provided minimums, use default minimums */ + priv->min_tx_desc_cnt = GVE_DEFAULT_MIN_TX_RING_SIZE; + priv->min_rx_desc_cnt = GVE_DEFAULT_MIN_RX_RING_SIZE; + } else { + priv->min_rx_desc_cnt = be16_to_cpu(dev_op_modify_ring->min_rx_ring_size); + priv->min_tx_desc_cnt = be16_to_cpu(dev_op_modify_ring->min_tx_ring_size); + } } } int gve_adminq_describe_device(struct gve_priv *priv) { + struct gve_device_option_buffer_sizes *dev_op_buffer_sizes = NULL; struct gve_device_option_jumbo_frames *dev_op_jumbo_frames = NULL; + struct gve_device_option_modify_ring *dev_op_modify_ring = NULL; struct gve_device_option_gqi_rda *dev_op_gqi_rda = NULL; struct gve_device_option_gqi_qpl *dev_op_gqi_qpl = NULL; struct gve_device_option_dqo_rda *dev_op_dqo_rda = NULL; @@ -815,8 +895,9 @@ int gve_adminq_describe_device(struct gve_priv *priv) err = gve_process_device_options(priv, descriptor, &dev_op_gqi_rda, &dev_op_gqi_qpl, &dev_op_dqo_rda, - &dev_op_jumbo_frames, - &dev_op_dqo_qpl); + &dev_op_jumbo_frames, &dev_op_dqo_qpl, + &dev_op_buffer_sizes, + &dev_op_modify_ring); if (err) goto free_device_descriptor; @@ -851,15 +932,13 @@ int gve_adminq_describe_device(struct gve_priv *priv) dev_info(&priv->pdev->dev, "Driver is running with GQI QPL queue format.\n"); } - if (gve_is_gqi(priv)) { - err = gve_set_desc_cnt(priv, descriptor); - } else { - /* DQO supports LRO. */ + + /* set default descriptor counts */ + gve_set_default_desc_cnt(priv, descriptor); + + /* DQO supports LRO. */ + if (!gve_is_gqi(priv)) priv->dev->hw_features |= NETIF_F_LRO; - err = gve_set_desc_cnt_dqo(priv, descriptor, dev_op_dqo_rda); - } - if (err) - goto free_device_descriptor; priv->max_registered_pages = be64_to_cpu(descriptor->max_registered_pages); @@ -875,17 +954,11 @@ int gve_adminq_describe_device(struct gve_priv *priv) mac = descriptor->mac; dev_info(&priv->pdev->dev, "MAC addr: %pM\n", mac); priv->tx_pages_per_qpl = be16_to_cpu(descriptor->tx_pages_per_qpl); - priv->rx_data_slot_cnt = be16_to_cpu(descriptor->rx_pages_per_qpl); - - if (gve_is_gqi(priv) && priv->rx_data_slot_cnt < priv->rx_desc_cnt) { - dev_err(&priv->pdev->dev, "rx_data_slot_cnt cannot be smaller than rx_desc_cnt, setting rx_desc_cnt down to %d.\n", - priv->rx_data_slot_cnt); - priv->rx_desc_cnt = priv->rx_data_slot_cnt; - } priv->default_num_queues = be16_to_cpu(descriptor->default_num_queues); gve_enable_supported_features(priv, supported_features_mask, - dev_op_jumbo_frames, dev_op_dqo_qpl); + dev_op_jumbo_frames, dev_op_dqo_qpl, + dev_op_buffer_sizes, dev_op_modify_ring); free_device_descriptor: dma_pool_free(priv->adminq_pool, descriptor, descriptor_bus); diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h index 5865ccdccbd0..e64f0dbe744d 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.h +++ b/drivers/net/ethernet/google/gve/gve_adminq.h @@ -103,8 +103,7 @@ static_assert(sizeof(struct gve_device_option_gqi_qpl) == 4); struct gve_device_option_dqo_rda { __be32 supported_features_mask; - __be16 tx_comp_ring_entries; - __be16 rx_buff_ring_entries; + __be32 reserved; }; static_assert(sizeof(struct gve_device_option_dqo_rda) == 8); @@ -125,6 +124,25 @@ struct gve_device_option_jumbo_frames { static_assert(sizeof(struct gve_device_option_jumbo_frames) == 8); +struct gve_device_option_buffer_sizes { + /* GVE_SUP_BUFFER_SIZES_MASK bit should be set */ + __be32 supported_features_mask; + __be16 packet_buffer_size; + __be16 header_buffer_size; +}; + +static_assert(sizeof(struct gve_device_option_buffer_sizes) == 8); + +struct gve_device_option_modify_ring { + __be32 supported_featured_mask; + __be16 max_rx_ring_size; + __be16 max_tx_ring_size; + __be16 min_rx_ring_size; + __be16 min_tx_ring_size; +}; + +static_assert(sizeof(struct gve_device_option_modify_ring) == 12); + /* Terminology: * * RDA - Raw DMA Addressing - Buffers associated with SKBs are directly DMA @@ -134,25 +152,31 @@ static_assert(sizeof(struct gve_device_option_jumbo_frames) == 8); * the device for read/write and data is copied from/to SKBs. */ enum gve_dev_opt_id { - GVE_DEV_OPT_ID_GQI_RAW_ADDRESSING = 0x1, - GVE_DEV_OPT_ID_GQI_RDA = 0x2, - GVE_DEV_OPT_ID_GQI_QPL = 0x3, - GVE_DEV_OPT_ID_DQO_RDA = 0x4, - GVE_DEV_OPT_ID_DQO_QPL = 0x7, - GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8, + GVE_DEV_OPT_ID_GQI_RAW_ADDRESSING = 0x1, + GVE_DEV_OPT_ID_GQI_RDA = 0x2, + GVE_DEV_OPT_ID_GQI_QPL = 0x3, + GVE_DEV_OPT_ID_DQO_RDA = 0x4, + GVE_DEV_OPT_ID_MODIFY_RING = 0x6, + GVE_DEV_OPT_ID_DQO_QPL = 0x7, + GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8, + GVE_DEV_OPT_ID_BUFFER_SIZES = 0xa, }; enum gve_dev_opt_req_feat_mask { - GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RAW_ADDRESSING = 0x0, - GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RDA = 0x0, - GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL = 0x0, - GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA = 0x0, - GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES = 0x0, - GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RAW_ADDRESSING = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_GQI_RDA = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_BUFFER_SIZES = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING = 0x0, }; enum gve_sup_feature_mask { - GVE_SUP_JUMBO_FRAMES_MASK = 1 << 2, + GVE_SUP_MODIFY_RING_MASK = 1 << 0, + GVE_SUP_JUMBO_FRAMES_MASK = 1 << 2, + GVE_SUP_BUFFER_SIZES_MASK = 1 << 4, }; #define GVE_DEV_OPT_LEN_GQI_RAW_ADDRESSING 0x0 @@ -165,6 +189,7 @@ enum gve_driver_capbility { gve_driver_capability_dqo_qpl = 2, /* reserved for future use */ gve_driver_capability_dqo_rda = 3, gve_driver_capability_alt_miss_compl = 4, + gve_driver_capability_flexible_buffer_size = 5, }; #define GVE_CAP1(a) BIT((int)a) @@ -176,7 +201,8 @@ enum gve_driver_capbility { (GVE_CAP1(gve_driver_capability_gqi_qpl) | \ GVE_CAP1(gve_driver_capability_gqi_rda) | \ GVE_CAP1(gve_driver_capability_dqo_rda) | \ - GVE_CAP1(gve_driver_capability_alt_miss_compl)) + GVE_CAP1(gve_driver_capability_alt_miss_compl) | \ + GVE_CAP1(gve_driver_capability_flexible_buffer_size)) #define GVE_DRIVER_CAPABILITY_FLAGS2 0x0 #define GVE_DRIVER_CAPABILITY_FLAGS3 0x0 @@ -260,7 +286,9 @@ struct gve_adminq_create_rx_queue { __be16 packet_buffer_size; __be16 rx_buff_ring_size; u8 enable_rsc; - u8 padding[5]; + u8 padding1; + __be16 header_buffer_size; + u8 padding2[2]; }; static_assert(sizeof(struct gve_adminq_create_rx_queue) == 56); @@ -423,7 +451,9 @@ int gve_adminq_configure_device_resources(struct gve_priv *priv, int gve_adminq_deconfigure_device_resources(struct gve_priv *priv); int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_queues); int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_queues); +int gve_adminq_create_single_rx_queue(struct gve_priv *priv, u32 queue_index); int gve_adminq_create_rx_queues(struct gve_priv *priv, u32 num_queues); +int gve_adminq_destroy_single_rx_queue(struct gve_priv *priv, u32 queue_index); int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 queue_id); int gve_adminq_register_page_list(struct gve_priv *priv, struct gve_queue_page_list *qpl); diff --git a/drivers/net/ethernet/google/gve/gve_dqo.h b/drivers/net/ethernet/google/gve/gve_dqo.h index c36b93f0de15..e83773fb891f 100644 --- a/drivers/net/ethernet/google/gve/gve_dqo.h +++ b/drivers/net/ethernet/google/gve/gve_dqo.h @@ -38,10 +38,24 @@ netdev_features_t gve_features_check_dqo(struct sk_buff *skb, netdev_features_t features); bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean); int gve_rx_poll_dqo(struct gve_notify_block *block, int budget); -int gve_tx_alloc_rings_dqo(struct gve_priv *priv); -void gve_tx_free_rings_dqo(struct gve_priv *priv); -int gve_rx_alloc_rings_dqo(struct gve_priv *priv); -void gve_rx_free_rings_dqo(struct gve_priv *priv); +int gve_tx_alloc_rings_dqo(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *cfg); +void gve_tx_free_rings_dqo(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *cfg); +void gve_tx_start_ring_dqo(struct gve_priv *priv, int idx); +void gve_tx_stop_ring_dqo(struct gve_priv *priv, int idx); +int gve_rx_alloc_ring_dqo(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg, + struct gve_rx_ring *rx, + int idx); +void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, + struct gve_rx_alloc_rings_cfg *cfg); +int gve_rx_alloc_rings_dqo(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg); +void gve_rx_free_rings_dqo(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg); +void gve_rx_start_ring_dqo(struct gve_priv *priv, int idx); +void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx); int gve_clean_tx_done_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, struct napi_struct *napi); void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx); @@ -93,4 +107,6 @@ gve_set_itr_coalesce_usecs_dqo(struct gve_priv *priv, gve_write_irq_doorbell_dqo(priv, block, gve_setup_itr_interval_dqo(usecs)); } + +int gve_napi_poll_dqo(struct napi_struct *napi, int budget); #endif /* _GVE_DQO_H_ */ diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index e5397aa1e48f..fe1741d482b4 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -4,11 +4,11 @@ * Copyright (C) 2015-2021 Google, Inc. */ -#include <linux/ethtool.h> #include <linux/rtnetlink.h> #include "gve.h" #include "gve_adminq.h" #include "gve_dqo.h" +#include "gve_utils.h" static void gve_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info) @@ -40,17 +40,18 @@ static u32 gve_get_msglevel(struct net_device *netdev) * as declared in enum xdp_action inside file uapi/linux/bpf.h . */ static const char gve_gstrings_main_stats[][ETH_GSTRING_LEN] = { - "rx_packets", "tx_packets", "rx_bytes", "tx_bytes", - "rx_dropped", "tx_dropped", "tx_timeouts", + "rx_packets", "rx_hsplit_pkt", "tx_packets", "rx_bytes", + "tx_bytes", "rx_dropped", "tx_dropped", "tx_timeouts", "rx_skb_alloc_fail", "rx_buf_alloc_fail", "rx_desc_err_dropped_pkt", + "rx_hsplit_unsplit_pkt", "interface_up_cnt", "interface_down_cnt", "reset_cnt", "page_alloc_fail", "dma_mapping_error", "stats_report_trigger_cnt", }; static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = { - "rx_posted_desc[%u]", "rx_completed_desc[%u]", "rx_consumed_desc[%u]", "rx_bytes[%u]", - "rx_cont_packet_cnt[%u]", "rx_frag_flip_cnt[%u]", "rx_frag_copy_cnt[%u]", - "rx_frag_alloc_cnt[%u]", + "rx_posted_desc[%u]", "rx_completed_desc[%u]", "rx_consumed_desc[%u]", + "rx_bytes[%u]", "rx_hsplit_bytes[%u]", "rx_cont_packet_cnt[%u]", + "rx_frag_flip_cnt[%u]", "rx_frag_copy_cnt[%u]", "rx_frag_alloc_cnt[%u]", "rx_dropped_pkt[%u]", "rx_copybreak_pkt[%u]", "rx_copied_pkt[%u]", "rx_queue_drop_cnt[%u]", "rx_no_buffers_posted[%u]", "rx_drops_packet_over_mru[%u]", "rx_drops_invalid_checksum[%u]", @@ -73,7 +74,7 @@ static const char gve_gstrings_adminq_stats[][ETH_GSTRING_LEN] = { "adminq_create_tx_queue_cnt", "adminq_create_rx_queue_cnt", "adminq_destroy_tx_queue_cnt", "adminq_destroy_rx_queue_cnt", "adminq_dcfg_device_resources_cnt", "adminq_set_driver_parameter_cnt", - "adminq_report_stats_cnt", "adminq_report_link_speed_cnt" + "adminq_report_stats_cnt", "adminq_report_link_speed_cnt", "adminq_get_ptype_map_cnt" }; static const char gve_gstrings_priv_flags[][ETH_GSTRING_LEN] = { @@ -89,42 +90,34 @@ static const char gve_gstrings_priv_flags[][ETH_GSTRING_LEN] = { static void gve_get_strings(struct net_device *netdev, u32 stringset, u8 *data) { struct gve_priv *priv = netdev_priv(netdev); - char *s = (char *)data; + u8 *s = (char *)data; int num_tx_queues; int i, j; num_tx_queues = gve_num_tx_queues(priv); switch (stringset) { case ETH_SS_STATS: - memcpy(s, *gve_gstrings_main_stats, - sizeof(gve_gstrings_main_stats)); - s += sizeof(gve_gstrings_main_stats); - - for (i = 0; i < priv->rx_cfg.num_queues; i++) { - for (j = 0; j < NUM_GVE_RX_CNTS; j++) { - snprintf(s, ETH_GSTRING_LEN, - gve_gstrings_rx_stats[j], i); - s += ETH_GSTRING_LEN; - } - } + for (i = 0; i < ARRAY_SIZE(gve_gstrings_main_stats); i++) + ethtool_puts(&s, gve_gstrings_main_stats[i]); - for (i = 0; i < num_tx_queues; i++) { - for (j = 0; j < NUM_GVE_TX_CNTS; j++) { - snprintf(s, ETH_GSTRING_LEN, - gve_gstrings_tx_stats[j], i); - s += ETH_GSTRING_LEN; - } - } + for (i = 0; i < priv->rx_cfg.num_queues; i++) + for (j = 0; j < NUM_GVE_RX_CNTS; j++) + ethtool_sprintf(&s, gve_gstrings_rx_stats[j], + i); + + for (i = 0; i < num_tx_queues; i++) + for (j = 0; j < NUM_GVE_TX_CNTS; j++) + ethtool_sprintf(&s, gve_gstrings_tx_stats[j], + i); + + for (i = 0; i < ARRAY_SIZE(gve_gstrings_adminq_stats); i++) + ethtool_puts(&s, gve_gstrings_adminq_stats[i]); - memcpy(s, *gve_gstrings_adminq_stats, - sizeof(gve_gstrings_adminq_stats)); - s += sizeof(gve_gstrings_adminq_stats); break; case ETH_SS_PRIV_FLAGS: - memcpy(s, *gve_gstrings_priv_flags, - sizeof(gve_gstrings_priv_flags)); - s += sizeof(gve_gstrings_priv_flags); + for (i = 0; i < ARRAY_SIZE(gve_gstrings_priv_flags); i++) + ethtool_puts(&s, gve_gstrings_priv_flags[i]); break; default: @@ -154,15 +147,19 @@ static void gve_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats, u64 *data) { - u64 tmp_rx_pkts, tmp_rx_bytes, tmp_rx_skb_alloc_fail, - tmp_rx_buf_alloc_fail, tmp_rx_desc_err_dropped_pkt, + u64 tmp_rx_pkts, tmp_rx_hsplit_pkt, tmp_rx_bytes, tmp_rx_hsplit_bytes, + tmp_rx_skb_alloc_fail, tmp_rx_buf_alloc_fail, + tmp_rx_desc_err_dropped_pkt, tmp_rx_hsplit_unsplit_pkt, tmp_tx_pkts, tmp_tx_bytes; - u64 rx_buf_alloc_fail, rx_desc_err_dropped_pkt, rx_pkts, - rx_skb_alloc_fail, rx_bytes, tx_pkts, tx_bytes, tx_dropped; + u64 rx_buf_alloc_fail, rx_desc_err_dropped_pkt, rx_hsplit_unsplit_pkt, + rx_pkts, rx_hsplit_pkt, rx_skb_alloc_fail, rx_bytes, tx_pkts, tx_bytes, + tx_dropped; int stats_idx, base_stats_idx, max_stats_idx; struct stats *report_stats; int *rx_qid_to_stats_idx; int *tx_qid_to_stats_idx; + int num_stopped_rxqs = 0; + int num_stopped_txqs = 0; struct gve_priv *priv; bool skip_nic_stats; unsigned int start; @@ -179,14 +176,27 @@ gve_get_ethtool_stats(struct net_device *netdev, sizeof(int), GFP_KERNEL); if (!rx_qid_to_stats_idx) return; + for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) { + rx_qid_to_stats_idx[ring] = -1; + if (!gve_rx_was_added_to_block(priv, ring)) + num_stopped_rxqs++; + } tx_qid_to_stats_idx = kmalloc_array(num_tx_queues, sizeof(int), GFP_KERNEL); if (!tx_qid_to_stats_idx) { kfree(rx_qid_to_stats_idx); return; } - for (rx_pkts = 0, rx_bytes = 0, rx_skb_alloc_fail = 0, - rx_buf_alloc_fail = 0, rx_desc_err_dropped_pkt = 0, ring = 0; + for (ring = 0; ring < num_tx_queues; ring++) { + tx_qid_to_stats_idx[ring] = -1; + if (!gve_tx_was_added_to_block(priv, ring)) + num_stopped_txqs++; + } + + for (rx_pkts = 0, rx_bytes = 0, rx_hsplit_pkt = 0, + rx_skb_alloc_fail = 0, rx_buf_alloc_fail = 0, + rx_desc_err_dropped_pkt = 0, rx_hsplit_unsplit_pkt = 0, + ring = 0; ring < priv->rx_cfg.num_queues; ring++) { if (priv->rx) { do { @@ -195,18 +205,23 @@ gve_get_ethtool_stats(struct net_device *netdev, start = u64_stats_fetch_begin(&priv->rx[ring].statss); tmp_rx_pkts = rx->rpackets; + tmp_rx_hsplit_pkt = rx->rx_hsplit_pkt; tmp_rx_bytes = rx->rbytes; tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail; tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail; tmp_rx_desc_err_dropped_pkt = rx->rx_desc_err_dropped_pkt; + tmp_rx_hsplit_unsplit_pkt = + rx->rx_hsplit_unsplit_pkt; } while (u64_stats_fetch_retry(&priv->rx[ring].statss, start)); rx_pkts += tmp_rx_pkts; + rx_hsplit_pkt += tmp_rx_hsplit_pkt; rx_bytes += tmp_rx_bytes; rx_skb_alloc_fail += tmp_rx_skb_alloc_fail; rx_buf_alloc_fail += tmp_rx_buf_alloc_fail; rx_desc_err_dropped_pkt += tmp_rx_desc_err_dropped_pkt; + rx_hsplit_unsplit_pkt += tmp_rx_hsplit_unsplit_pkt; } } for (tx_pkts = 0, tx_bytes = 0, tx_dropped = 0, ring = 0; @@ -227,6 +242,7 @@ gve_get_ethtool_stats(struct net_device *netdev, i = 0; data[i++] = rx_pkts; + data[i++] = rx_hsplit_pkt; data[i++] = tx_pkts; data[i++] = rx_bytes; data[i++] = tx_bytes; @@ -238,6 +254,7 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = rx_skb_alloc_fail; data[i++] = rx_buf_alloc_fail; data[i++] = rx_desc_err_dropped_pkt; + data[i++] = rx_hsplit_unsplit_pkt; data[i++] = priv->interface_up_cnt; data[i++] = priv->interface_down_cnt; data[i++] = priv->reset_cnt; @@ -249,7 +266,13 @@ gve_get_ethtool_stats(struct net_device *netdev, /* For rx cross-reporting stats, start from nic rx stats in report */ base_stats_idx = GVE_TX_STATS_REPORT_NUM * num_tx_queues + GVE_RX_STATS_REPORT_NUM * priv->rx_cfg.num_queues; - max_stats_idx = NIC_RX_STATS_REPORT_NUM * priv->rx_cfg.num_queues + + /* The boundary between driver stats and NIC stats shifts if there are + * stopped queues. + */ + base_stats_idx += NIC_RX_STATS_REPORT_NUM * num_stopped_rxqs + + NIC_TX_STATS_REPORT_NUM * num_stopped_txqs; + max_stats_idx = NIC_RX_STATS_REPORT_NUM * + (priv->rx_cfg.num_queues - num_stopped_rxqs) + base_stats_idx; /* Preprocess the stats report for rx, map queue id to start index */ skip_nic_stats = false; @@ -263,6 +286,10 @@ gve_get_ethtool_stats(struct net_device *netdev, skip_nic_stats = true; break; } + if (queue_id < 0 || queue_id >= priv->rx_cfg.num_queues) { + net_err_ratelimited("Invalid rxq id in NIC stats\n"); + continue; + } rx_qid_to_stats_idx[queue_id] = stats_idx; } /* walk RX rings */ @@ -277,6 +304,7 @@ gve_get_ethtool_stats(struct net_device *netdev, start = u64_stats_fetch_begin(&priv->rx[ring].statss); tmp_rx_bytes = rx->rbytes; + tmp_rx_hsplit_bytes = rx->rx_hsplit_bytes; tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail; tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail; tmp_rx_desc_err_dropped_pkt = @@ -284,6 +312,7 @@ gve_get_ethtool_stats(struct net_device *netdev, } while (u64_stats_fetch_retry(&priv->rx[ring].statss, start)); data[i++] = tmp_rx_bytes; + data[i++] = tmp_rx_hsplit_bytes; data[i++] = rx->rx_cont_packet_cnt; data[i++] = rx->rx_frag_flip_cnt; data[i++] = rx->rx_frag_copy_cnt; @@ -295,11 +324,11 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = rx->rx_copybreak_pkt; data[i++] = rx->rx_copied_pkt; /* stats from NIC */ - if (skip_nic_stats) { + stats_idx = rx_qid_to_stats_idx[ring]; + if (skip_nic_stats || stats_idx < 0) { /* skip NIC rx stats */ i += NIC_RX_STATS_REPORT_NUM; } else { - stats_idx = rx_qid_to_stats_idx[ring]; for (j = 0; j < NIC_RX_STATS_REPORT_NUM; j++) { u64 value = be64_to_cpu(report_stats[stats_idx + j].value); @@ -325,7 +354,8 @@ gve_get_ethtool_stats(struct net_device *netdev, /* For tx cross-reporting stats, start from nic tx stats in report */ base_stats_idx = max_stats_idx; - max_stats_idx = NIC_TX_STATS_REPORT_NUM * num_tx_queues + + max_stats_idx = NIC_TX_STATS_REPORT_NUM * + (num_tx_queues - num_stopped_txqs) + max_stats_idx; /* Preprocess the stats report for tx, map queue id to start index */ skip_nic_stats = false; @@ -339,6 +369,10 @@ gve_get_ethtool_stats(struct net_device *netdev, skip_nic_stats = true; break; } + if (queue_id < 0 || queue_id >= num_tx_queues) { + net_err_ratelimited("Invalid txq id in NIC stats\n"); + continue; + } tx_qid_to_stats_idx[queue_id] = stats_idx; } /* walk TX rings */ @@ -370,11 +404,11 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = gve_tx_load_event_counter(priv, tx); data[i++] = tx->dma_mapping_error; /* stats from NIC */ - if (skip_nic_stats) { + stats_idx = tx_qid_to_stats_idx[ring]; + if (skip_nic_stats || stats_idx < 0) { /* skip NIC tx stats */ i += NIC_TX_STATS_REPORT_NUM; } else { - stats_idx = tx_qid_to_stats_idx[ring]; for (j = 0; j < NIC_TX_STATS_REPORT_NUM; j++) { u64 value = be64_to_cpu(report_stats[stats_idx + j].value); @@ -415,6 +449,7 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = priv->adminq_set_driver_parameter_cnt; data[i++] = priv->adminq_report_stats_cnt; data[i++] = priv->adminq_report_link_speed_cnt; + data[i++] = priv->adminq_get_ptype_map_cnt; } static void gve_get_channels(struct net_device *netdev, @@ -476,10 +511,94 @@ static void gve_get_ringparam(struct net_device *netdev, { struct gve_priv *priv = netdev_priv(netdev); - cmd->rx_max_pending = priv->rx_desc_cnt; - cmd->tx_max_pending = priv->tx_desc_cnt; + cmd->rx_max_pending = priv->max_rx_desc_cnt; + cmd->tx_max_pending = priv->max_tx_desc_cnt; cmd->rx_pending = priv->rx_desc_cnt; cmd->tx_pending = priv->tx_desc_cnt; + + if (!gve_header_split_supported(priv)) + kernel_cmd->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_UNKNOWN; + else if (priv->header_split_enabled) + kernel_cmd->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_ENABLED; + else + kernel_cmd->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_DISABLED; +} + +static int gve_adjust_ring_sizes(struct gve_priv *priv, + u16 new_tx_desc_cnt, + u16 new_rx_desc_cnt) +{ + struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; + struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; + int err; + + /* get current queue configuration */ + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); + + /* copy over the new ring_size from ethtool */ + tx_alloc_cfg.ring_size = new_tx_desc_cnt; + rx_alloc_cfg.ring_size = new_rx_desc_cnt; + + if (netif_running(priv->dev)) { + err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); + if (err) + return err; + } + + /* Set new ring_size for the next up */ + priv->tx_desc_cnt = new_tx_desc_cnt; + priv->rx_desc_cnt = new_rx_desc_cnt; + + return 0; +} + +static int gve_validate_req_ring_size(struct gve_priv *priv, u16 new_tx_desc_cnt, + u16 new_rx_desc_cnt) +{ + /* check for valid range */ + if (new_tx_desc_cnt < priv->min_tx_desc_cnt || + new_tx_desc_cnt > priv->max_tx_desc_cnt || + new_rx_desc_cnt < priv->min_rx_desc_cnt || + new_rx_desc_cnt > priv->max_rx_desc_cnt) { + dev_err(&priv->pdev->dev, "Requested descriptor count out of range\n"); + return -EINVAL; + } + + if (!is_power_of_2(new_tx_desc_cnt) || !is_power_of_2(new_rx_desc_cnt)) { + dev_err(&priv->pdev->dev, "Requested descriptor count has to be a power of 2\n"); + return -EINVAL; + } + return 0; +} + +static int gve_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *cmd, + struct kernel_ethtool_ringparam *kernel_cmd, + struct netlink_ext_ack *extack) +{ + struct gve_priv *priv = netdev_priv(netdev); + u16 new_tx_cnt, new_rx_cnt; + int err; + + err = gve_set_hsplit_config(priv, kernel_cmd->tcp_data_split); + if (err) + return err; + + if (cmd->tx_pending == priv->tx_desc_cnt && cmd->rx_pending == priv->rx_desc_cnt) + return 0; + + if (!priv->modify_ring_size_enabled) { + dev_err(&priv->pdev->dev, "Modify ring size is not supported.\n"); + return -EOPNOTSUPP; + } + + new_tx_cnt = cmd->tx_pending; + new_rx_cnt = cmd->rx_pending; + + if (gve_validate_req_ring_size(priv, new_tx_cnt, new_rx_cnt)) + return -EINVAL; + + return gve_adjust_ring_sizes(priv, new_tx_cnt, new_rx_cnt); } static int gve_user_reset(struct net_device *netdev, u32 *flags) @@ -655,6 +774,7 @@ static int gve_set_coalesce(struct net_device *netdev, const struct ethtool_ops gve_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS, + .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT, .get_drvinfo = gve_get_drvinfo, .get_strings = gve_get_strings, .get_sset_count = gve_get_sset_count, @@ -667,10 +787,12 @@ const struct ethtool_ops gve_ethtool_ops = { .get_coalesce = gve_get_coalesce, .set_coalesce = gve_set_coalesce, .get_ringparam = gve_get_ringparam, + .set_ringparam = gve_set_ringparam, .reset = gve_user_reset, .get_tunable = gve_get_tunable, .set_tunable = gve_set_tunable, .get_priv_flags = gve_get_priv_flags, .set_priv_flags = gve_set_priv_flags, - .get_link_ksettings = gve_get_link_ksettings + .get_link_ksettings = gve_get_link_ksettings, + .get_ts_info = ethtool_op_get_ts_info, }; diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 619bf63ec935..cabf7d4bcecb 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -9,6 +9,7 @@ #include <linux/etherdevice.h> #include <linux/filter.h> #include <linux/interrupt.h> +#include <linux/irq.h> #include <linux/module.h> #include <linux/pci.h> #include <linux/sched.h> @@ -16,12 +17,14 @@ #include <linux/workqueue.h> #include <linux/utsname.h> #include <linux/version.h> +#include <net/netdev_queues.h> #include <net/sch_generic.h> #include <net/xdp_sock_drv.h> #include "gve.h" #include "gve_dqo.h" #include "gve_adminq.h" #include "gve_register.h" +#include "gve_utils.h" #define GVE_DEFAULT_RX_COPYBREAK (256) @@ -252,7 +255,19 @@ static irqreturn_t gve_intr_dqo(int irq, void *arg) return IRQ_HANDLED; } -static int gve_napi_poll(struct napi_struct *napi, int budget) +static int gve_is_napi_on_home_cpu(struct gve_priv *priv, u32 irq) +{ + int cpu_curr = smp_processor_id(); + const struct cpumask *aff_mask; + + aff_mask = irq_get_effective_affinity_mask(irq); + if (unlikely(!aff_mask)) + return 1; + + return cpumask_test_cpu(cpu_curr, aff_mask); +} + +int gve_napi_poll(struct napi_struct *napi, int budget) { struct gve_notify_block *block; __be32 __iomem *irq_doorbell; @@ -302,7 +317,7 @@ static int gve_napi_poll(struct napi_struct *napi, int budget) return work_done; } -static int gve_napi_poll_dqo(struct napi_struct *napi, int budget) +int gve_napi_poll_dqo(struct napi_struct *napi, int budget) { struct gve_notify_block *block = container_of(napi, struct gve_notify_block, napi); @@ -321,8 +336,21 @@ static int gve_napi_poll_dqo(struct napi_struct *napi, int budget) reschedule |= work_done == budget; } - if (reschedule) - return budget; + if (reschedule) { + /* Reschedule by returning budget only if already on the correct + * cpu. + */ + if (likely(gve_is_napi_on_home_cpu(priv, block->irq))) + return budget; + + /* If not on the cpu with which this queue's irq has affinity + * with, we avoid rescheduling napi and arm the irq instead so + * that napi gets rescheduled back eventually onto the right + * cpu. + */ + if (work_done == budget) + work_done--; + } if (likely(napi_complete_done(napi, work_done))) { /* Enable interrupts again. @@ -427,6 +455,7 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv) "Failed to receive msix vector %d\n", i); goto abort_with_some_ntfy_blocks; } + block->irq = priv->msix_vectors[msix_idx].vector; irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, get_cpu_mask(i % active_cpus)); block->irq_db_index = &priv->irq_db_indices[i].index; @@ -440,6 +469,7 @@ abort_with_some_ntfy_blocks: irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, NULL); free_irq(priv->msix_vectors[msix_idx].vector, block); + block->irq = 0; } kvfree(priv->ntfy_blocks); priv->ntfy_blocks = NULL; @@ -473,6 +503,7 @@ static void gve_free_notify_blocks(struct gve_priv *priv) irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, NULL); free_irq(priv->msix_vectors[msix_idx].vector, block); + block->irq = 0; } free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); kvfree(priv->ntfy_blocks); @@ -581,19 +612,75 @@ static void gve_teardown_device_resources(struct gve_priv *priv) gve_clear_device_resources_ok(priv); } -static void gve_add_napi(struct gve_priv *priv, int ntfy_idx, - int (*gve_poll)(struct napi_struct *, int)) +static int gve_unregister_qpl(struct gve_priv *priv, + struct gve_queue_page_list *qpl) { - struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + int err; + + if (!qpl) + return 0; + + err = gve_adminq_unregister_page_list(priv, qpl->id); + if (err) { + netif_err(priv, drv, priv->dev, + "Failed to unregister queue page list %d\n", + qpl->id); + return err; + } - netif_napi_add(priv->dev, &block->napi, gve_poll); + priv->num_registered_pages -= qpl->num_entries; + return 0; } -static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx) +static int gve_register_qpl(struct gve_priv *priv, + struct gve_queue_page_list *qpl) { - struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + int pages; + int err; - netif_napi_del(&block->napi); + if (!qpl) + return 0; + + pages = qpl->num_entries; + + if (pages + priv->num_registered_pages > priv->max_registered_pages) { + netif_err(priv, drv, priv->dev, + "Reached max number of registered pages %llu > %llu\n", + pages + priv->num_registered_pages, + priv->max_registered_pages); + return -EINVAL; + } + + err = gve_adminq_register_page_list(priv, qpl); + if (err) { + netif_err(priv, drv, priv->dev, + "failed to register queue page list %d\n", + qpl->id); + return err; + } + + priv->num_registered_pages += pages; + return 0; +} + +static struct gve_queue_page_list *gve_tx_get_qpl(struct gve_priv *priv, int idx) +{ + struct gve_tx_ring *tx = &priv->tx[idx]; + + if (gve_is_gqi(priv)) + return tx->tx_fifo.qpl; + else + return tx->dqo.qpl; +} + +static struct gve_queue_page_list *gve_rx_get_qpl(struct gve_priv *priv, int idx) +{ + struct gve_rx_ring *rx = &priv->rx[idx]; + + if (gve_is_gqi(priv)) + return rx->data.qpl; + else + return rx->dqo.qpl; } static int gve_register_xdp_qpls(struct gve_priv *priv) @@ -602,55 +689,38 @@ static int gve_register_xdp_qpls(struct gve_priv *priv) int err; int i; - start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); + start_id = gve_xdp_tx_start_queue_id(priv); for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { - err = gve_adminq_register_page_list(priv, &priv->qpls[i]); - if (err) { - netif_err(priv, drv, priv->dev, - "failed to register queue page list %d\n", - priv->qpls[i].id); - /* This failure will trigger a reset - no need to clean - * up - */ + err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i)); + /* This failure will trigger a reset - no need to clean up */ + if (err) return err; - } } return 0; } static int gve_register_qpls(struct gve_priv *priv) { - int start_id; + int num_tx_qpls, num_rx_qpls; int err; int i; - start_id = gve_tx_start_qpl_id(priv); - for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) { - err = gve_adminq_register_page_list(priv, &priv->qpls[i]); - if (err) { - netif_err(priv, drv, priv->dev, - "failed to register queue page list %d\n", - priv->qpls[i].id); - /* This failure will trigger a reset - no need to clean - * up - */ + num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv), + gve_is_qpl(priv)); + num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); + + for (i = 0; i < num_tx_qpls; i++) { + err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i)); + if (err) return err; - } } - start_id = gve_rx_start_qpl_id(priv); - for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) { - err = gve_adminq_register_page_list(priv, &priv->qpls[i]); - if (err) { - netif_err(priv, drv, priv->dev, - "failed to register queue page list %d\n", - priv->qpls[i].id); - /* This failure will trigger a reset - no need to clean - * up - */ + for (i = 0; i < num_rx_qpls; i++) { + err = gve_register_qpl(priv, gve_rx_get_qpl(priv, i)); + if (err) return err; - } } + return 0; } @@ -660,48 +730,38 @@ static int gve_unregister_xdp_qpls(struct gve_priv *priv) int err; int i; - start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); + start_id = gve_xdp_tx_start_queue_id(priv); for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { - err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id); - /* This failure will trigger a reset - no need to clean up */ - if (err) { - netif_err(priv, drv, priv->dev, - "Failed to unregister queue page list %d\n", - priv->qpls[i].id); + err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i)); + /* This failure will trigger a reset - no need to clean */ + if (err) return err; - } } return 0; } static int gve_unregister_qpls(struct gve_priv *priv) { - int start_id; + int num_tx_qpls, num_rx_qpls; int err; int i; - start_id = gve_tx_start_qpl_id(priv); - for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) { - err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id); - /* This failure will trigger a reset - no need to clean up */ - if (err) { - netif_err(priv, drv, priv->dev, - "Failed to unregister queue page list %d\n", - priv->qpls[i].id); + num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv), + gve_is_qpl(priv)); + num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); + + for (i = 0; i < num_tx_qpls; i++) { + err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i)); + /* This failure will trigger a reset - no need to clean */ + if (err) return err; - } } - start_id = gve_rx_start_qpl_id(priv); - for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) { - err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id); - /* This failure will trigger a reset - no need to clean up */ - if (err) { - netif_err(priv, drv, priv->dev, - "Failed to unregister queue page list %d\n", - priv->qpls[i].id); + for (i = 0; i < num_rx_qpls; i++) { + err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, i)); + /* This failure will trigger a reset - no need to clean */ + if (err) return err; - } } return 0; } @@ -776,120 +836,122 @@ static int gve_create_rings(struct gve_priv *priv) return 0; } -static void add_napi_init_xdp_sync_stats(struct gve_priv *priv, - int (*napi_poll)(struct napi_struct *napi, - int budget)) +static void init_xdp_sync_stats(struct gve_priv *priv) { int start_id = gve_xdp_tx_start_queue_id(priv); int i; - /* Add xdp tx napi & init sync stats*/ + /* Init stats */ for (i = start_id; i < start_id + priv->num_xdp_queues; i++) { int ntfy_idx = gve_tx_idx_to_ntfy(priv, i); u64_stats_init(&priv->tx[i].statss); priv->tx[i].ntfy_id = ntfy_idx; - gve_add_napi(priv, ntfy_idx, napi_poll); } } -static void add_napi_init_sync_stats(struct gve_priv *priv, - int (*napi_poll)(struct napi_struct *napi, - int budget)) +static void gve_init_sync_stats(struct gve_priv *priv) { int i; - /* Add tx napi & init sync stats*/ - for (i = 0; i < gve_num_tx_queues(priv); i++) { - int ntfy_idx = gve_tx_idx_to_ntfy(priv, i); - + for (i = 0; i < priv->tx_cfg.num_queues; i++) u64_stats_init(&priv->tx[i].statss); - priv->tx[i].ntfy_id = ntfy_idx; - gve_add_napi(priv, ntfy_idx, napi_poll); - } - /* Add rx napi & init sync stats*/ - for (i = 0; i < priv->rx_cfg.num_queues; i++) { - int ntfy_idx = gve_rx_idx_to_ntfy(priv, i); + /* Init stats for XDP TX queues */ + init_xdp_sync_stats(priv); + + for (i = 0; i < priv->rx_cfg.num_queues; i++) u64_stats_init(&priv->rx[i].statss); - priv->rx[i].ntfy_id = ntfy_idx; - gve_add_napi(priv, ntfy_idx, napi_poll); +} + +static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *cfg) +{ + cfg->qcfg = &priv->tx_cfg; + cfg->raw_addressing = !gve_is_qpl(priv); + cfg->ring_size = priv->tx_desc_cnt; + cfg->start_idx = 0; + cfg->num_rings = gve_num_tx_queues(priv); + cfg->tx = priv->tx; +} + +static void gve_tx_stop_rings(struct gve_priv *priv, int start_id, int num_rings) +{ + int i; + + if (!priv->tx) + return; + + for (i = start_id; i < start_id + num_rings; i++) { + if (gve_is_gqi(priv)) + gve_tx_stop_ring_gqi(priv, i); + else + gve_tx_stop_ring_dqo(priv, i); } } -static void gve_tx_free_rings(struct gve_priv *priv, int start_id, int num_rings) +static void gve_tx_start_rings(struct gve_priv *priv, int start_id, + int num_rings) { - if (gve_is_gqi(priv)) { - gve_tx_free_rings_gqi(priv, start_id, num_rings); - } else { - gve_tx_free_rings_dqo(priv); + int i; + + for (i = start_id; i < start_id + num_rings; i++) { + if (gve_is_gqi(priv)) + gve_tx_start_ring_gqi(priv, i); + else + gve_tx_start_ring_dqo(priv, i); } } static int gve_alloc_xdp_rings(struct gve_priv *priv) { - int start_id; + struct gve_tx_alloc_rings_cfg cfg = {0}; int err = 0; if (!priv->num_xdp_queues) return 0; - start_id = gve_xdp_tx_start_queue_id(priv); - err = gve_tx_alloc_rings(priv, start_id, priv->num_xdp_queues); + gve_tx_get_curr_alloc_cfg(priv, &cfg); + cfg.start_idx = gve_xdp_tx_start_queue_id(priv); + cfg.num_rings = priv->num_xdp_queues; + + err = gve_tx_alloc_rings_gqi(priv, &cfg); if (err) return err; - add_napi_init_xdp_sync_stats(priv, gve_napi_poll); + + gve_tx_start_rings(priv, cfg.start_idx, cfg.num_rings); + init_xdp_sync_stats(priv); return 0; } -static int gve_alloc_rings(struct gve_priv *priv) +static int gve_queues_mem_alloc(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) { int err; - /* Setup tx rings */ - priv->tx = kvcalloc(priv->tx_cfg.max_queues, sizeof(*priv->tx), - GFP_KERNEL); - if (!priv->tx) - return -ENOMEM; - if (gve_is_gqi(priv)) - err = gve_tx_alloc_rings(priv, 0, gve_num_tx_queues(priv)); + err = gve_tx_alloc_rings_gqi(priv, tx_alloc_cfg); else - err = gve_tx_alloc_rings_dqo(priv); + err = gve_tx_alloc_rings_dqo(priv, tx_alloc_cfg); if (err) - goto free_tx; - - /* Setup rx rings */ - priv->rx = kvcalloc(priv->rx_cfg.max_queues, sizeof(*priv->rx), - GFP_KERNEL); - if (!priv->rx) { - err = -ENOMEM; - goto free_tx_queue; - } + return err; if (gve_is_gqi(priv)) - err = gve_rx_alloc_rings(priv); + err = gve_rx_alloc_rings_gqi(priv, rx_alloc_cfg); else - err = gve_rx_alloc_rings_dqo(priv); + err = gve_rx_alloc_rings_dqo(priv, rx_alloc_cfg); if (err) - goto free_rx; - - if (gve_is_gqi(priv)) - add_napi_init_sync_stats(priv, gve_napi_poll); - else - add_napi_init_sync_stats(priv, gve_napi_poll_dqo); + goto free_tx; return 0; -free_rx: - kvfree(priv->rx); - priv->rx = NULL; -free_tx_queue: - gve_tx_free_rings(priv, 0, gve_num_tx_queues(priv)); free_tx: - kvfree(priv->tx); - priv->tx = NULL; + if (gve_is_gqi(priv)) + gve_tx_free_rings_gqi(priv, tx_alloc_cfg); + else + gve_tx_free_rings_dqo(priv, tx_alloc_cfg); return err; } @@ -937,52 +999,30 @@ static int gve_destroy_rings(struct gve_priv *priv) return 0; } -static void gve_rx_free_rings(struct gve_priv *priv) -{ - if (gve_is_gqi(priv)) - gve_rx_free_rings_gqi(priv); - else - gve_rx_free_rings_dqo(priv); -} - static void gve_free_xdp_rings(struct gve_priv *priv) { - int ntfy_idx, start_id; - int i; + struct gve_tx_alloc_rings_cfg cfg = {0}; + + gve_tx_get_curr_alloc_cfg(priv, &cfg); + cfg.start_idx = gve_xdp_tx_start_queue_id(priv); + cfg.num_rings = priv->num_xdp_queues; - start_id = gve_xdp_tx_start_queue_id(priv); if (priv->tx) { - for (i = start_id; i < start_id + priv->num_xdp_queues; i++) { - ntfy_idx = gve_tx_idx_to_ntfy(priv, i); - gve_remove_napi(priv, ntfy_idx); - } - gve_tx_free_rings(priv, start_id, priv->num_xdp_queues); + gve_tx_stop_rings(priv, cfg.start_idx, cfg.num_rings); + gve_tx_free_rings_gqi(priv, &cfg); } } -static void gve_free_rings(struct gve_priv *priv) +static void gve_queues_mem_free(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_cfg, + struct gve_rx_alloc_rings_cfg *rx_cfg) { - int num_tx_queues = gve_num_tx_queues(priv); - int ntfy_idx; - int i; - - if (priv->tx) { - for (i = 0; i < num_tx_queues; i++) { - ntfy_idx = gve_tx_idx_to_ntfy(priv, i); - gve_remove_napi(priv, ntfy_idx); - } - gve_tx_free_rings(priv, 0, num_tx_queues); - kvfree(priv->tx); - priv->tx = NULL; - } - if (priv->rx) { - for (i = 0; i < priv->rx_cfg.num_queues; i++) { - ntfy_idx = gve_rx_idx_to_ntfy(priv, i); - gve_remove_napi(priv, ntfy_idx); - } - gve_rx_free_rings(priv); - kvfree(priv->rx); - priv->rx = NULL; + if (gve_is_gqi(priv)) { + gve_tx_free_rings_gqi(priv, tx_cfg); + gve_rx_free_rings_gqi(priv, rx_cfg); + } else { + gve_tx_free_rings_dqo(priv, tx_cfg); + gve_rx_free_rings_dqo(priv, rx_cfg); } } @@ -1004,44 +1044,41 @@ int gve_alloc_page(struct gve_priv *priv, struct device *dev, return 0; } -static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id, - int pages) +struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv, + u32 id, int pages) { - struct gve_queue_page_list *qpl = &priv->qpls[id]; + struct gve_queue_page_list *qpl; int err; int i; - if (pages + priv->num_registered_pages > priv->max_registered_pages) { - netif_err(priv, drv, priv->dev, - "Reached max number of registered pages %llu > %llu\n", - pages + priv->num_registered_pages, - priv->max_registered_pages); - return -EINVAL; - } + qpl = kvzalloc(sizeof(*qpl), GFP_KERNEL); + if (!qpl) + return NULL; qpl->id = id; qpl->num_entries = 0; qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL); - /* caller handles clean up */ if (!qpl->pages) - return -ENOMEM; + goto abort; + qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL); - /* caller handles clean up */ if (!qpl->page_buses) - return -ENOMEM; + goto abort; for (i = 0; i < pages; i++) { err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i], &qpl->page_buses[i], gve_qpl_dma_dir(priv, id), GFP_KERNEL); - /* caller handles clean up */ if (err) - return -ENOMEM; + goto abort; qpl->num_entries++; } - priv->num_registered_pages += pages; - return 0; + return qpl; + +abort: + gve_free_queue_page_list(priv, qpl, id); + return NULL; } void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, @@ -1053,13 +1090,16 @@ void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, put_page(page); } -static void gve_free_queue_page_list(struct gve_priv *priv, u32 id) +void gve_free_queue_page_list(struct gve_priv *priv, + struct gve_queue_page_list *qpl, + u32 id) { - struct gve_queue_page_list *qpl = &priv->qpls[id]; int i; - if (!qpl->pages) + if (!qpl) return; + if (!qpl->pages) + goto free_qpl; if (!qpl->page_buses) goto free_pages; @@ -1072,115 +1112,8 @@ static void gve_free_queue_page_list(struct gve_priv *priv, u32 id) free_pages: kvfree(qpl->pages); qpl->pages = NULL; - priv->num_registered_pages -= qpl->num_entries; -} - -static int gve_alloc_xdp_qpls(struct gve_priv *priv) -{ - int start_id; - int i, j; - int err; - - start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); - for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { - err = gve_alloc_queue_page_list(priv, i, - priv->tx_pages_per_qpl); - if (err) - goto free_qpls; - } - - return 0; - -free_qpls: - for (j = start_id; j <= i; j++) - gve_free_queue_page_list(priv, j); - return err; -} - -static int gve_alloc_qpls(struct gve_priv *priv) -{ - int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues; - int page_count; - int start_id; - int i, j; - int err; - - if (!gve_is_qpl(priv)) - return 0; - - priv->qpls = kvcalloc(max_queues, sizeof(*priv->qpls), GFP_KERNEL); - if (!priv->qpls) - return -ENOMEM; - - start_id = gve_tx_start_qpl_id(priv); - page_count = priv->tx_pages_per_qpl; - for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) { - err = gve_alloc_queue_page_list(priv, i, - page_count); - if (err) - goto free_qpls; - } - - start_id = gve_rx_start_qpl_id(priv); - - /* For GQI_QPL number of pages allocated have 1:1 relationship with - * number of descriptors. For DQO, number of pages required are - * more than descriptors (because of out of order completions). - */ - page_count = priv->queue_format == GVE_GQI_QPL_FORMAT ? - priv->rx_data_slot_cnt : priv->rx_pages_per_qpl; - for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) { - err = gve_alloc_queue_page_list(priv, i, - page_count); - if (err) - goto free_qpls; - } - - priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(max_queues) * - sizeof(unsigned long) * BITS_PER_BYTE; - priv->qpl_cfg.qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues), - sizeof(unsigned long), GFP_KERNEL); - if (!priv->qpl_cfg.qpl_id_map) { - err = -ENOMEM; - goto free_qpls; - } - - return 0; - -free_qpls: - for (j = 0; j <= i; j++) - gve_free_queue_page_list(priv, j); - kvfree(priv->qpls); - priv->qpls = NULL; - return err; -} - -static void gve_free_xdp_qpls(struct gve_priv *priv) -{ - int start_id; - int i; - - start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); - for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) - gve_free_queue_page_list(priv, i); -} - -static void gve_free_qpls(struct gve_priv *priv) -{ - int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues; - int i; - - if (!priv->qpls) - return; - - kvfree(priv->qpl_cfg.qpl_id_map); - priv->qpl_cfg.qpl_id_map = NULL; - - for (i = 0; i < max_queues; i++) - gve_free_queue_page_list(priv, i); - - kvfree(priv->qpls); - priv->qpls = NULL; +free_qpl: + kvfree(qpl); } /* Use this to schedule a reset when the device is capable of continuing @@ -1278,58 +1211,127 @@ static void gve_unreg_xdp_info(struct gve_priv *priv) static void gve_drain_page_cache(struct gve_priv *priv) { - struct page_frag_cache *nc; int i; - for (i = 0; i < priv->rx_cfg.num_queues; i++) { - nc = &priv->rx[i].page_cache; - if (nc->va) { - __page_frag_cache_drain(virt_to_page(nc->va), - nc->pagecnt_bias); - nc->va = NULL; - } - } + for (i = 0; i < priv->rx_cfg.num_queues; i++) + page_frag_cache_drain(&priv->rx[i].page_cache); } -static int gve_open(struct net_device *dev) +static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg) { - struct gve_priv *priv = netdev_priv(dev); + cfg->qcfg = &priv->rx_cfg; + cfg->qcfg_tx = &priv->tx_cfg; + cfg->raw_addressing = !gve_is_qpl(priv); + cfg->enable_header_split = priv->header_split_enabled; + cfg->ring_size = priv->rx_desc_cnt; + cfg->packet_buffer_size = gve_is_gqi(priv) ? + GVE_DEFAULT_RX_BUFFER_SIZE : + priv->data_buffer_size_dqo; + cfg->rx = priv->rx; +} + +void gve_get_curr_alloc_cfgs(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) +{ + gve_tx_get_curr_alloc_cfg(priv, tx_alloc_cfg); + gve_rx_get_curr_alloc_cfg(priv, rx_alloc_cfg); +} + +static void gve_rx_start_ring(struct gve_priv *priv, int i) +{ + if (gve_is_gqi(priv)) + gve_rx_start_ring_gqi(priv, i); + else + gve_rx_start_ring_dqo(priv, i); +} + +static void gve_rx_start_rings(struct gve_priv *priv, int num_rings) +{ + int i; + + for (i = 0; i < num_rings; i++) + gve_rx_start_ring(priv, i); +} + +static void gve_rx_stop_ring(struct gve_priv *priv, int i) +{ + if (gve_is_gqi(priv)) + gve_rx_stop_ring_gqi(priv, i); + else + gve_rx_stop_ring_dqo(priv, i); +} + +static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings) +{ + int i; + + if (!priv->rx) + return; + + for (i = 0; i < num_rings; i++) + gve_rx_stop_ring(priv, i); +} + +static void gve_queues_mem_remove(struct gve_priv *priv) +{ + struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; + struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; + + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); + gve_queues_mem_free(priv, &tx_alloc_cfg, &rx_alloc_cfg); + priv->tx = NULL; + priv->rx = NULL; +} + +/* The passed-in queue memory is stored into priv and the queues are made live. + * No memory is allocated. Passed-in memory is freed on errors. + */ +static int gve_queues_start(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) +{ + struct net_device *dev = priv->dev; int err; + /* Record new resources into priv */ + priv->tx = tx_alloc_cfg->tx; + priv->rx = rx_alloc_cfg->rx; + + /* Record new configs into priv */ + priv->tx_cfg = *tx_alloc_cfg->qcfg; + priv->rx_cfg = *rx_alloc_cfg->qcfg; + priv->tx_desc_cnt = tx_alloc_cfg->ring_size; + priv->rx_desc_cnt = rx_alloc_cfg->ring_size; + if (priv->xdp_prog) priv->num_xdp_queues = priv->rx_cfg.num_queues; else priv->num_xdp_queues = 0; - err = gve_alloc_qpls(priv); - if (err) - return err; - - err = gve_alloc_rings(priv); - if (err) - goto free_qpls; + gve_tx_start_rings(priv, 0, tx_alloc_cfg->num_rings); + gve_rx_start_rings(priv, rx_alloc_cfg->qcfg->num_queues); + gve_init_sync_stats(priv); err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues); if (err) - goto free_rings; + goto stop_and_free_rings; err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues); if (err) - goto free_rings; + goto stop_and_free_rings; err = gve_reg_xdp_info(priv, dev); if (err) - goto free_rings; + goto stop_and_free_rings; err = gve_register_qpls(priv); if (err) goto reset; - if (!gve_is_gqi(priv)) { - /* Hard code this for now. This may be tuned in the future for - * performance. - */ - priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE; - } + priv->header_split_enabled = rx_alloc_cfg->enable_header_split; + priv->data_buffer_size_dqo = rx_alloc_cfg->packet_buffer_size; + err = gve_create_rings(priv); if (err) goto reset; @@ -1346,32 +1348,49 @@ static int gve_open(struct net_device *dev) priv->interface_up_cnt++; return 0; -free_rings: - gve_free_rings(priv); -free_qpls: - gve_free_qpls(priv); - return err; - reset: - /* This must have been called from a reset due to the rtnl lock - * so just return at this point. - */ if (gve_get_reset_in_progress(priv)) - return err; - /* Otherwise reset before returning */ + goto stop_and_free_rings; gve_reset_and_teardown(priv, true); /* if this fails there is nothing we can do so just ignore the return */ gve_reset_recovery(priv, false); /* return the original error */ return err; +stop_and_free_rings: + gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv)); + gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); + gve_queues_mem_remove(priv); + return err; } -static int gve_close(struct net_device *dev) +static int gve_open(struct net_device *dev) { + struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; + struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; struct gve_priv *priv = netdev_priv(dev); int err; - netif_carrier_off(dev); + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); + + err = gve_queues_mem_alloc(priv, &tx_alloc_cfg, &rx_alloc_cfg); + if (err) + return err; + + /* No need to free on error: ownership of resources is lost after + * calling gve_queues_start. + */ + err = gve_queues_start(priv, &tx_alloc_cfg, &rx_alloc_cfg); + if (err) + return err; + + return 0; +} + +static int gve_queues_stop(struct gve_priv *priv) +{ + int err; + + netif_carrier_off(priv->dev); if (gve_get_device_rings_ok(priv)) { gve_turndown(priv); gve_drain_page_cache(priv); @@ -1386,8 +1405,10 @@ static int gve_close(struct net_device *dev) del_timer_sync(&priv->stats_report_timer); gve_unreg_xdp_info(priv); - gve_free_rings(priv); - gve_free_qpls(priv); + + gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv)); + gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); + priv->interface_down_cnt++; return 0; @@ -1402,6 +1423,19 @@ err: return gve_reset_recovery(priv, false); } +static int gve_close(struct net_device *dev) +{ + struct gve_priv *priv = netdev_priv(dev); + int err; + + err = gve_queues_stop(priv); + if (err) + return err; + + gve_queues_mem_remove(priv); + return 0; +} + static int gve_remove_xdp_queues(struct gve_priv *priv) { int err; @@ -1416,7 +1450,7 @@ static int gve_remove_xdp_queues(struct gve_priv *priv) gve_unreg_xdp_info(priv); gve_free_xdp_rings(priv); - gve_free_xdp_qpls(priv); + priv->num_xdp_queues = 0; return 0; } @@ -1425,15 +1459,11 @@ static int gve_add_xdp_queues(struct gve_priv *priv) { int err; - priv->num_xdp_queues = priv->tx_cfg.num_queues; - - err = gve_alloc_xdp_qpls(priv); - if (err) - goto err; + priv->num_xdp_queues = priv->rx_cfg.num_queues; err = gve_alloc_xdp_rings(priv); if (err) - goto free_xdp_qpls; + goto err; err = gve_reg_xdp_info(priv, priv->dev); if (err) @@ -1451,8 +1481,6 @@ static int gve_add_xdp_queues(struct gve_priv *priv) free_xdp_rings: gve_free_xdp_rings(priv); -free_xdp_qpls: - gve_free_xdp_qpls(priv); err: priv->num_xdp_queues = 0; return err; @@ -1702,42 +1730,69 @@ static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp) } } +int gve_adjust_config(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) +{ + int err; + + /* Allocate resources for the new confiugration */ + err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg); + if (err) { + netif_err(priv, drv, priv->dev, + "Adjust config failed to alloc new queues"); + return err; + } + + /* Teardown the device and free existing resources */ + err = gve_close(priv->dev); + if (err) { + netif_err(priv, drv, priv->dev, + "Adjust config failed to close old queues"); + gve_queues_mem_free(priv, tx_alloc_cfg, rx_alloc_cfg); + return err; + } + + /* Bring the device back up again with the new resources. */ + err = gve_queues_start(priv, tx_alloc_cfg, rx_alloc_cfg); + if (err) { + netif_err(priv, drv, priv->dev, + "Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n"); + /* No need to free on error: ownership of resources is lost after + * calling gve_queues_start. + */ + gve_turndown(priv); + return err; + } + + return 0; +} + int gve_adjust_queues(struct gve_priv *priv, struct gve_queue_config new_rx_config, struct gve_queue_config new_tx_config) { + struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; + struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; int err; - if (netif_carrier_ok(priv->dev)) { - /* To make this process as simple as possible we teardown the - * device, set the new configuration, and then bring the device - * up again. - */ - err = gve_close(priv->dev); - /* we have already tried to reset in close, - * just fail at this point - */ - if (err) - return err; - priv->tx_cfg = new_tx_config; - priv->rx_cfg = new_rx_config; + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); - err = gve_open(priv->dev); - if (err) - goto err; + /* Relay the new config from ethtool */ + tx_alloc_cfg.qcfg = &new_tx_config; + rx_alloc_cfg.qcfg_tx = &new_tx_config; + rx_alloc_cfg.qcfg = &new_rx_config; + tx_alloc_cfg.num_rings = new_tx_config.num_queues; - return 0; + if (netif_carrier_ok(priv->dev)) { + err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); + return err; } /* Set the config for the next up. */ priv->tx_cfg = new_tx_config; priv->rx_cfg = new_rx_config; return 0; -err: - netif_err(priv, drv, priv->dev, - "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n"); - gve_turndown(priv); - return err; } static void gve_turndown(struct gve_priv *priv) @@ -1755,12 +1810,16 @@ static void gve_turndown(struct gve_priv *priv) int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + if (!gve_tx_was_added_to_block(priv, idx)) + continue; napi_disable(&block->napi); } for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + if (!gve_rx_was_added_to_block(priv, idx)) + continue; napi_disable(&block->napi); } @@ -1783,6 +1842,9 @@ static void gve_turnup(struct gve_priv *priv) int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + if (!gve_tx_was_added_to_block(priv, idx)) + continue; + napi_enable(&block->napi); if (gve_is_gqi(priv)) { iowrite32be(0, gve_irq_doorbell(priv, block)); @@ -1790,11 +1852,21 @@ static void gve_turnup(struct gve_priv *priv) gve_set_itr_coalesce_usecs_dqo(priv, block, priv->tx_coalesce_usecs); } + + /* Any descs written by the NIC before this barrier will be + * handled by the one-off napi schedule below. Whereas any + * descs after the barrier will generate interrupts. + */ + mb(); + napi_schedule(&block->napi); } for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + if (!gve_rx_was_added_to_block(priv, idx)) + continue; + napi_enable(&block->napi); if (gve_is_gqi(priv)) { iowrite32be(0, gve_irq_doorbell(priv, block)); @@ -1802,11 +1874,27 @@ static void gve_turnup(struct gve_priv *priv) gve_set_itr_coalesce_usecs_dqo(priv, block, priv->rx_coalesce_usecs); } + + /* Any descs written by the NIC before this barrier will be + * handled by the one-off napi schedule below. Whereas any + * descs after the barrier will generate interrupts. + */ + mb(); + napi_schedule(&block->napi); } gve_set_napi_enabled(priv); } +static void gve_turnup_and_check_status(struct gve_priv *priv) +{ + u32 status; + + gve_turnup(priv); + status = ioread32be(&priv->reg_bar0->device_status); + gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); +} + static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct gve_notify_block *block; @@ -1853,40 +1941,77 @@ out: priv->tx_timeo_cnt++; } +u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit) +{ + if (enable_hsplit && priv->max_rx_buffer_size >= GVE_MAX_RX_BUFFER_SIZE) + return GVE_MAX_RX_BUFFER_SIZE; + else + return GVE_DEFAULT_RX_BUFFER_SIZE; +} + +/* header-split is not supported on non-DQO_RDA yet even if device advertises it */ +bool gve_header_split_supported(const struct gve_priv *priv) +{ + return priv->header_buf_size && priv->queue_format == GVE_DQO_RDA_FORMAT; +} + +int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split) +{ + struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; + struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; + bool enable_hdr_split; + int err = 0; + + if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) + return 0; + + if (!gve_header_split_supported(priv)) { + dev_err(&priv->pdev->dev, "Header-split not supported\n"); + return -EOPNOTSUPP; + } + + if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED) + enable_hdr_split = true; + else + enable_hdr_split = false; + + if (enable_hdr_split == priv->header_split_enabled) + return 0; + + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); + + rx_alloc_cfg.enable_header_split = enable_hdr_split; + rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split); + + if (netif_running(priv->dev)) + err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); + return err; +} + static int gve_set_features(struct net_device *netdev, netdev_features_t features) { const netdev_features_t orig_features = netdev->features; + struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; + struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; struct gve_priv *priv = netdev_priv(netdev); int err; + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); + if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) { netdev->features ^= NETIF_F_LRO; if (netif_carrier_ok(netdev)) { - /* To make this process as simple as possible we - * teardown the device, set the new configuration, - * and then bring the device up again. - */ - err = gve_close(netdev); - /* We have already tried to reset in close, just fail - * at this point. - */ - if (err) - goto err; - - err = gve_open(netdev); - if (err) - goto err; + err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); + if (err) { + /* Revert the change on error. */ + netdev->features = orig_features; + return err; + } } } return 0; -err: - /* Reverts the change on error. */ - netdev->features = orig_features; - netif_err(priv, drv, netdev, - "Set features failed! !!! DISABLING ALL QUEUES !!!\n"); - return err; } static const struct net_device_ops gve_netdev_ops = { @@ -2051,6 +2176,8 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) goto err; } + priv->num_registered_pages = 0; + if (skip_describe_device) goto setup_device; @@ -2080,7 +2207,6 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) if (!gve_is_gqi(priv)) netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX); - priv->num_registered_pages = 0; priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK; /* gvnic has one Notification Block per MSI-x vector, except for the * management vector @@ -2215,6 +2341,140 @@ static void gve_write_version(u8 __iomem *driver_version_register) writeb('\n', driver_version_register); } +static int gve_rx_queue_stop(struct net_device *dev, void *per_q_mem, int idx) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_rx_ring *gve_per_q_mem; + int err; + + if (!priv->rx) + return -EAGAIN; + + /* Destroying queue 0 while other queues exist is not supported in DQO */ + if (!gve_is_gqi(priv) && idx == 0) + return -ERANGE; + + /* Single-queue destruction requires quiescence on all queues */ + gve_turndown(priv); + + /* This failure will trigger a reset - no need to clean up */ + err = gve_adminq_destroy_single_rx_queue(priv, idx); + if (err) + return err; + + if (gve_is_qpl(priv)) { + /* This failure will trigger a reset - no need to clean up */ + err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, idx)); + if (err) + return err; + } + + gve_rx_stop_ring(priv, idx); + + /* Turn the unstopped queues back up */ + gve_turnup_and_check_status(priv); + + gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; + *gve_per_q_mem = priv->rx[idx]; + memset(&priv->rx[idx], 0, sizeof(priv->rx[idx])); + return 0; +} + +static void gve_rx_queue_mem_free(struct net_device *dev, void *per_q_mem) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_rx_alloc_rings_cfg cfg = {0}; + struct gve_rx_ring *gve_per_q_mem; + + gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; + gve_rx_get_curr_alloc_cfg(priv, &cfg); + + if (gve_is_gqi(priv)) + gve_rx_free_ring_gqi(priv, gve_per_q_mem, &cfg); + else + gve_rx_free_ring_dqo(priv, gve_per_q_mem, &cfg); +} + +static int gve_rx_queue_mem_alloc(struct net_device *dev, void *per_q_mem, + int idx) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_rx_alloc_rings_cfg cfg = {0}; + struct gve_rx_ring *gve_per_q_mem; + int err; + + if (!priv->rx) + return -EAGAIN; + + gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; + gve_rx_get_curr_alloc_cfg(priv, &cfg); + + if (gve_is_gqi(priv)) + err = gve_rx_alloc_ring_gqi(priv, &cfg, gve_per_q_mem, idx); + else + err = gve_rx_alloc_ring_dqo(priv, &cfg, gve_per_q_mem, idx); + + return err; +} + +static int gve_rx_queue_start(struct net_device *dev, void *per_q_mem, int idx) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_rx_ring *gve_per_q_mem; + int err; + + if (!priv->rx) + return -EAGAIN; + + gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; + priv->rx[idx] = *gve_per_q_mem; + + /* Single-queue creation requires quiescence on all queues */ + gve_turndown(priv); + + gve_rx_start_ring(priv, idx); + + if (gve_is_qpl(priv)) { + /* This failure will trigger a reset - no need to clean up */ + err = gve_register_qpl(priv, gve_rx_get_qpl(priv, idx)); + if (err) + goto abort; + } + + /* This failure will trigger a reset - no need to clean up */ + err = gve_adminq_create_single_rx_queue(priv, idx); + if (err) + goto abort; + + if (gve_is_gqi(priv)) + gve_rx_write_doorbell(priv, &priv->rx[idx]); + else + gve_rx_post_buffers_dqo(&priv->rx[idx]); + + /* Turn the unstopped queues back up */ + gve_turnup_and_check_status(priv); + return 0; + +abort: + gve_rx_stop_ring(priv, idx); + + /* All failures in this func result in a reset, by clearing the struct + * at idx, we prevent a double free when that reset runs. The reset, + * which needs the rtnl lock, will not run till this func returns and + * its caller gives up the lock. + */ + memset(&priv->rx[idx], 0, sizeof(priv->rx[idx])); + return err; +} + +static const struct netdev_queue_mgmt_ops gve_queue_mgmt_ops = { + .ndo_queue_mem_size = sizeof(struct gve_rx_ring), + .ndo_queue_mem_alloc = gve_rx_queue_mem_alloc, + .ndo_queue_mem_free = gve_rx_queue_mem_free, + .ndo_queue_start = gve_rx_queue_start, + .ndo_queue_stop = gve_rx_queue_stop, +}; + static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { int max_tx_queues, max_rx_queues; @@ -2269,6 +2529,7 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, dev); dev->ethtool_ops = &gve_ethtool_ops; dev->netdev_ops = &gve_netdev_ops; + dev->queue_mgmt_ops = &gve_queue_mgmt_ops; /* Set default and supported features. * @@ -2297,6 +2558,8 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) priv->service_task_flags = 0x0; priv->state_flags = 0x0; priv->ethtool_flags = 0x0; + priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE; + priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; gve_set_probe_in_progress(priv); priv->gve_wq = alloc_ordered_workqueue("gve", 0); diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 76615d47e055..acb73d4d0de6 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -23,11 +23,16 @@ static void gve_rx_free_buffer(struct device *dev, gve_free_page(dev, page_info->page, dma, DMA_FROM_DEVICE); } -static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx) +static void gve_rx_unfill_pages(struct gve_priv *priv, + struct gve_rx_ring *rx, + struct gve_rx_alloc_rings_cfg *cfg) { u32 slots = rx->mask + 1; int i; + if (!rx->data.page_info) + return; + if (rx->data.raw_addressing) { for (i = 0; i < slots; i++) gve_rx_free_buffer(&priv->pdev->dev, &rx->data.page_info[i], @@ -36,8 +41,6 @@ static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx) for (i = 0; i < slots; i++) page_ref_sub(rx->data.page_info[i].page, rx->data.page_info[i].pagecnt_bias - 1); - gve_unassign_qpl(priv, rx->data.qpl->id); - rx->data.qpl = NULL; for (i = 0; i < rx->qpl_copy_pool_mask + 1; i++) { page_ref_sub(rx->qpl_copy_pool[i].page, @@ -49,33 +52,92 @@ static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx) rx->data.page_info = NULL; } -static void gve_rx_free_ring(struct gve_priv *priv, int idx) +static void gve_rx_ctx_clear(struct gve_rx_ctx *ctx) +{ + ctx->skb_head = NULL; + ctx->skb_tail = NULL; + ctx->total_size = 0; + ctx->frag_cnt = 0; + ctx->drop_pkt = false; +} + +static void gve_rx_init_ring_state_gqi(struct gve_rx_ring *rx) +{ + rx->desc.seqno = 1; + rx->cnt = 0; + gve_rx_ctx_clear(&rx->ctx); +} + +static void gve_rx_reset_ring_gqi(struct gve_priv *priv, int idx) { struct gve_rx_ring *rx = &priv->rx[idx]; + const u32 slots = priv->rx_desc_cnt; + size_t size; + + /* Reset desc ring */ + if (rx->desc.desc_ring) { + size = slots * sizeof(rx->desc.desc_ring[0]); + memset(rx->desc.desc_ring, 0, size); + } + + /* Reset q_resources */ + if (rx->q_resources) + memset(rx->q_resources, 0, sizeof(*rx->q_resources)); + + gve_rx_init_ring_state_gqi(rx); +} + +void gve_rx_stop_ring_gqi(struct gve_priv *priv, int idx) +{ + int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); + + if (!gve_rx_was_added_to_block(priv, idx)) + return; + + gve_remove_napi(priv, ntfy_idx); + gve_rx_remove_from_block(priv, idx); + gve_rx_reset_ring_gqi(priv, idx); +} + +void gve_rx_free_ring_gqi(struct gve_priv *priv, struct gve_rx_ring *rx, + struct gve_rx_alloc_rings_cfg *cfg) +{ struct device *dev = &priv->pdev->dev; u32 slots = rx->mask + 1; + int idx = rx->q_num; size_t bytes; + u32 qpl_id; - gve_rx_remove_from_block(priv, idx); - - bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt; - dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus); - rx->desc.desc_ring = NULL; + if (rx->desc.desc_ring) { + bytes = sizeof(struct gve_rx_desc) * cfg->ring_size; + dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus); + rx->desc.desc_ring = NULL; + } - dma_free_coherent(dev, sizeof(*rx->q_resources), - rx->q_resources, rx->q_resources_bus); - rx->q_resources = NULL; + if (rx->q_resources) { + dma_free_coherent(dev, sizeof(*rx->q_resources), + rx->q_resources, rx->q_resources_bus); + rx->q_resources = NULL; + } - gve_rx_unfill_pages(priv, rx); + gve_rx_unfill_pages(priv, rx, cfg); - bytes = sizeof(*rx->data.data_ring) * slots; - dma_free_coherent(dev, bytes, rx->data.data_ring, - rx->data.data_bus); - rx->data.data_ring = NULL; + if (rx->data.data_ring) { + bytes = sizeof(*rx->data.data_ring) * slots; + dma_free_coherent(dev, bytes, rx->data.data_ring, + rx->data.data_bus); + rx->data.data_ring = NULL; + } kvfree(rx->qpl_copy_pool); rx->qpl_copy_pool = NULL; + if (rx->data.qpl) { + qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, idx); + gve_free_queue_page_list(priv, rx->data.qpl, qpl_id); + rx->data.qpl = NULL; + } + netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx); } @@ -93,7 +155,8 @@ static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info, static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev, struct gve_rx_slot_page_info *page_info, - union gve_rx_data_slot *data_slot) + union gve_rx_data_slot *data_slot, + struct gve_rx_ring *rx) { struct page *page; dma_addr_t dma; @@ -101,14 +164,19 @@ static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev, err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE, GFP_ATOMIC); - if (err) + if (err) { + u64_stats_update_begin(&rx->statss); + rx->rx_buf_alloc_fail++; + u64_stats_update_end(&rx->statss); return err; + } gve_setup_rx_buffer(page_info, dma, page, &data_slot->addr); return 0; } -static int gve_prefill_rx_pages(struct gve_rx_ring *rx) +static int gve_rx_prefill_pages(struct gve_rx_ring *rx, + struct gve_rx_alloc_rings_cfg *cfg) { struct gve_priv *priv = rx->gve; u32 slots; @@ -126,14 +194,6 @@ static int gve_prefill_rx_pages(struct gve_rx_ring *rx) if (!rx->data.page_info) return -ENOMEM; - if (!rx->data.raw_addressing) { - rx->data.qpl = gve_assign_rx_qpl(priv, rx->q_num); - if (!rx->data.qpl) { - kvfree(rx->data.page_info); - rx->data.page_info = NULL; - return -ENOMEM; - } - } for (i = 0; i < slots; i++) { if (!rx->data.raw_addressing) { struct page *page = rx->data.qpl->pages[i]; @@ -143,8 +203,9 @@ static int gve_prefill_rx_pages(struct gve_rx_ring *rx) &rx->data.data_ring[i].qpl_offset); continue; } - err = gve_rx_alloc_buffer(priv, &priv->pdev->dev, &rx->data.page_info[i], - &rx->data.data_ring[i]); + err = gve_rx_alloc_buffer(priv, &priv->pdev->dev, + &rx->data.page_info[i], + &rx->data.data_ring[i], rx); if (err) goto alloc_err_rda; } @@ -185,9 +246,6 @@ alloc_err_qpl: page_ref_sub(rx->data.page_info[i].page, rx->data.page_info[i].pagecnt_bias - 1); - gve_unassign_qpl(priv, rx->data.qpl->id); - rx->data.qpl = NULL; - return err; alloc_err_rda: @@ -198,22 +256,25 @@ alloc_err_rda: return err; } -static void gve_rx_ctx_clear(struct gve_rx_ctx *ctx) +void gve_rx_start_ring_gqi(struct gve_priv *priv, int idx) { - ctx->skb_head = NULL; - ctx->skb_tail = NULL; - ctx->total_size = 0; - ctx->frag_cnt = 0; - ctx->drop_pkt = false; + int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); + + gve_rx_add_to_block(priv, idx); + gve_add_napi(priv, ntfy_idx, gve_napi_poll); } -static int gve_rx_alloc_ring(struct gve_priv *priv, int idx) +int gve_rx_alloc_ring_gqi(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg, + struct gve_rx_ring *rx, + int idx) { - struct gve_rx_ring *rx = &priv->rx[idx]; struct device *hdev = &priv->pdev->dev; + u32 slots = cfg->ring_size; int filled_pages; + int qpl_page_cnt; + u32 qpl_id = 0; size_t bytes; - u32 slots; int err; netif_dbg(priv, drv, priv->dev, "allocating rx ring\n"); @@ -223,9 +284,8 @@ static int gve_rx_alloc_ring(struct gve_priv *priv, int idx) rx->gve = priv; rx->q_num = idx; - slots = priv->rx_data_slot_cnt; rx->mask = slots - 1; - rx->data.raw_addressing = priv->queue_format == GVE_GQI_RDA_FORMAT; + rx->data.raw_addressing = cfg->raw_addressing; /* alloc rx data ring */ bytes = sizeof(*rx->data.data_ring) * slots; @@ -246,10 +306,22 @@ static int gve_rx_alloc_ring(struct gve_priv *priv, int idx) goto abort_with_slots; } - filled_pages = gve_prefill_rx_pages(rx); + if (!rx->data.raw_addressing) { + qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num); + qpl_page_cnt = cfg->ring_size; + + rx->data.qpl = gve_alloc_queue_page_list(priv, qpl_id, + qpl_page_cnt); + if (!rx->data.qpl) { + err = -ENOMEM; + goto abort_with_copy_pool; + } + } + + filled_pages = gve_rx_prefill_pages(rx, cfg); if (filled_pages < 0) { err = -ENOMEM; - goto abort_with_copy_pool; + goto abort_with_qpl; } rx->fill_cnt = filled_pages; /* Ensure data ring slots (packet buffers) are visible. */ @@ -269,23 +341,18 @@ static int gve_rx_alloc_ring(struct gve_priv *priv, int idx) (unsigned long)rx->data.data_bus); /* alloc rx desc ring */ - bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt; + bytes = sizeof(struct gve_rx_desc) * cfg->ring_size; rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus, GFP_KERNEL); if (!rx->desc.desc_ring) { err = -ENOMEM; goto abort_with_q_resources; } - rx->cnt = 0; - rx->db_threshold = priv->rx_desc_cnt / 2; - rx->desc.seqno = 1; + rx->db_threshold = slots / 2; + gve_rx_init_ring_state_gqi(rx); - /* Allocating half-page buffers allows page-flipping which is faster - * than copying or allocating new pages. - */ rx->packet_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; gve_rx_ctx_clear(&rx->ctx); - gve_rx_add_to_block(priv, idx); return 0; @@ -294,7 +361,12 @@ abort_with_q_resources: rx->q_resources, rx->q_resources_bus); rx->q_resources = NULL; abort_filled: - gve_rx_unfill_pages(priv, rx); + gve_rx_unfill_pages(priv, rx, cfg); +abort_with_qpl: + if (!rx->data.raw_addressing) { + gve_free_queue_page_list(priv, rx->data.qpl, qpl_id); + rx->data.qpl = NULL; + } abort_with_copy_pool: kvfree(rx->qpl_copy_pool); rx->qpl_copy_pool = NULL; @@ -306,36 +378,52 @@ abort_with_slots: return err; } -int gve_rx_alloc_rings(struct gve_priv *priv) +int gve_rx_alloc_rings_gqi(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg) { + struct gve_rx_ring *rx; int err = 0; - int i; + int i, j; - for (i = 0; i < priv->rx_cfg.num_queues; i++) { - err = gve_rx_alloc_ring(priv, i); + rx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_rx_ring), + GFP_KERNEL); + if (!rx) + return -ENOMEM; + + for (i = 0; i < cfg->qcfg->num_queues; i++) { + err = gve_rx_alloc_ring_gqi(priv, cfg, &rx[i], i); if (err) { netif_err(priv, drv, priv->dev, "Failed to alloc rx ring=%d: err=%d\n", i, err); - break; + goto cleanup; } } - /* Unallocate if there was an error */ - if (err) { - int j; - for (j = 0; j < i; j++) - gve_rx_free_ring(priv, j); - } + cfg->rx = rx; + return 0; + +cleanup: + for (j = 0; j < i; j++) + gve_rx_free_ring_gqi(priv, &rx[j], cfg); + kvfree(rx); return err; } -void gve_rx_free_rings_gqi(struct gve_priv *priv) +void gve_rx_free_rings_gqi(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg) { + struct gve_rx_ring *rx = cfg->rx; int i; - for (i = 0; i < priv->rx_cfg.num_queues; i++) - gve_rx_free_ring(priv, i); + if (!rx) + return; + + for (i = 0; i < cfg->qcfg->num_queues; i++) + gve_rx_free_ring_gqi(priv, &rx[i], cfg); + + kvfree(rx); + cfg->rx = NULL; } void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx) @@ -896,10 +984,7 @@ static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx) gve_rx_free_buffer(dev, page_info, data_slot); page_info->page = NULL; if (gve_rx_alloc_buffer(priv, dev, page_info, - data_slot)) { - u64_stats_update_begin(&rx->statss); - rx->rx_buf_alloc_fail++; - u64_stats_update_end(&rx->statss); + data_slot, rx)) { break; } } diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index f281e42a7ef9..c1c912de59c7 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -178,7 +178,7 @@ static int gve_alloc_page_dqo(struct gve_rx_ring *rx, return err; } else { idx = rx->dqo.next_qpl_page_idx; - if (idx >= priv->rx_pages_per_qpl) { + if (idx >= gve_get_rx_pages_per_qpl_dqo(priv->rx_desc_cnt)) { net_err_ratelimited("%s: Out of QPL pages\n", priv->dev->name); return -ENOMEM; @@ -199,20 +199,120 @@ static int gve_alloc_page_dqo(struct gve_rx_ring *rx, return 0; } -static void gve_rx_free_ring_dqo(struct gve_priv *priv, int idx) +static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx) +{ + struct device *hdev = &priv->pdev->dev; + int buf_count = rx->dqo.bufq.mask + 1; + + if (rx->dqo.hdr_bufs.data) { + dma_free_coherent(hdev, priv->header_buf_size * buf_count, + rx->dqo.hdr_bufs.data, rx->dqo.hdr_bufs.addr); + rx->dqo.hdr_bufs.data = NULL; + } +} + +static void gve_rx_init_ring_state_dqo(struct gve_rx_ring *rx, + const u32 buffer_queue_slots, + const u32 completion_queue_slots) +{ + int i; + + /* Set buffer queue state */ + rx->dqo.bufq.mask = buffer_queue_slots - 1; + rx->dqo.bufq.head = 0; + rx->dqo.bufq.tail = 0; + + /* Set completion queue state */ + rx->dqo.complq.num_free_slots = completion_queue_slots; + rx->dqo.complq.mask = completion_queue_slots - 1; + rx->dqo.complq.cur_gen_bit = 0; + rx->dqo.complq.head = 0; + + /* Set RX SKB context */ + rx->ctx.skb_head = NULL; + rx->ctx.skb_tail = NULL; + + /* Set up linked list of buffer IDs */ + if (rx->dqo.buf_states) { + for (i = 0; i < rx->dqo.num_buf_states - 1; i++) + rx->dqo.buf_states[i].next = i + 1; + rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1; + } + + rx->dqo.free_buf_states = 0; + rx->dqo.recycled_buf_states.head = -1; + rx->dqo.recycled_buf_states.tail = -1; + rx->dqo.used_buf_states.head = -1; + rx->dqo.used_buf_states.tail = -1; +} + +static void gve_rx_reset_ring_dqo(struct gve_priv *priv, int idx) { struct gve_rx_ring *rx = &priv->rx[idx]; + size_t size; + int i; + + const u32 buffer_queue_slots = priv->rx_desc_cnt; + const u32 completion_queue_slots = priv->rx_desc_cnt; + + /* Reset buffer queue */ + if (rx->dqo.bufq.desc_ring) { + size = sizeof(rx->dqo.bufq.desc_ring[0]) * + buffer_queue_slots; + memset(rx->dqo.bufq.desc_ring, 0, size); + } + + /* Reset completion queue */ + if (rx->dqo.complq.desc_ring) { + size = sizeof(rx->dqo.complq.desc_ring[0]) * + completion_queue_slots; + memset(rx->dqo.complq.desc_ring, 0, size); + } + + /* Reset q_resources */ + if (rx->q_resources) + memset(rx->q_resources, 0, sizeof(*rx->q_resources)); + + /* Reset buf states */ + if (rx->dqo.buf_states) { + for (i = 0; i < rx->dqo.num_buf_states; i++) { + struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i]; + + if (bs->page_info.page) + gve_free_page_dqo(priv, bs, !rx->dqo.qpl); + } + } + + gve_rx_init_ring_state_dqo(rx, buffer_queue_slots, + completion_queue_slots); +} + +void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx) +{ + int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); + + if (!gve_rx_was_added_to_block(priv, idx)) + return; + + gve_remove_napi(priv, ntfy_idx); + gve_rx_remove_from_block(priv, idx); + gve_rx_reset_ring_dqo(priv, idx); +} + +void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, + struct gve_rx_alloc_rings_cfg *cfg) +{ struct device *hdev = &priv->pdev->dev; size_t completion_queue_slots; size_t buffer_queue_slots; + int idx = rx->q_num; size_t size; + u32 qpl_id; int i; completion_queue_slots = rx->dqo.complq.mask + 1; buffer_queue_slots = rx->dqo.bufq.mask + 1; - gve_rx_remove_from_block(priv, idx); - if (rx->q_resources) { dma_free_coherent(hdev, sizeof(*rx->q_resources), rx->q_resources, rx->q_resources_bus); @@ -225,8 +325,10 @@ static void gve_rx_free_ring_dqo(struct gve_priv *priv, int idx) if (bs->page_info.page) gve_free_page_dqo(priv, bs, !rx->dqo.qpl); } + if (rx->dqo.qpl) { - gve_unassign_qpl(priv, rx->dqo.qpl->id); + qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num); + gve_free_queue_page_list(priv, rx->dqo.qpl, qpl_id); rx->dqo.qpl = NULL; } @@ -248,50 +350,64 @@ static void gve_rx_free_ring_dqo(struct gve_priv *priv, int idx) kvfree(rx->dqo.buf_states); rx->dqo.buf_states = NULL; + gve_rx_free_hdr_bufs(priv, rx); + netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx); } -static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx) +static int gve_rx_alloc_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx, + const u32 buf_count) { - struct gve_rx_ring *rx = &priv->rx[idx]; struct device *hdev = &priv->pdev->dev; + + rx->dqo.hdr_bufs.data = dma_alloc_coherent(hdev, priv->header_buf_size * buf_count, + &rx->dqo.hdr_bufs.addr, GFP_KERNEL); + if (!rx->dqo.hdr_bufs.data) + return -ENOMEM; + + return 0; +} + +void gve_rx_start_ring_dqo(struct gve_priv *priv, int idx) +{ + int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); + + gve_rx_add_to_block(priv, idx); + gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo); +} + +int gve_rx_alloc_ring_dqo(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg, + struct gve_rx_ring *rx, + int idx) +{ + struct device *hdev = &priv->pdev->dev; + int qpl_page_cnt; size_t size; - int i; + u32 qpl_id; - const u32 buffer_queue_slots = - priv->queue_format == GVE_DQO_RDA_FORMAT ? - priv->options_dqo_rda.rx_buff_ring_entries : priv->rx_desc_cnt; - const u32 completion_queue_slots = priv->rx_desc_cnt; + const u32 buffer_queue_slots = cfg->ring_size; + const u32 completion_queue_slots = cfg->ring_size; netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n"); memset(rx, 0, sizeof(*rx)); rx->gve = priv; rx->q_num = idx; - rx->dqo.bufq.mask = buffer_queue_slots - 1; - rx->dqo.complq.num_free_slots = completion_queue_slots; - rx->dqo.complq.mask = completion_queue_slots - 1; - rx->ctx.skb_head = NULL; - rx->ctx.skb_tail = NULL; - rx->dqo.num_buf_states = priv->queue_format == GVE_DQO_RDA_FORMAT ? + rx->dqo.num_buf_states = cfg->raw_addressing ? min_t(s16, S16_MAX, buffer_queue_slots * 4) : - priv->rx_pages_per_qpl; + gve_get_rx_pages_per_qpl_dqo(cfg->ring_size); rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states, sizeof(rx->dqo.buf_states[0]), GFP_KERNEL); if (!rx->dqo.buf_states) return -ENOMEM; - /* Set up linked list of buffer IDs */ - for (i = 0; i < rx->dqo.num_buf_states - 1; i++) - rx->dqo.buf_states[i].next = i + 1; - - rx->dqo.buf_states[rx->dqo.num_buf_states - 1].next = -1; - rx->dqo.recycled_buf_states.head = -1; - rx->dqo.recycled_buf_states.tail = -1; - rx->dqo.used_buf_states.head = -1; - rx->dqo.used_buf_states.tail = -1; + /* Allocate header buffers for header-split */ + if (cfg->enable_header_split) + if (gve_rx_alloc_hdr_bufs(priv, rx, buffer_queue_slots)) + goto err; /* Allocate RX completion queue */ size = sizeof(rx->dqo.complq.desc_ring[0]) * @@ -308,8 +424,12 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx) if (!rx->dqo.bufq.desc_ring) goto err; - if (priv->queue_format != GVE_DQO_RDA_FORMAT) { - rx->dqo.qpl = gve_assign_rx_qpl(priv, rx->q_num); + if (!cfg->raw_addressing) { + qpl_id = gve_get_rx_qpl_id(cfg->qcfg_tx, rx->q_num); + qpl_page_cnt = gve_get_rx_pages_per_qpl_dqo(cfg->ring_size); + + rx->dqo.qpl = gve_alloc_queue_page_list(priv, qpl_id, + qpl_page_cnt); if (!rx->dqo.qpl) goto err; rx->dqo.next_qpl_page_idx = 0; @@ -320,12 +440,13 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx) if (!rx->q_resources) goto err; - gve_rx_add_to_block(priv, idx); + gve_rx_init_ring_state_dqo(rx, buffer_queue_slots, + completion_queue_slots); return 0; err: - gve_rx_free_ring_dqo(priv, idx); + gve_rx_free_ring_dqo(priv, rx, cfg); return -ENOMEM; } @@ -337,13 +458,20 @@ void gve_rx_write_doorbell_dqo(const struct gve_priv *priv, int queue_idx) iowrite32(rx->dqo.bufq.tail, &priv->db_bar2[index]); } -int gve_rx_alloc_rings_dqo(struct gve_priv *priv) +int gve_rx_alloc_rings_dqo(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg) { - int err = 0; + struct gve_rx_ring *rx; + int err; int i; - for (i = 0; i < priv->rx_cfg.num_queues; i++) { - err = gve_rx_alloc_ring_dqo(priv, i); + rx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_rx_ring), + GFP_KERNEL); + if (!rx) + return -ENOMEM; + + for (i = 0; i < cfg->qcfg->num_queues; i++) { + err = gve_rx_alloc_ring_dqo(priv, cfg, &rx[i], i); if (err) { netif_err(priv, drv, priv->dev, "Failed to alloc rx ring=%d: err=%d\n", @@ -352,21 +480,30 @@ int gve_rx_alloc_rings_dqo(struct gve_priv *priv) } } + cfg->rx = rx; return 0; err: for (i--; i >= 0; i--) - gve_rx_free_ring_dqo(priv, i); - + gve_rx_free_ring_dqo(priv, &rx[i], cfg); + kvfree(rx); return err; } -void gve_rx_free_rings_dqo(struct gve_priv *priv) +void gve_rx_free_rings_dqo(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg) { + struct gve_rx_ring *rx = cfg->rx; int i; - for (i = 0; i < priv->rx_cfg.num_queues; i++) - gve_rx_free_ring_dqo(priv, i); + if (!rx) + return; + + for (i = 0; i < cfg->qcfg->num_queues; i++) + gve_rx_free_ring_dqo(priv, &rx[i], cfg); + + kvfree(rx); + cfg->rx = NULL; } void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx) @@ -404,6 +541,10 @@ void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx) desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states); desc->buf_addr = cpu_to_le64(buf_state->addr + buf_state->page_info.page_offset); + if (rx->dqo.hdr_bufs.data) + desc->header_buf_addr = + cpu_to_le64(rx->dqo.hdr_bufs.addr + + priv->header_buf_size * bufq->tail); bufq->tail = (bufq->tail + 1) & bufq->mask; complq->num_free_slots--; @@ -419,7 +560,7 @@ void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx) static void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx, struct gve_rx_buf_state_dqo *buf_state) { - const int data_buffer_size = priv->data_buffer_size_dqo; + const u16 data_buffer_size = priv->data_buffer_size_dqo; int pagecount; /* Can't reuse if we only fit one buffer per page */ @@ -606,13 +747,16 @@ static int gve_rx_append_frags(struct napi_struct *napi, */ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, const struct gve_rx_compl_desc_dqo *compl_desc, - int queue_idx) + u32 desc_idx, int queue_idx) { const u16 buffer_id = le16_to_cpu(compl_desc->buf_id); + const bool hbo = compl_desc->header_buffer_overflow; const bool eop = compl_desc->end_of_packet != 0; + const bool hsplit = compl_desc->split_header; struct gve_rx_buf_state_dqo *buf_state; struct gve_priv *priv = rx->gve; u16 buf_len; + u16 hdr_len; if (unlikely(buffer_id >= rx->dqo.num_buf_states)) { net_err_ratelimited("%s: Invalid RX buffer_id=%u\n", @@ -633,12 +777,35 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, } buf_len = compl_desc->packet_len; + hdr_len = compl_desc->header_len; /* Page might have not been used for awhile and was likely last written * by a different thread. */ prefetch(buf_state->page_info.page); + /* Copy the header into the skb in the case of header split */ + if (hsplit) { + int unsplit = 0; + + if (hdr_len && !hbo) { + rx->ctx.skb_head = gve_rx_copy_data(priv->dev, napi, + rx->dqo.hdr_bufs.data + + desc_idx * priv->header_buf_size, + hdr_len); + if (unlikely(!rx->ctx.skb_head)) + goto error; + rx->ctx.skb_tail = rx->ctx.skb_head; + } else { + unsplit = 1; + } + u64_stats_update_begin(&rx->statss); + rx->rx_hsplit_pkt++; + rx->rx_hsplit_unsplit_pkt += unsplit; + rx->rx_hsplit_bytes += hdr_len; + u64_stats_update_end(&rx->statss); + } + /* Sync the portion of dma buffer for CPU to read. */ dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr, buf_state->page_info.page_offset, @@ -781,7 +948,7 @@ int gve_rx_poll_dqo(struct gve_notify_block *block, int budget) /* Do not read data until we own the descriptor */ dma_rmb(); - err = gve_rx_dqo(napi, rx, compl_desc, rx->q_num); + err = gve_rx_dqo(napi, rx, compl_desc, complq->head, rx->q_num); if (err < 0) { gve_rx_free_skb(rx); u64_stats_update_begin(&rx->statss); diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index 07ba124780df..24a64ec1073e 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -196,29 +196,40 @@ static int gve_clean_xdp_done(struct gve_priv *priv, struct gve_tx_ring *tx, static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx, u32 to_do, bool try_to_wake); -static void gve_tx_free_ring(struct gve_priv *priv, int idx) +void gve_tx_stop_ring_gqi(struct gve_priv *priv, int idx) { + int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); struct gve_tx_ring *tx = &priv->tx[idx]; + + if (!gve_tx_was_added_to_block(priv, idx)) + return; + + gve_remove_napi(priv, ntfy_idx); + gve_clean_tx_done(priv, tx, priv->tx_desc_cnt, false); + netdev_tx_reset_queue(tx->netdev_txq); + gve_tx_remove_from_block(priv, idx); +} + +static void gve_tx_free_ring_gqi(struct gve_priv *priv, struct gve_tx_ring *tx, + struct gve_tx_alloc_rings_cfg *cfg) +{ struct device *hdev = &priv->pdev->dev; + int idx = tx->q_num; size_t bytes; + u32 qpl_id; u32 slots; - gve_tx_remove_from_block(priv, idx); slots = tx->mask + 1; - if (tx->q_num < priv->tx_cfg.num_queues) { - gve_clean_tx_done(priv, tx, priv->tx_desc_cnt, false); - netdev_tx_reset_queue(tx->netdev_txq); - } else { - gve_clean_xdp_done(priv, tx, priv->tx_desc_cnt); - } - dma_free_coherent(hdev, sizeof(*tx->q_resources), tx->q_resources, tx->q_resources_bus); tx->q_resources = NULL; - if (!tx->raw_addressing) { - gve_tx_fifo_release(priv, &tx->tx_fifo); - gve_unassign_qpl(priv, tx->tx_fifo.qpl->id); + if (tx->tx_fifo.qpl) { + if (tx->tx_fifo.base) + gve_tx_fifo_release(priv, &tx->tx_fifo); + + qpl_id = gve_tx_qpl_id(priv, tx->q_num); + gve_free_queue_page_list(priv, tx->tx_fifo.qpl, qpl_id); tx->tx_fifo.qpl = NULL; } @@ -232,11 +243,25 @@ static void gve_tx_free_ring(struct gve_priv *priv, int idx) netif_dbg(priv, drv, priv->dev, "freed tx queue %d\n", idx); } -static int gve_tx_alloc_ring(struct gve_priv *priv, int idx) +void gve_tx_start_ring_gqi(struct gve_priv *priv, int idx) { + int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); struct gve_tx_ring *tx = &priv->tx[idx]; + + gve_tx_add_to_block(priv, idx); + + tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx); + gve_add_napi(priv, ntfy_idx, gve_napi_poll); +} + +static int gve_tx_alloc_ring_gqi(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *cfg, + struct gve_tx_ring *tx, + int idx) +{ struct device *hdev = &priv->pdev->dev; - u32 slots = priv->tx_desc_cnt; + int qpl_page_cnt; + u32 qpl_id = 0; size_t bytes; /* Make sure everything is zeroed to start */ @@ -245,25 +270,30 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx) spin_lock_init(&tx->xdp_lock); tx->q_num = idx; - tx->mask = slots - 1; + tx->mask = cfg->ring_size - 1; /* alloc metadata */ - tx->info = vcalloc(slots, sizeof(*tx->info)); + tx->info = vcalloc(cfg->ring_size, sizeof(*tx->info)); if (!tx->info) return -ENOMEM; /* alloc tx queue */ - bytes = sizeof(*tx->desc) * slots; + bytes = sizeof(*tx->desc) * cfg->ring_size; tx->desc = dma_alloc_coherent(hdev, bytes, &tx->bus, GFP_KERNEL); if (!tx->desc) goto abort_with_info; - tx->raw_addressing = priv->queue_format == GVE_GQI_RDA_FORMAT; - tx->dev = &priv->pdev->dev; + tx->raw_addressing = cfg->raw_addressing; + tx->dev = hdev; if (!tx->raw_addressing) { - tx->tx_fifo.qpl = gve_assign_tx_qpl(priv, idx); + qpl_id = gve_tx_qpl_id(priv, tx->q_num); + qpl_page_cnt = priv->tx_pages_per_qpl; + + tx->tx_fifo.qpl = gve_alloc_queue_page_list(priv, qpl_id, + qpl_page_cnt); if (!tx->tx_fifo.qpl) goto abort_with_desc; + /* map Tx FIFO */ if (gve_tx_fifo_init(priv, &tx->tx_fifo)) goto abort_with_qpl; @@ -277,20 +307,16 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx) if (!tx->q_resources) goto abort_with_fifo; - netif_dbg(priv, drv, priv->dev, "tx[%d]->bus=%lx\n", idx, - (unsigned long)tx->bus); - if (idx < priv->tx_cfg.num_queues) - tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx); - gve_tx_add_to_block(priv, idx); - return 0; abort_with_fifo: if (!tx->raw_addressing) gve_tx_fifo_release(priv, &tx->tx_fifo); abort_with_qpl: - if (!tx->raw_addressing) - gve_unassign_qpl(priv, tx->tx_fifo.qpl->id); + if (!tx->raw_addressing) { + gve_free_queue_page_list(priv, tx->tx_fifo.qpl, qpl_id); + tx->tx_fifo.qpl = NULL; + } abort_with_desc: dma_free_coherent(hdev, bytes, tx->desc, tx->bus); tx->desc = NULL; @@ -300,36 +326,67 @@ abort_with_info: return -ENOMEM; } -int gve_tx_alloc_rings(struct gve_priv *priv, int start_id, int num_rings) +int gve_tx_alloc_rings_gqi(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *cfg) { + struct gve_tx_ring *tx = cfg->tx; int err = 0; - int i; + int i, j; + + if (cfg->start_idx + cfg->num_rings > cfg->qcfg->max_queues) { + netif_err(priv, drv, priv->dev, + "Cannot alloc more than the max num of Tx rings\n"); + return -EINVAL; + } - for (i = start_id; i < start_id + num_rings; i++) { - err = gve_tx_alloc_ring(priv, i); + if (cfg->start_idx == 0) { + tx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_tx_ring), + GFP_KERNEL); + if (!tx) + return -ENOMEM; + } else if (!tx) { + netif_err(priv, drv, priv->dev, + "Cannot alloc tx rings from a nonzero start idx without tx array\n"); + return -EINVAL; + } + + for (i = cfg->start_idx; i < cfg->start_idx + cfg->num_rings; i++) { + err = gve_tx_alloc_ring_gqi(priv, cfg, &tx[i], i); if (err) { netif_err(priv, drv, priv->dev, "Failed to alloc tx ring=%d: err=%d\n", i, err); - break; + goto cleanup; } } - /* Unallocate if there was an error */ - if (err) { - int j; - for (j = start_id; j < i; j++) - gve_tx_free_ring(priv, j); - } + cfg->tx = tx; + return 0; + +cleanup: + for (j = 0; j < i; j++) + gve_tx_free_ring_gqi(priv, &tx[j], cfg); + if (cfg->start_idx == 0) + kvfree(tx); return err; } -void gve_tx_free_rings_gqi(struct gve_priv *priv, int start_id, int num_rings) +void gve_tx_free_rings_gqi(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *cfg) { + struct gve_tx_ring *tx = cfg->tx; int i; - for (i = start_id; i < start_id + num_rings; i++) - gve_tx_free_ring(priv, i); + if (!tx) + return; + + for (i = cfg->start_idx; i < cfg->start_idx + cfg->num_rings; i++) + gve_tx_free_ring_gqi(priv, &tx[i], cfg); + + if (cfg->start_idx == 0) { + kvfree(tx); + cfg->tx = NULL; + } } /* gve_tx_avail - Calculates the number of slots available in the ring diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c index f59c4710f118..fe1b26a4d736 100644 --- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c @@ -188,13 +188,28 @@ static void gve_tx_clean_pending_packets(struct gve_tx_ring *tx) } } -static void gve_tx_free_ring_dqo(struct gve_priv *priv, int idx) +void gve_tx_stop_ring_dqo(struct gve_priv *priv, int idx) { + int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); struct gve_tx_ring *tx = &priv->tx[idx]; - struct device *hdev = &priv->pdev->dev; - size_t bytes; + if (!gve_tx_was_added_to_block(priv, idx)) + return; + + gve_remove_napi(priv, ntfy_idx); + gve_clean_tx_done_dqo(priv, tx, /*napi=*/NULL); + netdev_tx_reset_queue(tx->netdev_txq); + gve_tx_clean_pending_packets(tx); gve_tx_remove_from_block(priv, idx); +} + +static void gve_tx_free_ring_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, + struct gve_tx_alloc_rings_cfg *cfg) +{ + struct device *hdev = &priv->pdev->dev; + int idx = tx->q_num; + size_t bytes; + u32 qpl_id; if (tx->q_resources) { dma_free_coherent(hdev, sizeof(*tx->q_resources), @@ -223,7 +238,8 @@ static void gve_tx_free_ring_dqo(struct gve_priv *priv, int idx) tx->dqo.tx_qpl_buf_next = NULL; if (tx->dqo.qpl) { - gve_unassign_qpl(priv, tx->dqo.qpl->id); + qpl_id = gve_tx_qpl_id(priv, tx->q_num); + gve_free_queue_page_list(priv, tx->dqo.qpl, qpl_id); tx->dqo.qpl = NULL; } @@ -253,25 +269,37 @@ static int gve_tx_qpl_buf_init(struct gve_tx_ring *tx) return 0; } -static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, int idx) +void gve_tx_start_ring_dqo(struct gve_priv *priv, int idx) { + int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); struct gve_tx_ring *tx = &priv->tx[idx]; + + gve_tx_add_to_block(priv, idx); + + tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx); + gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo); +} + +static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *cfg, + struct gve_tx_ring *tx, + int idx) +{ struct device *hdev = &priv->pdev->dev; int num_pending_packets; + int qpl_page_cnt; size_t bytes; + u32 qpl_id; int i; memset(tx, 0, sizeof(*tx)); tx->q_num = idx; - tx->dev = &priv->pdev->dev; - tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx); + tx->dev = hdev; atomic_set_release(&tx->dqo_compl.hw_tx_head, 0); /* Queue sizes must be a power of 2 */ - tx->mask = priv->tx_desc_cnt - 1; - tx->dqo.complq_mask = priv->queue_format == GVE_DQO_RDA_FORMAT ? - priv->options_dqo_rda.tx_comp_ring_entries - 1 : - tx->mask; + tx->mask = cfg->ring_size - 1; + tx->dqo.complq_mask = tx->mask; /* The max number of pending packets determines the maximum number of * descriptors which maybe written to the completion queue. @@ -327,8 +355,12 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, int idx) if (!tx->q_resources) goto err; - if (gve_is_qpl(priv)) { - tx->dqo.qpl = gve_assign_tx_qpl(priv, idx); + if (!cfg->raw_addressing) { + qpl_id = gve_tx_qpl_id(priv, tx->q_num); + qpl_page_cnt = priv->tx_pages_per_qpl; + + tx->dqo.qpl = gve_alloc_queue_page_list(priv, qpl_id, + qpl_page_cnt); if (!tx->dqo.qpl) goto err; @@ -336,22 +368,39 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, int idx) goto err; } - gve_tx_add_to_block(priv, idx); - return 0; err: - gve_tx_free_ring_dqo(priv, idx); + gve_tx_free_ring_dqo(priv, tx, cfg); return -ENOMEM; } -int gve_tx_alloc_rings_dqo(struct gve_priv *priv) +int gve_tx_alloc_rings_dqo(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *cfg) { + struct gve_tx_ring *tx = cfg->tx; int err = 0; - int i; + int i, j; + + if (cfg->start_idx + cfg->num_rings > cfg->qcfg->max_queues) { + netif_err(priv, drv, priv->dev, + "Cannot alloc more than the max num of Tx rings\n"); + return -EINVAL; + } - for (i = 0; i < priv->tx_cfg.num_queues; i++) { - err = gve_tx_alloc_ring_dqo(priv, i); + if (cfg->start_idx == 0) { + tx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_tx_ring), + GFP_KERNEL); + if (!tx) + return -ENOMEM; + } else if (!tx) { + netif_err(priv, drv, priv->dev, + "Cannot alloc tx rings from a nonzero start idx without tx array\n"); + return -EINVAL; + } + + for (i = cfg->start_idx; i < cfg->start_idx + cfg->num_rings; i++) { + err = gve_tx_alloc_ring_dqo(priv, cfg, &tx[i], i); if (err) { netif_err(priv, drv, priv->dev, "Failed to alloc tx ring=%d: err=%d\n", @@ -360,27 +409,32 @@ int gve_tx_alloc_rings_dqo(struct gve_priv *priv) } } + cfg->tx = tx; return 0; err: - for (i--; i >= 0; i--) - gve_tx_free_ring_dqo(priv, i); - + for (j = 0; j < i; j++) + gve_tx_free_ring_dqo(priv, &tx[j], cfg); + if (cfg->start_idx == 0) + kvfree(tx); return err; } -void gve_tx_free_rings_dqo(struct gve_priv *priv) +void gve_tx_free_rings_dqo(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *cfg) { + struct gve_tx_ring *tx = cfg->tx; int i; - for (i = 0; i < priv->tx_cfg.num_queues; i++) { - struct gve_tx_ring *tx = &priv->tx[i]; + if (!tx) + return; - gve_clean_tx_done_dqo(priv, tx, /*napi=*/NULL); - netdev_tx_reset_queue(tx->netdev_txq); - gve_tx_clean_pending_packets(tx); + for (i = cfg->start_idx; i < cfg->start_idx + cfg->num_rings; i++) + gve_tx_free_ring_dqo(priv, &tx[i], cfg); - gve_tx_free_ring_dqo(priv, i); + if (cfg->start_idx == 0) { + kvfree(tx); + cfg->tx = NULL; } } diff --git a/drivers/net/ethernet/google/gve/gve_utils.c b/drivers/net/ethernet/google/gve/gve_utils.c index 26e08d753270..2349750075a5 100644 --- a/drivers/net/ethernet/google/gve/gve_utils.c +++ b/drivers/net/ethernet/google/gve/gve_utils.c @@ -8,6 +8,14 @@ #include "gve_adminq.h" #include "gve_utils.h" +bool gve_tx_was_added_to_block(struct gve_priv *priv, int queue_idx) +{ + struct gve_notify_block *block = + &priv->ntfy_blocks[gve_tx_idx_to_ntfy(priv, queue_idx)]; + + return block->tx != NULL; +} + void gve_tx_remove_from_block(struct gve_priv *priv, int queue_idx) { struct gve_notify_block *block = @@ -30,6 +38,14 @@ void gve_tx_add_to_block(struct gve_priv *priv, int queue_idx) queue_idx); } +bool gve_rx_was_added_to_block(struct gve_priv *priv, int queue_idx) +{ + struct gve_notify_block *block = + &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_idx)]; + + return block->rx != NULL; +} + void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx) { struct gve_notify_block *block = @@ -48,11 +64,9 @@ void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx) rx->ntfy_id = ntfy_idx; } -struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi, - struct gve_rx_slot_page_info *page_info, u16 len) +struct sk_buff *gve_rx_copy_data(struct net_device *dev, struct napi_struct *napi, + u8 *data, u16 len) { - void *va = page_info->page_address + page_info->page_offset + - page_info->pad; struct sk_buff *skb; skb = napi_alloc_skb(napi, len); @@ -60,12 +74,21 @@ struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi, return NULL; __skb_put(skb, len); - skb_copy_to_linear_data_offset(skb, 0, va, len); + skb_copy_to_linear_data_offset(skb, 0, data, len); skb->protocol = eth_type_trans(skb, dev); return skb; } +struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi, + struct gve_rx_slot_page_info *page_info, u16 len) +{ + void *va = page_info->page_address + page_info->page_offset + + page_info->pad; + + return gve_rx_copy_data(dev, napi, va, len); +} + void gve_dec_pagecnt_bias(struct gve_rx_slot_page_info *page_info) { page_info->pagecnt_bias--; @@ -81,3 +104,18 @@ void gve_dec_pagecnt_bias(struct gve_rx_slot_page_info *page_info) page_ref_add(page_info->page, INT_MAX - pagecount); } } + +void gve_add_napi(struct gve_priv *priv, int ntfy_idx, + int (*gve_poll)(struct napi_struct *, int)) +{ + struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + + netif_napi_add(priv->dev, &block->napi, gve_poll); +} + +void gve_remove_napi(struct gve_priv *priv, int ntfy_idx) +{ + struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + + netif_napi_del(&block->napi); +} diff --git a/drivers/net/ethernet/google/gve/gve_utils.h b/drivers/net/ethernet/google/gve/gve_utils.h index 324fd98a6112..bf2e9a0adb36 100644 --- a/drivers/net/ethernet/google/gve/gve_utils.h +++ b/drivers/net/ethernet/google/gve/gve_utils.h @@ -11,17 +11,25 @@ #include "gve.h" +bool gve_tx_was_added_to_block(struct gve_priv *priv, int queue_idx); void gve_tx_remove_from_block(struct gve_priv *priv, int queue_idx); void gve_tx_add_to_block(struct gve_priv *priv, int queue_idx); +bool gve_rx_was_added_to_block(struct gve_priv *priv, int queue_idx); void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx); void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx); +struct sk_buff *gve_rx_copy_data(struct net_device *dev, struct napi_struct *napi, + u8 *data, u16 len); + struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi, struct gve_rx_slot_page_info *page_info, u16 len); /* Decrement pagecnt_bias. Set it back to INT_MAX if it reached zero. */ void gve_dec_pagecnt_bias(struct gve_rx_slot_page_info *page_info); +void gve_add_napi(struct gve_priv *priv, int ntfy_idx, + int (*gve_poll)(struct napi_struct *, int)); +void gve_remove_napi(struct gve_priv *priv, int ntfy_idx); #endif /* _GVE_UTILS_H */ |