Diffstat (limited to 'drivers/net/ethernet/google/gve/gve_main.c')
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_main.c | 1222
1 file changed, 658 insertions(+), 564 deletions(-)
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 166bd827a6d7..e1ffbd561fac 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
 /* Google virtual Ethernet (gve) driver
  *
- * Copyright (C) 2015-2021 Google, Inc.
+ * Copyright (C) 2015-2024 Google LLC
  */
 
 #include <linux/bpf.h>
@@ -9,6 +9,7 @@
 #include <linux/etherdevice.h>
 #include <linux/filter.h>
 #include <linux/interrupt.h>
+#include <linux/irq.h>
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/sched.h>
@@ -16,6 +17,7 @@
 #include <linux/workqueue.h>
 #include <linux/utsname.h>
 #include <linux/version.h>
+#include <net/netdev_queues.h>
 #include <net/sch_generic.h>
 #include <net/xdp_sock_drv.h>
 #include "gve.h"
@@ -139,6 +141,86 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
 	}
 }
 
+static int gve_alloc_flow_rule_caches(struct gve_priv *priv)
+{
+	struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;
+	int err = 0;
+
+	if (!priv->max_flow_rules)
+		return 0;
+
+	flow_rules_cache->rules_cache =
+		kvcalloc(GVE_FLOW_RULES_CACHE_SIZE, sizeof(*flow_rules_cache->rules_cache),
+			 GFP_KERNEL);
+	if (!flow_rules_cache->rules_cache) {
+		dev_err(&priv->pdev->dev, "Cannot alloc flow rules cache\n");
+		return -ENOMEM;
+	}
+
+	flow_rules_cache->rule_ids_cache =
+		kvcalloc(GVE_FLOW_RULE_IDS_CACHE_SIZE, sizeof(*flow_rules_cache->rule_ids_cache),
+			 GFP_KERNEL);
+	if (!flow_rules_cache->rule_ids_cache) {
+		dev_err(&priv->pdev->dev, "Cannot alloc flow rule ids cache\n");
+		err = -ENOMEM;
+		goto free_rules_cache;
+	}
+
+	return 0;
+
+free_rules_cache:
+	kvfree(flow_rules_cache->rules_cache);
+	flow_rules_cache->rules_cache = NULL;
+	return err;
+}
+
+static void gve_free_flow_rule_caches(struct gve_priv *priv)
+{
+	struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache;
+
+	kvfree(flow_rules_cache->rule_ids_cache);
+	flow_rules_cache->rule_ids_cache = NULL;
+	kvfree(flow_rules_cache->rules_cache);
+	flow_rules_cache->rules_cache = NULL;
+}
+
+static int gve_alloc_rss_config_cache(struct gve_priv *priv)
+{
+	struct gve_rss_config *rss_config = &priv->rss_config;
+
+	if (!priv->cache_rss_config)
+		return 0;
+
+	rss_config->hash_key = kcalloc(priv->rss_key_size,
+				       sizeof(rss_config->hash_key[0]),
+				       GFP_KERNEL);
+	if (!rss_config->hash_key)
+		return -ENOMEM;
+
+	rss_config->hash_lut = kcalloc(priv->rss_lut_size,
+				       sizeof(rss_config->hash_lut[0]),
+				       GFP_KERNEL);
+	if (!rss_config->hash_lut)
+		goto free_rss_key_cache;
+
+	return 0;
+
+free_rss_key_cache:
+	kfree(rss_config->hash_key);
+	rss_config->hash_key = NULL;
+	return -ENOMEM;
+}
+
+static void gve_free_rss_config_cache(struct gve_priv *priv)
+{
+	struct gve_rss_config *rss_config = &priv->rss_config;
+
+	kfree(rss_config->hash_key);
+	kfree(rss_config->hash_lut);
+
+	memset(rss_config, 0, sizeof(*rss_config));
+}
+
 static int gve_alloc_counter_array(struct gve_priv *priv)
 {
 	priv->counter_array =
@@ -220,7 +302,7 @@ static void gve_free_stats_report(struct gve_priv *priv)
 	if (!priv->stats_report)
 		return;
 
-	del_timer_sync(&priv->stats_report_timer);
+	timer_delete_sync(&priv->stats_report_timer);
 	dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
 			  priv->stats_report, priv->stats_report_bus);
 	priv->stats_report = NULL;
@@ -253,6 +335,18 @@ static irqreturn_t gve_intr_dqo(int irq, void *arg)
 	return IRQ_HANDLED;
 }
 
+static int gve_is_napi_on_home_cpu(struct gve_priv *priv, u32 irq)
+{
+	int cpu_curr = smp_processor_id();
+	const struct cpumask *aff_mask;
+
+	aff_mask = irq_get_effective_affinity_mask(irq);
+	if (unlikely(!aff_mask))
+		return 1;
+
+	return cpumask_test_cpu(cpu_curr, aff_mask);
+}
+
 int gve_napi_poll(struct napi_struct *napi, int budget)
 {
 	struct gve_notify_block *block;
@@ -276,6 +370,14 @@ int gve_napi_poll(struct napi_struct *napi, int budget)
 
 	if (block->rx) {
 		work_done = gve_rx_poll(block, budget);
+
+		/* Poll XSK TX as part of RX NAPI. Setup re-poll based on max of
+		 * TX and RX work done.
+		 */
+		if (priv->xdp_prog)
+			work_done = max_t(int, work_done,
+					  gve_xsk_tx_poll(block, budget));
+
 		reschedule |= work_done == budget;
 	}
 
@@ -322,8 +424,21 @@ int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
 		reschedule |= work_done == budget;
 	}
 
-	if (reschedule)
-		return budget;
+	if (reschedule) {
+		/* Reschedule by returning budget only if already on the correct
+		 * cpu.
+		 */
+		if (likely(gve_is_napi_on_home_cpu(priv, block->irq)))
+			return budget;
+
+		/* If not on the cpu with which this queue's irq has affinity
+		 * with, we avoid rescheduling napi and arm the irq instead so
+		 * that napi gets rescheduled back eventually onto the right
+		 * cpu.
+		 */
+		if (work_done == budget)
+			work_done--;
+	}
 
 	if (likely(napi_complete_done(napi, work_done))) {
 		/* Enable interrupts again.
@@ -428,6 +543,7 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
 				  "Failed to receive msix vector %d\n", i);
 			goto abort_with_some_ntfy_blocks;
 		}
+		block->irq = priv->msix_vectors[msix_idx].vector;
 		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
 				      get_cpu_mask(i % active_cpus));
 		block->irq_db_index = &priv->irq_db_indices[i].index;
@@ -441,6 +557,7 @@ abort_with_some_ntfy_blocks:
 		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
 				      NULL);
 		free_irq(priv->msix_vectors[msix_idx].vector, block);
+		block->irq = 0;
 	}
 	kvfree(priv->ntfy_blocks);
 	priv->ntfy_blocks = NULL;
@@ -474,6 +591,7 @@ static void gve_free_notify_blocks(struct gve_priv *priv)
 		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
 				      NULL);
 		free_irq(priv->msix_vectors[msix_idx].vector, block);
+		block->irq = 0;
 	}
 	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
 	kvfree(priv->ntfy_blocks);
@@ -491,9 +609,15 @@ static int gve_setup_device_resources(struct gve_priv *priv)
 {
 	int err;
 
-	err = gve_alloc_counter_array(priv);
+	err = gve_alloc_flow_rule_caches(priv);
 	if (err)
 		return err;
+	err = gve_alloc_rss_config_cache(priv);
+	if (err)
+		goto abort_with_flow_rule_caches;
+	err = gve_alloc_counter_array(priv);
+	if (err)
+		goto abort_with_rss_config_cache;
 	err = gve_alloc_notify_blocks(priv);
 	if (err)
 		goto abort_with_counter;
@@ -527,6 +651,12 @@ static int gve_setup_device_resources(struct gve_priv *priv)
 		}
 	}
 
+	err = gve_init_rss_config(priv, priv->rx_cfg.num_queues);
+	if (err) {
+		dev_err(&priv->pdev->dev, "Failed to init RSS config");
+		goto abort_with_ptype_lut;
+	}
+
 	err = gve_adminq_report_stats(priv, priv->stats_report_len,
 				      priv->stats_report_bus,
 				      GVE_STATS_REPORT_TIMER_PERIOD);
@@ -545,6 +675,10 @@ abort_with_ntfy_blocks:
 	gve_free_notify_blocks(priv);
 abort_with_counter:
 	gve_free_counter_array(priv);
+abort_with_rss_config_cache:
+	gve_free_rss_config_cache(priv);
+abort_with_flow_rule_caches:
+	gve_free_flow_rule_caches(priv);
 	return err;
 }
 
@@ -557,6 +691,12 @@ static void gve_teardown_device_resources(struct gve_priv *priv)
 
 	/* Tell device its resources are being freed */
 	if (gve_get_device_resources_ok(priv)) {
+		err = gve_flow_rules_reset(priv);
+		if (err) {
+			dev_err(&priv->pdev->dev,
+				"Failed to reset flow rules: err=%d\n", err);
+			gve_trigger_reset(priv);
+		}
 		/* detach the stats report */
 		err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
 		if (err) {
@@ -576,43 +716,44 @@ static void gve_teardown_device_resources(struct gve_priv *priv)
 	kvfree(priv->ptype_lut_dqo);
 	priv->ptype_lut_dqo = NULL;
 
+	gve_free_flow_rule_caches(priv);
+	gve_free_rss_config_cache(priv);
 	gve_free_counter_array(priv);
 	gve_free_notify_blocks(priv);
 	gve_free_stats_report(priv);
 	gve_clear_device_resources_ok(priv);
 }
 
-static int gve_unregister_qpl(struct gve_priv *priv, u32 i)
+static int gve_unregister_qpl(struct gve_priv *priv,
+			      struct gve_queue_page_list *qpl)
 {
 	int err;
 
-	err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
+	if (!qpl)
+		return 0;
+
+	err = gve_adminq_unregister_page_list(priv, qpl->id);
 	if (err) {
 		netif_err(priv, drv, priv->dev,
 			  "Failed to unregister queue page list %d\n",
-			  priv->qpls[i].id);
+			  qpl->id);
 		return err;
 	}
 
-	priv->num_registered_pages -= priv->qpls[i].num_entries;
+	priv->num_registered_pages -= qpl->num_entries;
 	return 0;
 }
 
-static int gve_register_qpl(struct gve_priv *priv, u32 i)
+static int gve_register_qpl(struct gve_priv *priv,
+			    struct gve_queue_page_list *qpl)
 {
-	int num_rx_qpls;
 	int pages;
 	int err;
 
-	/* Rx QPLs succeed Tx QPLs in the priv->qpls array. */
-	num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));
-	if (i >= gve_rx_start_qpl_id(&priv->tx_cfg) + num_rx_qpls) {
-		netif_err(priv, drv, priv->dev,
-			  "Cannot register nonexisting QPL at index %d\n", i);
-		return -EINVAL;
-	}
+	if (!qpl)
+		return 0;
 
-	pages = priv->qpls[i].num_entries;
+	pages = qpl->num_entries;
 
 	if (pages + priv->num_registered_pages > priv->max_registered_pages) {
 		netif_err(priv, drv, priv->dev,
@@ -622,14 +763,11 @@ static int gve_register_qpl(struct gve_priv *priv, u32 i)
 		return -EINVAL;
 	}
 
-	err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
+	err = gve_adminq_register_page_list(priv, qpl);
 	if (err) {
 		netif_err(priv, drv, priv->dev,
 			  "failed to register queue page list %d\n",
-			  priv->qpls[i].id);
-		/* This failure will trigger a reset - no need to clean
-		 * up
-		 */
+			  qpl->id);
 		return err;
 	}
 
@@ -637,43 +775,43 @@ static int gve_register_qpl(struct gve_priv *priv, u32 i)
 	return 0;
 }
 
-static int gve_register_xdp_qpls(struct gve_priv *priv)
+static struct gve_queue_page_list *gve_tx_get_qpl(struct gve_priv *priv, int idx)
 {
-	int start_id;
-	int err;
-	int i;
+	struct gve_tx_ring *tx = &priv->tx[idx];
 
-	start_id = gve_xdp_tx_start_queue_id(priv);
-	for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
-		err = gve_register_qpl(priv, i);
-		/* This failure will trigger a reset - no need to clean up */
-		if (err)
-			return err;
-	}
-	return 0;
+	if (gve_is_gqi(priv))
+		return tx->tx_fifo.qpl;
+	else
+		return tx->dqo.qpl;
+}
+
+static struct gve_queue_page_list *gve_rx_get_qpl(struct gve_priv *priv, int idx)
+{
+	struct gve_rx_ring *rx = &priv->rx[idx];
+
+	if (gve_is_gqi(priv))
+		return rx->data.qpl;
+	else
+		return rx->dqo.qpl;
 }
 
 static int gve_register_qpls(struct gve_priv *priv)
 {
 	int num_tx_qpls, num_rx_qpls;
-	int start_id;
 	int err;
 	int i;
 
-	num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv),
-				      gve_is_qpl(priv));
+	num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv));
 	num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));
 
 	for (i = 0; i < num_tx_qpls; i++) {
-		err = gve_register_qpl(priv, i);
+		err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i));
 		if (err)
 			return err;
 	}
 
-	/* there might be a gap between the tx and rx qpl ids */
-	start_id = gve_rx_start_qpl_id(&priv->tx_cfg);
 	for (i = 0; i < num_rx_qpls; i++) {
-		err = gve_register_qpl(priv, start_id + i);
+		err = gve_register_qpl(priv, gve_rx_get_qpl(priv, i));
 		if (err)
 			return err;
 	}
@@ -681,43 +819,24 @@ static int gve_register_qpls(struct gve_priv *priv)
 	return 0;
 }
 
-static int gve_unregister_xdp_qpls(struct gve_priv *priv)
-{
-	int start_id;
-	int err;
-	int i;
-
-	start_id = gve_xdp_tx_start_queue_id(priv);
-	for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
-		err = gve_unregister_qpl(priv, i);
-		/* This failure will trigger a reset - no need to clean */
-		if (err)
-			return err;
-	}
-	return 0;
-}
-
 static int gve_unregister_qpls(struct gve_priv *priv)
 {
 	int num_tx_qpls, num_rx_qpls;
-	int start_id;
 	int err;
 	int i;
 
-	num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv),
-				      gve_is_qpl(priv));
+	num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv));
 	num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));
 
 	for (i = 0; i < num_tx_qpls; i++) {
-		err = gve_unregister_qpl(priv, i);
+		err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i));
 		/* This failure will trigger a reset - no need to clean */
 		if (err)
 			return err;
 	}
 
-	start_id = gve_rx_start_qpl_id(&priv->tx_cfg);
 	for (i = 0; i < num_rx_qpls; i++) {
-		err = gve_unregister_qpl(priv, start_id + i);
+		err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, i));
 		/* This failure will trigger a reset - no need to clean */
 		if (err)
 			return err;
@@ -725,27 +844,6 @@ static int gve_unregister_qpls(struct gve_priv *priv)
 	return 0;
 }
 
-static int gve_create_xdp_rings(struct gve_priv *priv)
-{
-	int err;
-
-	err = gve_adminq_create_tx_queues(priv,
-					  gve_xdp_tx_start_queue_id(priv),
-					  priv->num_xdp_queues);
-	if (err) {
-		netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n",
-			  priv->num_xdp_queues);
-		/* This failure will trigger a reset - no need to clean
-		 * up
-		 */
-		return err;
-	}
-	netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n",
-		  priv->num_xdp_queues);
-
-	return 0;
-}
-
 static int gve_create_rings(struct gve_priv *priv)
 {
 	int num_tx_queues = gve_num_tx_queues(priv);
@@ -801,7 +899,7 @@ static void init_xdp_sync_stats(struct gve_priv *priv)
 	int i;
 
 	/* Init stats */
-	for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
+	for (i = start_id; i < start_id + priv->tx_cfg.num_xdp_queues; i++) {
 		int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
 
 		u64_stats_init(&priv->tx[i].statss);
@@ -828,22 +926,19 @@ static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv,
 {
 	cfg->qcfg = &priv->tx_cfg;
 	cfg->raw_addressing = !gve_is_qpl(priv);
-	cfg->qpls = priv->qpls;
-	cfg->qpl_cfg = &priv->qpl_cfg;
 	cfg->ring_size = priv->tx_desc_cnt;
-	cfg->start_idx = 0;
-	cfg->num_rings = gve_num_tx_queues(priv);
+	cfg->num_xdp_rings = cfg->qcfg->num_xdp_queues;
 	cfg->tx = priv->tx;
 }
 
-static void gve_tx_stop_rings(struct gve_priv *priv, int start_id, int num_rings)
+static void gve_tx_stop_rings(struct gve_priv *priv, int num_rings)
 {
 	int i;
 
 	if (!priv->tx)
 		return;
 
-	for (i = start_id; i < start_id + num_rings; i++) {
+	for (i = 0; i < num_rings; i++) {
 		if (gve_is_gqi(priv))
 			gve_tx_stop_ring_gqi(priv, i);
 		else
@@ -851,12 +946,11 @@ static void gve_tx_stop_rings(struct gve_priv *priv, int start_id, int num_rings
 	}
 }
 
-static void gve_tx_start_rings(struct gve_priv *priv, int start_id,
-			       int num_rings)
+static void gve_tx_start_rings(struct gve_priv *priv, int num_rings)
 {
 	int i;
 
-	for (i = start_id; i < start_id + num_rings; i++) {
+	for (i = 0; i < num_rings; i++) {
 		if (gve_is_gqi(priv))
 			gve_tx_start_ring_gqi(priv, i);
 		else
@@ -864,31 +958,9 @@ static void gve_tx_start_rings(struct gve_priv *priv, int start_id,
 	}
 }
 
-static int gve_alloc_xdp_rings(struct gve_priv *priv)
-{
-	struct gve_tx_alloc_rings_cfg cfg = {0};
-	int err = 0;
-
-	if (!priv->num_xdp_queues)
-		return 0;
-
-	gve_tx_get_curr_alloc_cfg(priv, &cfg);
-	cfg.start_idx = gve_xdp_tx_start_queue_id(priv);
-	cfg.num_rings = priv->num_xdp_queues;
-
-	err = gve_tx_alloc_rings_gqi(priv, &cfg);
-	if (err)
-		return err;
-
-	gve_tx_start_rings(priv, cfg.start_idx, cfg.num_rings);
-	init_xdp_sync_stats(priv);
-
-	return 0;
-}
-
-static int gve_alloc_rings(struct gve_priv *priv,
-			   struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
-			   struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
+static int gve_queues_mem_alloc(struct gve_priv *priv,
+				struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
+				struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
 {
 	int err;
 
@@ -916,26 +988,6 @@ free_tx:
 	return err;
 }
 
-static int gve_destroy_xdp_rings(struct gve_priv *priv)
-{
-	int start_id;
-	int err;
-
-	start_id = gve_xdp_tx_start_queue_id(priv);
-	err = gve_adminq_destroy_tx_queues(priv,
-					   start_id,
-					   priv->num_xdp_queues);
-	if (err) {
-		netif_err(priv, drv, priv->dev,
-			  "failed to destroy XDP queues\n");
-		/* This failure will trigger a reset - no need to clean up */
-		return err;
-	}
-	netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n");
-
-	return 0;
-}
-
 static int gve_destroy_rings(struct gve_priv *priv)
 {
 	int num_tx_queues = gve_num_tx_queues(priv);
@@ -960,23 +1012,9 @@ static int gve_destroy_rings(struct gve_priv *priv)
 	return 0;
 }
 
-static void gve_free_xdp_rings(struct gve_priv *priv)
-{
-	struct gve_tx_alloc_rings_cfg cfg = {0};
-
-	gve_tx_get_curr_alloc_cfg(priv, &cfg);
-	cfg.start_idx = gve_xdp_tx_start_queue_id(priv);
-	cfg.num_rings = priv->num_xdp_queues;
-
-	if (priv->tx) {
-		gve_tx_stop_rings(priv, cfg.start_idx, cfg.num_rings);
-		gve_tx_free_rings_gqi(priv, &cfg);
-	}
-}
-
-static void gve_free_rings(struct gve_priv *priv,
-			   struct gve_tx_alloc_rings_cfg *tx_cfg,
-			   struct gve_rx_alloc_rings_cfg *rx_cfg)
+static void gve_queues_mem_free(struct gve_priv *priv,
+				struct gve_tx_alloc_rings_cfg *tx_cfg,
+				struct gve_rx_alloc_rings_cfg *rx_cfg)
 {
 	if (gve_is_gqi(priv)) {
 		gve_tx_free_rings_gqi(priv, tx_cfg);
@@ -1005,35 +1043,41 @@ int gve_alloc_page(struct gve_priv *priv, struct device *dev,
 	return 0;
 }
 
-static int gve_alloc_queue_page_list(struct gve_priv *priv,
-				     struct gve_queue_page_list *qpl,
-				     u32 id, int pages)
+struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv,
+						      u32 id, int pages)
 {
+	struct gve_queue_page_list *qpl;
 	int err;
 	int i;
 
+	qpl = kvzalloc(sizeof(*qpl), GFP_KERNEL);
+	if (!qpl)
+		return NULL;
+
 	qpl->id = id;
 	qpl->num_entries = 0;
 	qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL);
-	/* caller handles clean up */
 	if (!qpl->pages)
-		return -ENOMEM;
+		goto abort;
+
 	qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL);
-	/* caller handles clean up */
 	if (!qpl->page_buses)
-		return -ENOMEM;
+		goto abort;
 
 	for (i = 0; i < pages; i++) {
 		err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
 				     &qpl->page_buses[i],
 				     gve_qpl_dma_dir(priv, id), GFP_KERNEL);
-		/* caller handles clean up */
 		if (err)
-			return -ENOMEM;
+			goto abort;
 		qpl->num_entries++;
 	}
 
-	return 0;
+	return qpl;
+
+abort:
+	gve_free_queue_page_list(priv, qpl, id);
+	return NULL;
 }
 
 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
@@ -1045,14 +1089,16 @@ void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
 		put_page(page);
 }
 
-static void gve_free_queue_page_list(struct gve_priv *priv,
-				     struct gve_queue_page_list *qpl,
-				     int id)
+void gve_free_queue_page_list(struct gve_priv *priv,
+			      struct gve_queue_page_list *qpl,
+			      u32 id)
 {
 	int i;
 
-	if (!qpl->pages)
+	if (!qpl)
 		return;
+	if (!qpl->pages)
+		goto free_qpl;
 	if (!qpl->page_buses)
 		goto free_pages;
 
@@ -1065,120 +1111,8 @@ static void gve_free_queue_page_list(struct gve_priv *priv,
 free_pages:
 	kvfree(qpl->pages);
 	qpl->pages = NULL;
-}
-
-static void gve_free_n_qpls(struct gve_priv *priv,
-			    struct gve_queue_page_list *qpls,
-			    int start_id,
-			    int num_qpls)
-{
-	int i;
-
-	for (i = start_id; i < start_id + num_qpls; i++)
-		gve_free_queue_page_list(priv, &qpls[i], i);
-}
-
-static int gve_alloc_n_qpls(struct gve_priv *priv,
-			    struct gve_queue_page_list *qpls,
-			    int page_count,
-			    int start_id,
-			    int num_qpls)
-{
-	int err;
-	int i;
-
-	for (i = start_id; i < start_id + num_qpls; i++) {
-		err = gve_alloc_queue_page_list(priv, &qpls[i], i, page_count);
-		if (err)
-			goto free_qpls;
-	}
-
-	return 0;
-
-free_qpls:
-	/* Must include the failing QPL too for gve_alloc_queue_page_list fails
-	 * without cleaning up.
-	 */
-	gve_free_n_qpls(priv, qpls, start_id, i - start_id + 1);
-	return err;
-}
-
-static int gve_alloc_qpls(struct gve_priv *priv,
-			  struct gve_qpls_alloc_cfg *cfg)
-{
-	int max_queues = cfg->tx_cfg->max_queues + cfg->rx_cfg->max_queues;
-	int rx_start_id, tx_num_qpls, rx_num_qpls;
-	struct gve_queue_page_list *qpls;
-	int page_count;
-	int err;
-
-	if (cfg->raw_addressing)
-		return 0;
-
-	qpls = kvcalloc(max_queues, sizeof(*qpls), GFP_KERNEL);
-	if (!qpls)
-		return -ENOMEM;
-
-	cfg->qpl_cfg->qpl_map_size = BITS_TO_LONGS(max_queues) *
-				     sizeof(unsigned long) * BITS_PER_BYTE;
-	cfg->qpl_cfg->qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues),
-					    sizeof(unsigned long), GFP_KERNEL);
-	if (!cfg->qpl_cfg->qpl_id_map) {
-		err = -ENOMEM;
-		goto free_qpl_array;
-	}
-
-	/* Allocate TX QPLs */
-	page_count = priv->tx_pages_per_qpl;
-	tx_num_qpls = gve_num_tx_qpls(cfg->tx_cfg, cfg->num_xdp_queues,
-				      gve_is_qpl(priv));
-	err = gve_alloc_n_qpls(priv, qpls, page_count, 0, tx_num_qpls);
-	if (err)
-		goto free_qpl_map;
-
-	/* Allocate RX QPLs */
-	rx_start_id = gve_rx_start_qpl_id(cfg->tx_cfg);
-	/* For GQI_QPL number of pages allocated have 1:1 relationship with
-	 * number of descriptors. For DQO, number of pages required are
-	 * more than descriptors (because of out of order completions).
-	 */
-	page_count = cfg->is_gqi ? priv->rx_data_slot_cnt : priv->rx_pages_per_qpl;
-	rx_num_qpls = gve_num_rx_qpls(cfg->rx_cfg, gve_is_qpl(priv));
-	err = gve_alloc_n_qpls(priv, qpls, page_count, rx_start_id, rx_num_qpls);
-	if (err)
-		goto free_tx_qpls;
-
-	cfg->qpls = qpls;
-	return 0;
-
-free_tx_qpls:
-	gve_free_n_qpls(priv, qpls, 0, tx_num_qpls);
-free_qpl_map:
-	kvfree(cfg->qpl_cfg->qpl_id_map);
-	cfg->qpl_cfg->qpl_id_map = NULL;
-free_qpl_array:
-	kvfree(qpls);
-	return err;
-}
-
-static void gve_free_qpls(struct gve_priv *priv,
-			  struct gve_qpls_alloc_cfg *cfg)
-{
-	int max_queues = cfg->tx_cfg->max_queues + cfg->rx_cfg->max_queues;
-	struct gve_queue_page_list *qpls = cfg->qpls;
-	int i;
-
-	if (!qpls)
-		return;
-
-	kvfree(cfg->qpl_cfg->qpl_id_map);
-	cfg->qpl_cfg->qpl_id_map = NULL;
-
-	for (i = 0; i < max_queues; i++)
-		gve_free_queue_page_list(priv, &qpls[i], i);
-
-	kvfree(qpls);
-	cfg->qpls = NULL;
+free_qpl:
+	kvfree(qpl);
 }
 
 /* Use this to schedule a reset when the device is capable of continuing
@@ -1204,7 +1138,7 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
 	int i, j;
 	u32 tx_qid;
 
-	if (!priv->num_xdp_queues)
+	if (!priv->tx_cfg.num_xdp_queues)
 		return 0;
 
 	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
@@ -1215,8 +1149,14 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
 				       napi->napi_id);
 		if (err)
 			goto err;
-		err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
-						 MEM_TYPE_PAGE_SHARED, NULL);
+		if (gve_is_qpl(priv))
+			err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
+							 MEM_TYPE_PAGE_SHARED,
+							 NULL);
+		else
+			err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
+							 MEM_TYPE_PAGE_POOL,
+							 rx->dqo.page_pool);
 		if (err)
 			goto err;
 		rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
@@ -1234,7 +1174,7 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
 		}
 	}
 
-	for (i = 0; i < priv->num_xdp_queues; i++) {
+	for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) {
 		tx_qid = gve_xdp_tx_queue_id(priv, i);
 		priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i);
 	}
@@ -1255,7 +1195,7 @@ static void gve_unreg_xdp_info(struct gve_priv *priv)
 {
 	int i, tx_qid;
 
-	if (!priv->num_xdp_queues)
+	if (!priv->tx_cfg.num_xdp_queues || !priv->rx || !priv->tx)
 		return;
 
 	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
@@ -1268,7 +1208,7 @@ static void gve_unreg_xdp_info(struct gve_priv *priv)
 		}
 	}
 
-	for (i = 0; i < priv->num_xdp_queues; i++) {
+	for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) {
 		tx_qid = gve_xdp_tx_queue_id(priv, i);
 		priv->tx[tx_qid].xsk_pool = NULL;
 	}
@@ -1282,118 +1222,69 @@ static void gve_drain_page_cache(struct gve_priv *priv)
 		page_frag_cache_drain(&priv->rx[i].page_cache);
 }
 
-static void gve_qpls_get_curr_alloc_cfg(struct gve_priv *priv,
-					struct gve_qpls_alloc_cfg *cfg)
-{
-	cfg->raw_addressing = !gve_is_qpl(priv);
-	cfg->is_gqi = gve_is_gqi(priv);
-	cfg->num_xdp_queues = priv->num_xdp_queues;
-	cfg->qpl_cfg = &priv->qpl_cfg;
-	cfg->tx_cfg = &priv->tx_cfg;
-	cfg->rx_cfg = &priv->rx_cfg;
-	cfg->qpls = priv->qpls;
-}
-
 static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv,
 				      struct gve_rx_alloc_rings_cfg *cfg)
 {
-	cfg->qcfg = &priv->rx_cfg;
+	cfg->qcfg_rx = &priv->rx_cfg;
 	cfg->qcfg_tx = &priv->tx_cfg;
 	cfg->raw_addressing = !gve_is_qpl(priv);
 	cfg->enable_header_split = priv->header_split_enabled;
-	cfg->qpls = priv->qpls;
-	cfg->qpl_cfg = &priv->qpl_cfg;
 	cfg->ring_size = priv->rx_desc_cnt;
-	cfg->packet_buffer_size = gve_is_gqi(priv) ?
-				  GVE_DEFAULT_RX_BUFFER_SIZE :
-				  priv->data_buffer_size_dqo;
+	cfg->packet_buffer_size = priv->rx_cfg.packet_buffer_size;
 	cfg->rx = priv->rx;
+	cfg->xdp = !!cfg->qcfg_tx->num_xdp_queues;
 }
 
-static void gve_get_curr_alloc_cfgs(struct gve_priv *priv,
-				    struct gve_qpls_alloc_cfg *qpls_alloc_cfg,
-				    struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
-				    struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
+void gve_get_curr_alloc_cfgs(struct gve_priv *priv,
+			     struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
+			     struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
 {
-	gve_qpls_get_curr_alloc_cfg(priv, qpls_alloc_cfg);
 	gve_tx_get_curr_alloc_cfg(priv, tx_alloc_cfg);
 	gve_rx_get_curr_alloc_cfg(priv, rx_alloc_cfg);
 }
 
-static void gve_rx_start_rings(struct gve_priv *priv, int num_rings)
+static void gve_rx_start_ring(struct gve_priv *priv, int i)
 {
-	int i;
-
-	for (i = 0; i < num_rings; i++) {
-		if (gve_is_gqi(priv))
-			gve_rx_start_ring_gqi(priv, i);
-		else
-			gve_rx_start_ring_dqo(priv, i);
-	}
+	if (gve_is_gqi(priv))
+		gve_rx_start_ring_gqi(priv, i);
+	else
+		gve_rx_start_ring_dqo(priv, i);
 }
 
-static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings)
+static void gve_rx_start_rings(struct gve_priv *priv, int num_rings)
 {
 	int i;
 
-	if (!priv->rx)
-		return;
-
-	for (i = 0; i < num_rings; i++) {
-		if (gve_is_gqi(priv))
-			gve_rx_stop_ring_gqi(priv, i);
-		else
-			gve_rx_stop_ring_dqo(priv, i);
-	}
+	for (i = 0; i < num_rings; i++)
+		gve_rx_start_ring(priv, i);
 }
 
-static void gve_queues_mem_free(struct gve_priv *priv,
-				struct gve_qpls_alloc_cfg *qpls_alloc_cfg,
-				struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
-				struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
+static void gve_rx_stop_ring(struct gve_priv *priv, int i)
 {
-	gve_free_rings(priv, tx_alloc_cfg, rx_alloc_cfg);
-	gve_free_qpls(priv, qpls_alloc_cfg);
+	if (gve_is_gqi(priv))
+		gve_rx_stop_ring_gqi(priv, i);
+	else
+		gve_rx_stop_ring_dqo(priv, i);
 }
 
-static int gve_queues_mem_alloc(struct gve_priv *priv,
-				struct gve_qpls_alloc_cfg *qpls_alloc_cfg,
-				struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
-				struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
+static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings)
 {
-	int err;
-
-	err = gve_alloc_qpls(priv, qpls_alloc_cfg);
-	if (err) {
-		netif_err(priv, drv, priv->dev, "Failed to alloc QPLs\n");
-		return err;
-	}
-	tx_alloc_cfg->qpls = qpls_alloc_cfg->qpls;
-	rx_alloc_cfg->qpls = qpls_alloc_cfg->qpls;
-	err = gve_alloc_rings(priv, tx_alloc_cfg, rx_alloc_cfg);
-	if (err) {
-		netif_err(priv, drv, priv->dev, "Failed to alloc rings\n");
-		goto free_qpls;
-	}
+	int i;
 
-	return 0;
+	if (!priv->rx)
+		return;
 
-free_qpls:
-	gve_free_qpls(priv, qpls_alloc_cfg);
-	return err;
+	for (i = 0; i < num_rings; i++)
		gve_rx_stop_ring(priv, i);
 }
 
 static void gve_queues_mem_remove(struct gve_priv *priv)
 {
 	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
 	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
-	struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0};
 
-	gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg,
-				&tx_alloc_cfg, &rx_alloc_cfg);
-	gve_queues_mem_free(priv, &qpls_alloc_cfg,
-			    &tx_alloc_cfg, &rx_alloc_cfg);
-	priv->qpls = NULL;
+	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
+	gve_queues_mem_free(priv, &tx_alloc_cfg, &rx_alloc_cfg);
 	priv->tx = NULL;
 	priv->rx = NULL;
 }
@@ -1402,7 +1293,6 @@ static void gve_queues_mem_remove(struct gve_priv *priv)
  * No memory is allocated. Passed-in memory is freed on errors.
  */
 static int gve_queues_start(struct gve_priv *priv,
-			    struct gve_qpls_alloc_cfg *qpls_alloc_cfg,
 			    struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
 			    struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
 {
@@ -1410,24 +1300,18 @@ static int gve_queues_start(struct gve_priv *priv,
 	int err;
 
 	/* Record new resources into priv */
-	priv->qpls = qpls_alloc_cfg->qpls;
 	priv->tx = tx_alloc_cfg->tx;
 	priv->rx = rx_alloc_cfg->rx;
 
 	/* Record new configs into priv */
-	priv->qpl_cfg = *qpls_alloc_cfg->qpl_cfg;
 	priv->tx_cfg = *tx_alloc_cfg->qcfg;
-	priv->rx_cfg = *rx_alloc_cfg->qcfg;
+	priv->tx_cfg.num_xdp_queues = tx_alloc_cfg->num_xdp_rings;
+	priv->rx_cfg = *rx_alloc_cfg->qcfg_rx;
 	priv->tx_desc_cnt = tx_alloc_cfg->ring_size;
 	priv->rx_desc_cnt = rx_alloc_cfg->ring_size;
 
-	if (priv->xdp_prog)
-		priv->num_xdp_queues = priv->rx_cfg.num_queues;
-	else
-		priv->num_xdp_queues = 0;
-
-	gve_tx_start_rings(priv, 0, tx_alloc_cfg->num_rings);
-	gve_rx_start_rings(priv, rx_alloc_cfg->qcfg->num_queues);
+	gve_tx_start_rings(priv, gve_num_tx_queues(priv));
+	gve_rx_start_rings(priv, rx_alloc_cfg->qcfg_rx->num_queues);
 	gve_init_sync_stats(priv);
 
 	err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
@@ -1441,12 +1325,18 @@ static int gve_queues_start(struct gve_priv *priv,
 	if (err)
 		goto stop_and_free_rings;
 
+	if (rx_alloc_cfg->reset_rss) {
+		err = gve_init_rss_config(priv, priv->rx_cfg.num_queues);
+		if (err)
+			goto reset;
+	}
+
 	err = gve_register_qpls(priv);
 	if (err)
 		goto reset;
 
 	priv->header_split_enabled = rx_alloc_cfg->enable_header_split;
-	priv->data_buffer_size_dqo = rx_alloc_cfg->packet_buffer_size;
+	priv->rx_cfg.packet_buffer_size = rx_alloc_cfg->packet_buffer_size;
 
 	err = gve_create_rings(priv);
 	if (err)
@@ -1473,7 +1363,7 @@ reset:
 	/* return the original error */
 	return err;
 stop_and_free_rings:
-	gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv));
+	gve_tx_stop_rings(priv, gve_num_tx_queues(priv));
 	gve_rx_stop_rings(priv, priv->rx_cfg.num_queues);
 	gve_queues_mem_remove(priv);
 	return err;
@@ -1483,23 +1373,19 @@ static int gve_open(struct net_device *dev)
 {
 	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
 	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
-	struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0};
 	struct gve_priv *priv = netdev_priv(dev);
 	int err;
 
-	gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg,
-				&tx_alloc_cfg, &rx_alloc_cfg);
+	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
 
-	err = gve_queues_mem_alloc(priv, &qpls_alloc_cfg,
-				   &tx_alloc_cfg, &rx_alloc_cfg);
+	err = gve_queues_mem_alloc(priv, &tx_alloc_cfg, &rx_alloc_cfg);
 	if (err)
 		return err;
 
 	/* No need to free on error: ownership of resources is lost after
	 * calling gve_queues_start.
 	 */
-	err = gve_queues_start(priv, &qpls_alloc_cfg,
-			       &tx_alloc_cfg, &rx_alloc_cfg);
+	err = gve_queues_start(priv, &tx_alloc_cfg, &rx_alloc_cfg);
 	if (err)
 		return err;
 
@@ -1522,11 +1408,11 @@ static int gve_queues_stop(struct gve_priv *priv)
 			goto err;
 		gve_clear_device_rings_ok(priv);
 	}
-	del_timer_sync(&priv->stats_report_timer);
+	timer_delete_sync(&priv->stats_report_timer);
 
 	gve_unreg_xdp_info(priv);
 
-	gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv));
+	gve_tx_stop_rings(priv, gve_num_tx_queues(priv));
 	gve_rx_stop_rings(priv, priv->rx_cfg.num_queues);
 
 	priv->interface_down_cnt++;
@@ -1556,69 +1442,6 @@ static int gve_close(struct net_device *dev)
 	return 0;
 }
 
-static int gve_remove_xdp_queues(struct gve_priv *priv)
-{
-	int qpl_start_id;
-	int err;
-
-	qpl_start_id = gve_xdp_tx_start_queue_id(priv);
-
-	err = gve_destroy_xdp_rings(priv);
-	if (err)
-		return err;
-
-	err = gve_unregister_xdp_qpls(priv);
-	if (err)
-		return err;
-
-	gve_unreg_xdp_info(priv);
-	gve_free_xdp_rings(priv);
-
-	gve_free_n_qpls(priv, priv->qpls, qpl_start_id, gve_num_xdp_qpls(priv));
-	priv->num_xdp_queues = 0;
-	return 0;
-}
-
-static int gve_add_xdp_queues(struct gve_priv *priv)
-{
-	int start_id;
-	int err;
-
-	priv->num_xdp_queues = priv->rx_cfg.num_queues;
-
-	start_id = gve_xdp_tx_start_queue_id(priv);
-	err = gve_alloc_n_qpls(priv, priv->qpls, priv->tx_pages_per_qpl,
-			       start_id, gve_num_xdp_qpls(priv));
-	if (err)
-		goto err;
-
-	err = gve_alloc_xdp_rings(priv);
-	if (err)
-		goto free_xdp_qpls;
-
-	err = gve_reg_xdp_info(priv, priv->dev);
-	if (err)
-		goto free_xdp_rings;
-
-	err = gve_register_xdp_qpls(priv);
-	if (err)
-		goto free_xdp_rings;
-
-	err = gve_create_xdp_rings(priv);
-	if (err)
-		goto free_xdp_rings;
-
-	return 0;
-
-free_xdp_rings:
-	gve_free_xdp_rings(priv);
-free_xdp_qpls:
-	gve_free_n_qpls(priv, priv->qpls, start_id, gve_num_xdp_qpls(priv));
-err:
-	priv->num_xdp_queues = 0;
-	return err;
-}
-
 static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
 {
 	if (!gve_get_napi_enabled(priv))
@@ -1636,6 +1459,19 @@ static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
 	}
 }
 
+static int gve_configure_rings_xdp(struct gve_priv *priv,
+				   u16 num_xdp_rings)
+{
+	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
+	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
+
+	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
+	tx_alloc_cfg.num_xdp_rings = num_xdp_rings;
+
+	rx_alloc_cfg.xdp = !!num_xdp_rings;
+	return gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
+}
+
 static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
 		       struct netlink_ext_ack *extack)
 {
@@ -1644,33 +1480,30 @@ static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
 	u32 status;
 
 	old_prog = READ_ONCE(priv->xdp_prog);
-	if (!netif_carrier_ok(priv->dev)) {
+	if (!netif_running(priv->dev)) {
 		WRITE_ONCE(priv->xdp_prog, prog);
 		if (old_prog)
 			bpf_prog_put(old_prog);
+
+		/* Update priv XDP queue configuration */
+		priv->tx_cfg.num_xdp_queues = priv->xdp_prog ?
+			priv->rx_cfg.num_queues : 0;
 		return 0;
 	}
 
-	gve_turndown(priv);
-	if (!old_prog && prog) {
-		// Allocate XDP TX queues if an XDP program is
-		// being installed
-		err = gve_add_xdp_queues(priv);
-		if (err)
-			goto out;
-	} else if (old_prog && !prog) {
-		// Remove XDP TX queues if an XDP program is
-		// being uninstalled
-		err = gve_remove_xdp_queues(priv);
-		if (err)
-			goto out;
-	}
+	if (!old_prog && prog)
+		err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues);
+	else if (old_prog && !prog)
+		err = gve_configure_rings_xdp(priv, 0);
+
+	if (err)
+		goto out;
+
 	WRITE_ONCE(priv->xdp_prog, prog);
 	if (old_prog)
 		bpf_prog_put(old_prog);
 
 out:
-	gve_turnup(priv);
 	status = ioread32be(&priv->reg_bar0->device_status);
 	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
 	return err;
@@ -1701,8 +1534,8 @@ static int gve_xsk_pool_enable(struct net_device *dev,
 	if (err)
 		return err;
 
-	/* If XDP prog is not installed, return */
-	if (!priv->xdp_prog)
+	/* If XDP prog is not installed or interface is down, return. */
+	if (!priv->xdp_prog || !netif_running(dev))
 		return 0;
 
 	rx = &priv->rx[qid];
@@ -1747,21 +1580,16 @@ static int gve_xsk_pool_disable(struct net_device *dev,
 	if (qid >= priv->rx_cfg.num_queues)
 		return -EINVAL;
 
-	/* If XDP prog is not installed, unmap DMA and return */
-	if (!priv->xdp_prog)
-		goto done;
-
-	tx_qid = gve_xdp_tx_queue_id(priv, qid);
-	if (!netif_running(dev)) {
-		priv->rx[qid].xsk_pool = NULL;
-		xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
-		priv->tx[tx_qid].xsk_pool = NULL;
+	/* If XDP prog is not installed or interface is down, unmap DMA and
+	 * return.
+	 */
+	if (!priv->xdp_prog || !netif_running(dev))
 		goto done;
-	}
 
 	napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
 	napi_disable(napi_rx); /* make sure current rx poll is done */
 
+	tx_qid = gve_xdp_tx_queue_id(priv, qid);
 	napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi;
 	napi_disable(napi_tx); /* make sure current tx poll is done */
 
@@ -1787,24 +1615,20 @@ done:
 static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
 {
 	struct gve_priv *priv = netdev_priv(dev);
-	int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id);
+	struct napi_struct *napi;
+
+	if (!gve_get_napi_enabled(priv))
+		return -ENETDOWN;
 
 	if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog)
 		return -EINVAL;
 
-	if (flags & XDP_WAKEUP_TX) {
-		struct gve_tx_ring *tx = &priv->tx[tx_queue_id];
-		struct napi_struct *napi =
-			&priv->ntfy_blocks[tx->ntfy_id].napi;
-
-		if (!napi_if_scheduled_mark_missed(napi)) {
-			/* Call local_bh_enable to trigger SoftIRQ processing */
-			local_bh_disable();
-			napi_schedule(napi);
-			local_bh_enable();
-		}
-
-		tx->xdp_xsk_wakeup++;
+	napi = &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_id)].napi;
+	if (!napi_if_scheduled_mark_missed(napi)) {
+		/* Call local_bh_enable to trigger SoftIRQ processing */
+		local_bh_disable();
+		napi_schedule(napi);
+		local_bh_enable();
 	}
 
 	return 0;
@@ -1813,6 +1637,7 @@ static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
 static int verify_xdp_configuration(struct net_device *dev)
 {
 	struct gve_priv *priv = netdev_priv(dev);
+	u16 max_xdp_mtu;
 
 	if (dev->features & NETIF_F_LRO) {
 		netdev_warn(dev, "XDP is not supported when LRO is on.\n");
@@ -1825,7 +1650,11 @@ static int verify_xdp_configuration(struct net_device *dev)
 		return -EOPNOTSUPP;
 	}
 
-	if (dev->mtu > GVE_DEFAULT_RX_BUFFER_SIZE - sizeof(struct ethhdr) - GVE_RX_PAD) {
+	max_xdp_mtu = priv->rx_cfg.packet_buffer_size - sizeof(struct ethhdr);
+	if (priv->queue_format == GVE_GQI_QPL_FORMAT)
+		max_xdp_mtu -= GVE_RX_PAD;
+
+	if (dev->mtu > max_xdp_mtu) {
 		netdev_warn(dev, "XDP is not supported for mtu %d.\n",
 			    dev->mtu);
 		return -EOPNOTSUPP;
@@ -1863,16 +1692,42 @@ static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 	}
 }
 
-static int gve_adjust_config(struct gve_priv *priv,
-			     struct gve_qpls_alloc_cfg *qpls_alloc_cfg,
-			     struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
-			     struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
+int gve_init_rss_config(struct gve_priv *priv, u16 num_queues)
+{
+	struct gve_rss_config *rss_config = &priv->rss_config;
+	struct ethtool_rxfh_param rxfh = {0};
+	u16 i;
+
+	if (!priv->cache_rss_config)
+		return 0;
+
+	for (i = 0; i < priv->rss_lut_size; i++)
+		rss_config->hash_lut[i] =
+			ethtool_rxfh_indir_default(i, num_queues);
+
+	netdev_rss_key_fill(rss_config->hash_key, priv->rss_key_size);
+
+	rxfh.hfunc = ETH_RSS_HASH_TOP;
+
+	return gve_adminq_configure_rss(priv, &rxfh);
+}
+
+int gve_flow_rules_reset(struct gve_priv *priv)
+{
+	if (!priv->max_flow_rules)
+		return 0;
+
+	return gve_adminq_reset_flow_rules(priv);
+}
+
+int gve_adjust_config(struct gve_priv *priv,
+		      struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
+		      struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
 {
 	int err;
 
 	/* Allocate resources for the new confiugration */
-	err = gve_queues_mem_alloc(priv, qpls_alloc_cfg,
-				   tx_alloc_cfg, rx_alloc_cfg);
+	err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg);
 	if (err) {
 		netif_err(priv, drv, priv->dev,
 			  "Adjust config failed to alloc new queues");
@@ -1884,14 +1739,12 @@ static int gve_adjust_config(struct gve_priv *priv,
 	if (err) {
 		netif_err(priv, drv, priv->dev,
 			  "Adjust config failed to close old queues");
-		gve_queues_mem_free(priv, qpls_alloc_cfg,
-				    tx_alloc_cfg, rx_alloc_cfg);
+		gve_queues_mem_free(priv, tx_alloc_cfg, rx_alloc_cfg);
 		return err;
 	}
 
 	/* Bring the device back up again with the new resources. */
-	err = gve_queues_start(priv, qpls_alloc_cfg,
-			       tx_alloc_cfg, rx_alloc_cfg);
+	err = gve_queues_start(priv, tx_alloc_cfg, rx_alloc_cfg);
 	if (err) {
 		netif_err(priv, drv, priv->dev,
 			  "Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n");
@@ -1906,40 +1759,32 @@ static int gve_adjust_config(struct gve_priv *priv,
 }
 
 int gve_adjust_queues(struct gve_priv *priv,
-		      struct gve_queue_config new_rx_config,
-		      struct gve_queue_config new_tx_config)
+		      struct gve_rx_queue_config new_rx_config,
+		      struct gve_tx_queue_config new_tx_config,
+		      bool reset_rss)
 {
 	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
 	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
-	struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0};
-	struct gve_qpl_config new_qpl_cfg;
 	int err;
 
-	gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg,
-				&tx_alloc_cfg, &rx_alloc_cfg);
-
-	/* qpl_cfg is not read-only, it contains a map that gets updated as
-	 * rings are allocated, which is why we cannot use the yet unreleased
-	 * one in priv.
	 */
-	qpls_alloc_cfg.qpl_cfg = &new_qpl_cfg;
-	tx_alloc_cfg.qpl_cfg = &new_qpl_cfg;
-	rx_alloc_cfg.qpl_cfg = &new_qpl_cfg;
+	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
 
 	/* Relay the new config from ethtool */
-	qpls_alloc_cfg.tx_cfg = &new_tx_config;
 	tx_alloc_cfg.qcfg = &new_tx_config;
 	rx_alloc_cfg.qcfg_tx = &new_tx_config;
-	qpls_alloc_cfg.rx_cfg = &new_rx_config;
-	rx_alloc_cfg.qcfg = &new_rx_config;
-	tx_alloc_cfg.num_rings = new_tx_config.num_queues;
+	rx_alloc_cfg.qcfg_rx = &new_rx_config;
+	rx_alloc_cfg.reset_rss = reset_rss;
 
-	if (netif_carrier_ok(priv->dev)) {
-		err = gve_adjust_config(priv, &qpls_alloc_cfg,
-					&tx_alloc_cfg, &rx_alloc_cfg);
+	if (netif_running(priv->dev)) {
+		err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
 		return err;
 	}
 	/* Set the config for the next up. */
+	if (reset_rss) {
+		err = gve_init_rss_config(priv, new_rx_config.num_queues);
+		if (err)
+			return err;
+	}
 	priv->tx_cfg = new_tx_config;
 	priv->rx_cfg = new_rx_config;
 
@@ -1961,20 +1806,37 @@ static void gve_turndown(struct gve_priv *priv)
 		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
 
-		napi_disable(&block->napi);
+		if (!gve_tx_was_added_to_block(priv, idx))
+			continue;
+
+		if (idx < priv->tx_cfg.num_queues)
+			netif_queue_set_napi(priv->dev, idx,
+					     NETDEV_QUEUE_TYPE_TX, NULL);
+
+		napi_disable_locked(&block->napi);
 	}
 	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
 		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
 
-		napi_disable(&block->napi);
+		if (!gve_rx_was_added_to_block(priv, idx))
+			continue;
+
+		netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX,
+				     NULL);
+		napi_disable_locked(&block->napi);
 	}
 
 	/* Stop tx queues */
 	netif_tx_disable(priv->dev);
 
+	xdp_features_clear_redirect_target_locked(priv->dev);
+
 	gve_clear_napi_enabled(priv);
 	gve_clear_report_stats(priv);
+
+	/* Make sure that all traffic is finished processing. */
+	synchronize_net();
 }
 
 static void gve_turnup(struct gve_priv *priv)
@@ -1989,30 +1851,71 @@ static void gve_turnup(struct gve_priv *priv)
 		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
 
-		napi_enable(&block->napi);
+		if (!gve_tx_was_added_to_block(priv, idx))
+			continue;
+
+		napi_enable_locked(&block->napi);
+
+		if (idx < priv->tx_cfg.num_queues)
+			netif_queue_set_napi(priv->dev, idx,
+					     NETDEV_QUEUE_TYPE_TX,
+					     &block->napi);
+
 		if (gve_is_gqi(priv)) {
 			iowrite32be(0, gve_irq_doorbell(priv, block));
 		} else {
 			gve_set_itr_coalesce_usecs_dqo(priv, block,
 						       priv->tx_coalesce_usecs);
 		}
+
+		/* Any descs written by the NIC before this barrier will be
+		 * handled by the one-off napi schedule below. Whereas any
+		 * descs after the barrier will generate interrupts.
+		 */
+		mb();
+		napi_schedule(&block->napi);
 	}
 	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
 		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
 
-		napi_enable(&block->napi);
+		if (!gve_rx_was_added_to_block(priv, idx))
+			continue;
+
+		napi_enable_locked(&block->napi);
+		netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX,
+				     &block->napi);
+
 		if (gve_is_gqi(priv)) {
 			iowrite32be(0, gve_irq_doorbell(priv, block));
 		} else {
 			gve_set_itr_coalesce_usecs_dqo(priv, block,
 						       priv->rx_coalesce_usecs);
 		}
+
+		/* Any descs written by the NIC before this barrier will be
		 * handled by the one-off napi schedule below. Whereas any
+		 * descs after the barrier will generate interrupts.
+		 */
+		mb();
+		napi_schedule(&block->napi);
 	}
 
+	if (priv->tx_cfg.num_xdp_queues && gve_supports_xdp_xmit(priv))
+		xdp_features_set_redirect_target_locked(priv->dev, false);
+
 	gve_set_napi_enabled(priv);
 }
 
+static void gve_turnup_and_check_status(struct gve_priv *priv)
+{
+	u32 status;
+
+	gve_turnup(priv);
+	status = ioread32be(&priv->reg_bar0->device_status);
+	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
+}
+
 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
 {
 	struct gve_notify_block *block;
@@ -2077,7 +1980,6 @@ int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split)
 {
 	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
 	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
-	struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0};
 	bool enable_hdr_split;
 	int err = 0;
 
@@ -2097,15 +1999,13 @@ int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split)
 	if (enable_hdr_split == priv->header_split_enabled)
 		return 0;
 
-	gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg,
-				&tx_alloc_cfg, &rx_alloc_cfg);
+	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
 
 	rx_alloc_cfg.enable_header_split = enable_hdr_split;
 	rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split);
 
 	if (netif_running(priv->dev))
-		err = gve_adjust_config(priv, &qpls_alloc_cfg,
-					&tx_alloc_cfg, &rx_alloc_cfg);
+		err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
 	return err;
 }
 
@@ -2115,35 +2015,30 @@ static int gve_set_features(struct net_device *netdev,
 	const netdev_features_t orig_features = netdev->features;
 	struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
 	struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
-	struct gve_qpls_alloc_cfg qpls_alloc_cfg = {0};
 	struct gve_priv *priv = netdev_priv(netdev);
-	struct gve_qpl_config new_qpl_cfg;
 	int err;
 
-	gve_get_curr_alloc_cfgs(priv, &qpls_alloc_cfg,
-				&tx_alloc_cfg, &rx_alloc_cfg);
-	/* qpl_cfg is not read-only, it contains a map that gets updated as
-	 * rings are allocated, which is why we cannot use the yet unreleased
-	 * one in priv.
-	 */
-	qpls_alloc_cfg.qpl_cfg = &new_qpl_cfg;
-	tx_alloc_cfg.qpl_cfg = &new_qpl_cfg;
-	rx_alloc_cfg.qpl_cfg = &new_qpl_cfg;
+	gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
 
 	if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
 		netdev->features ^= NETIF_F_LRO;
-		if (netif_carrier_ok(netdev)) {
-			err = gve_adjust_config(priv, &qpls_alloc_cfg,
-						&tx_alloc_cfg, &rx_alloc_cfg);
-			if (err) {
-				/* Revert the change on error. */
-				netdev->features = orig_features;
-				return err;
-			}
+		if (netif_running(netdev)) {
+			err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
+			if (err)
+				goto revert_features;
 		}
 	}
+	if ((netdev->features & NETIF_F_NTUPLE) && !(features & NETIF_F_NTUPLE)) {
+		err = gve_flow_rules_reset(priv);
+		if (err)
+			goto revert_features;
+	}
 
 	return 0;
+
+revert_features:
+	netdev->features = orig_features;
+	return err;
 }
 
 static const struct net_device_ops gve_netdev_ops = {
@@ -2182,7 +2077,9 @@ static void gve_handle_reset(struct gve_priv *priv)
 
 	if (gve_get_do_reset(priv)) {
 		rtnl_lock();
+		netdev_lock(priv->dev);
 		gve_reset(priv, false);
+		netdev_unlock(priv->dev);
 		rtnl_unlock();
 	}
 }
@@ -2278,14 +2175,17 @@ static void gve_service_task(struct work_struct *work)
 
 static void gve_set_netdev_xdp_features(struct gve_priv *priv)
 {
+	xdp_features_t xdp_features;
+
 	if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
-		priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC;
-		priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT;
-		priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
-		priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
+		xdp_features = NETDEV_XDP_ACT_BASIC;
+		xdp_features |= NETDEV_XDP_ACT_REDIRECT;
+		xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
 	} else {
-		priv->dev->xdp_features = 0;
+		xdp_features = 0;
	}
+
+	xdp_set_features_flag_locked(priv->dev, xdp_features);
 }
 
 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
@@ -2359,6 +2259,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
 		priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
 						priv->rx_cfg.num_queues);
 	}
+	priv->tx_cfg.num_xdp_queues = 0;
 
 	dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
 		 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
@@ -2422,7 +2323,7 @@ err:
 
 int gve_reset(struct gve_priv *priv, bool attempt_teardown)
 {
-	bool was_up = netif_carrier_ok(priv->dev);
+	bool was_up = netif_running(priv->dev);
 	int err;
 
 	dev_info(&priv->pdev->dev, "Performing reset\n");
@@ -2473,6 +2374,188 @@ static void gve_write_version(u8 __iomem *driver_version_register)
 	writeb('\n', driver_version_register);
 }
 
+static int gve_rx_queue_stop(struct net_device *dev, void *per_q_mem, int idx)
+{
+	struct gve_priv *priv = netdev_priv(dev);
+	struct gve_rx_ring *gve_per_q_mem;
+	int err;
+
+	if (!priv->rx)
+		return -EAGAIN;
+
+	/* Destroying queue 0 while other queues exist is not supported in DQO */
+	if (!gve_is_gqi(priv) && idx == 0)
+		return -ERANGE;
+
+	/* Single-queue destruction requires quiescence on all queues */
+	gve_turndown(priv);
+
+	/* This failure will trigger a reset - no need to clean up */
+	err = gve_adminq_destroy_single_rx_queue(priv, idx);
+	if (err)
+		return err;
+
+	if (gve_is_qpl(priv)) {
+		/* This failure will trigger a reset - no need to clean up */
+		err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, idx));
+		if (err)
+			return err;
+	}
+
+	gve_rx_stop_ring(priv, idx);
+
+	/* Turn the unstopped queues back up */
+	gve_turnup_and_check_status(priv);
+
+	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
+	*gve_per_q_mem = priv->rx[idx];
+	memset(&priv->rx[idx], 0, sizeof(priv->rx[idx]));
+	return 0;
+}
+
+static void gve_rx_queue_mem_free(struct net_device *dev, void *per_q_mem)
+{
+	struct gve_priv *priv = netdev_priv(dev);
+	struct gve_rx_alloc_rings_cfg cfg = {0};
+	struct gve_rx_ring *gve_per_q_mem;
+
+	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
+	gve_rx_get_curr_alloc_cfg(priv, &cfg);
+
+	if (gve_is_gqi(priv))
+		gve_rx_free_ring_gqi(priv, gve_per_q_mem, &cfg);
+	else
+		gve_rx_free_ring_dqo(priv, gve_per_q_mem, &cfg);
+}
+
+static int gve_rx_queue_mem_alloc(struct net_device *dev, void *per_q_mem,
+				  int idx)
+{
+	struct gve_priv *priv = netdev_priv(dev);
+	struct gve_rx_alloc_rings_cfg cfg = {0};
+	struct gve_rx_ring *gve_per_q_mem;
+	int err;
+
+	if (!priv->rx)
+		return -EAGAIN;
+
+	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
+	gve_rx_get_curr_alloc_cfg(priv, &cfg);
+
+	if (gve_is_gqi(priv))
+		err = gve_rx_alloc_ring_gqi(priv, &cfg, gve_per_q_mem, idx);
+	else
+		err = gve_rx_alloc_ring_dqo(priv, &cfg, gve_per_q_mem, idx);
+
+	return err;
+}
+
+static int gve_rx_queue_start(struct net_device *dev, void *per_q_mem, int idx)
+{
+	struct gve_priv *priv = netdev_priv(dev);
+	struct gve_rx_ring *gve_per_q_mem;
+	int err;
+
+	if (!priv->rx)
+		return -EAGAIN;
+
+	gve_per_q_mem = (struct gve_rx_ring *)per_q_mem;
+	priv->rx[idx] = *gve_per_q_mem;
+
+	/* Single-queue creation requires quiescence on all queues */
+	gve_turndown(priv);
+
+	gve_rx_start_ring(priv, idx);
+
+	if (gve_is_qpl(priv)) {
+		/* This failure will trigger a reset - no need to clean up */
+		err = gve_register_qpl(priv, gve_rx_get_qpl(priv, idx));
+		if (err)
+			goto abort;
+	}
+
+	/* This failure will trigger a reset - no need to clean up */
+	err = gve_adminq_create_single_rx_queue(priv, idx);
+	if (err)
+		goto abort;
+
+	if (gve_is_gqi(priv))
+		gve_rx_write_doorbell(priv, &priv->rx[idx]);
+	else
+		gve_rx_post_buffers_dqo(&priv->rx[idx]);
+
+	/* Turn the unstopped queues back up */
+	gve_turnup_and_check_status(priv);
+	return 0;
+
+abort:
+	gve_rx_stop_ring(priv, idx);
+
+	/* All failures in this func result in a reset, by clearing the struct
	 * at idx, we prevent a double free when that reset runs. The reset,
	 * which needs the rtnl lock, will not run till this func returns and
	 * its caller gives up the lock.
	 */
+	memset(&priv->rx[idx], 0, sizeof(priv->rx[idx]));
+	return err;
+}
+
+static const struct netdev_queue_mgmt_ops gve_queue_mgmt_ops = {
+	.ndo_queue_mem_size	=	sizeof(struct gve_rx_ring),
+	.ndo_queue_mem_alloc	=	gve_rx_queue_mem_alloc,
+	.ndo_queue_mem_free	=	gve_rx_queue_mem_free,
+	.ndo_queue_start	=	gve_rx_queue_start,
+	.ndo_queue_stop		=	gve_rx_queue_stop,
+};
+
+static void gve_get_rx_queue_stats(struct net_device *dev, int idx,
+				   struct netdev_queue_stats_rx *rx_stats)
+{
+	struct gve_priv *priv = netdev_priv(dev);
+	struct gve_rx_ring *rx = &priv->rx[idx];
+	unsigned int start;
+
+	do {
+		start = u64_stats_fetch_begin(&rx->statss);
+		rx_stats->packets = rx->rpackets;
+		rx_stats->bytes = rx->rbytes;
+		rx_stats->alloc_fail = rx->rx_skb_alloc_fail +
+				       rx->rx_buf_alloc_fail;
+	} while (u64_stats_fetch_retry(&rx->statss, start));
+}
+
+static void gve_get_tx_queue_stats(struct net_device *dev, int idx,
+				   struct netdev_queue_stats_tx *tx_stats)
+{
+	struct gve_priv *priv = netdev_priv(dev);
+	struct gve_tx_ring *tx = &priv->tx[idx];
+	unsigned int start;
+
+	do {
+		start = u64_stats_fetch_begin(&tx->statss);
+		tx_stats->packets = tx->pkt_done;
+		tx_stats->bytes = tx->bytes_done;
+	} while (u64_stats_fetch_retry(&tx->statss, start));
+}
+
+static void gve_get_base_stats(struct net_device *dev,
+			       struct netdev_queue_stats_rx *rx,
+			       struct netdev_queue_stats_tx *tx)
+{
+	rx->packets = 0;
+	rx->bytes = 0;
+	rx->alloc_fail = 0;
+
+	tx->packets = 0;
+	tx->bytes = 0;
+}
+
+static const struct netdev_stat_ops gve_stat_ops = {
+	.get_queue_stats_rx	= gve_get_rx_queue_stats,
+	.get_queue_stats_tx	= gve_get_tx_queue_stats,
+	.get_base_stats		= gve_get_base_stats,
+};
+
 static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	int max_tx_queues, max_rx_queues;
@@ -2527,6 +2610,8 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	pci_set_drvdata(pdev, dev);
 	dev->ethtool_ops = &gve_ethtool_ops;
 	dev->netdev_ops = &gve_netdev_ops;
+	dev->queue_mgmt_ops = &gve_queue_mgmt_ops;
+	dev->stat_ops = &gve_stat_ops;
 
 	/* Set default and supported features.
	 *
@@ -2555,7 +2640,7 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	priv->service_task_flags = 0x0;
 	priv->state_flags = 0x0;
 	priv->ethtool_flags = 0x0;
-	priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE;
+	priv->rx_cfg.packet_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;
 	priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;
 
 	gve_set_probe_in_progress(priv);
@@ -2574,6 +2659,9 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (err)
 		goto abort_with_wq;
 
+	if (!gve_is_gqi(priv) && !gve_is_qpl(priv))
+		dev->netmem_tx = true;
+
 	err = register_netdev(dev);
 	if (err)
 		goto abort_with_gve_init;
@@ -2628,9 +2716,10 @@ static void gve_shutdown(struct pci_dev *pdev)
 {
 	struct net_device *netdev = pci_get_drvdata(pdev);
 	struct gve_priv *priv = netdev_priv(netdev);
-	bool was_up = netif_carrier_ok(priv->dev);
+	bool was_up = netif_running(priv->dev);
 
 	rtnl_lock();
+	netdev_lock(netdev);
 	if (was_up && gve_close(priv->dev)) {
 		/* If the dev was up, attempt to close, if close fails, reset */
 		gve_reset_and_teardown(priv, was_up);
@@ -2638,6 +2727,7 @@ static void gve_shutdown(struct pci_dev *pdev)
 		/* If the dev wasn't up or close worked, finish tearing down */
 		gve_teardown_priv_resources(priv);
 	}
+	netdev_unlock(netdev);
 	rtnl_unlock();
 }
 
@@ -2646,10 +2736,11 @@ static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
 {
 	struct net_device *netdev = pci_get_drvdata(pdev);
 	struct gve_priv *priv = netdev_priv(netdev);
-	bool was_up = netif_carrier_ok(priv->dev);
+	bool was_up = netif_running(priv->dev);
 
 	priv->suspend_cnt++;
 	rtnl_lock();
+	netdev_lock(netdev);
 	if (was_up && gve_close(priv->dev)) {
 		/* If the dev was up, attempt to close, if close fails, reset */
 		gve_reset_and_teardown(priv, was_up);
@@ -2658,6 +2749,7 @@ static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
 		gve_teardown_priv_resources(priv);
 	}
 	priv->up_before_suspend = was_up;
+	netdev_unlock(netdev);
 	rtnl_unlock();
 	return 0;
 }
@@ -2670,7 +2762,9 @@ static int gve_resume(struct pci_dev *pdev)
 
 	priv->resume_cnt++;
 	rtnl_lock();
+	netdev_lock(netdev);
 	err = gve_reset_recovery(priv, priv->up_before_suspend);
+	netdev_unlock(netdev);
 	rtnl_unlock();
 	return err;
 }