diff options
Diffstat (limited to 'drivers/net/ethernet/google/gve/gve_main.c')
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_main.c | 1275 |
1 files changed, 845 insertions, 430 deletions
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 619bf63ec935..92237fb0b60c 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: (GPL-2.0 OR MIT) /* Google virtual Ethernet (gve) driver * - * Copyright (C) 2015-2021 Google, Inc. + * Copyright (C) 2015-2024 Google LLC */ #include <linux/bpf.h> @@ -9,6 +9,7 @@ #include <linux/etherdevice.h> #include <linux/filter.h> #include <linux/interrupt.h> +#include <linux/irq.h> #include <linux/module.h> #include <linux/pci.h> #include <linux/sched.h> @@ -16,12 +17,14 @@ #include <linux/workqueue.h> #include <linux/utsname.h> #include <linux/version.h> +#include <net/netdev_queues.h> #include <net/sch_generic.h> #include <net/xdp_sock_drv.h> #include "gve.h" #include "gve_dqo.h" #include "gve_adminq.h" #include "gve_register.h" +#include "gve_utils.h" #define GVE_DEFAULT_RX_COPYBREAK (256) @@ -138,6 +141,49 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) } } +static int gve_alloc_flow_rule_caches(struct gve_priv *priv) +{ + struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache; + int err = 0; + + if (!priv->max_flow_rules) + return 0; + + flow_rules_cache->rules_cache = + kvcalloc(GVE_FLOW_RULES_CACHE_SIZE, sizeof(*flow_rules_cache->rules_cache), + GFP_KERNEL); + if (!flow_rules_cache->rules_cache) { + dev_err(&priv->pdev->dev, "Cannot alloc flow rules cache\n"); + return -ENOMEM; + } + + flow_rules_cache->rule_ids_cache = + kvcalloc(GVE_FLOW_RULE_IDS_CACHE_SIZE, sizeof(*flow_rules_cache->rule_ids_cache), + GFP_KERNEL); + if (!flow_rules_cache->rule_ids_cache) { + dev_err(&priv->pdev->dev, "Cannot alloc flow rule ids cache\n"); + err = -ENOMEM; + goto free_rules_cache; + } + + return 0; + +free_rules_cache: + kvfree(flow_rules_cache->rules_cache); + flow_rules_cache->rules_cache = NULL; + return err; +} + +static void gve_free_flow_rule_caches(struct gve_priv *priv) +{ + struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache; + + kvfree(flow_rules_cache->rule_ids_cache); + flow_rules_cache->rule_ids_cache = NULL; + kvfree(flow_rules_cache->rules_cache); + flow_rules_cache->rules_cache = NULL; +} + static int gve_alloc_counter_array(struct gve_priv *priv) { priv->counter_array = @@ -252,7 +298,19 @@ static irqreturn_t gve_intr_dqo(int irq, void *arg) return IRQ_HANDLED; } -static int gve_napi_poll(struct napi_struct *napi, int budget) +static int gve_is_napi_on_home_cpu(struct gve_priv *priv, u32 irq) +{ + int cpu_curr = smp_processor_id(); + const struct cpumask *aff_mask; + + aff_mask = irq_get_effective_affinity_mask(irq); + if (unlikely(!aff_mask)) + return 1; + + return cpumask_test_cpu(cpu_curr, aff_mask); +} + +int gve_napi_poll(struct napi_struct *napi, int budget) { struct gve_notify_block *block; __be32 __iomem *irq_doorbell; @@ -275,6 +333,14 @@ static int gve_napi_poll(struct napi_struct *napi, int budget) if (block->rx) { work_done = gve_rx_poll(block, budget); + + /* Poll XSK TX as part of RX NAPI. Setup re-poll based on max of + * TX and RX work done. + */ + if (priv->xdp_prog) + work_done = max_t(int, work_done, + gve_xsk_tx_poll(block, budget)); + reschedule |= work_done == budget; } @@ -302,7 +368,7 @@ static int gve_napi_poll(struct napi_struct *napi, int budget) return work_done; } -static int gve_napi_poll_dqo(struct napi_struct *napi, int budget) +int gve_napi_poll_dqo(struct napi_struct *napi, int budget) { struct gve_notify_block *block = container_of(napi, struct gve_notify_block, napi); @@ -321,8 +387,21 @@ static int gve_napi_poll_dqo(struct napi_struct *napi, int budget) reschedule |= work_done == budget; } - if (reschedule) - return budget; + if (reschedule) { + /* Reschedule by returning budget only if already on the correct + * cpu. + */ + if (likely(gve_is_napi_on_home_cpu(priv, block->irq))) + return budget; + + /* If not on the cpu with which this queue's irq has affinity + * with, we avoid rescheduling napi and arm the irq instead so + * that napi gets rescheduled back eventually onto the right + * cpu. + */ + if (work_done == budget) + work_done--; + } if (likely(napi_complete_done(napi, work_done))) { /* Enable interrupts again. @@ -427,6 +506,7 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv) "Failed to receive msix vector %d\n", i); goto abort_with_some_ntfy_blocks; } + block->irq = priv->msix_vectors[msix_idx].vector; irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, get_cpu_mask(i % active_cpus)); block->irq_db_index = &priv->irq_db_indices[i].index; @@ -440,6 +520,7 @@ abort_with_some_ntfy_blocks: irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, NULL); free_irq(priv->msix_vectors[msix_idx].vector, block); + block->irq = 0; } kvfree(priv->ntfy_blocks); priv->ntfy_blocks = NULL; @@ -473,6 +554,7 @@ static void gve_free_notify_blocks(struct gve_priv *priv) irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, NULL); free_irq(priv->msix_vectors[msix_idx].vector, block); + block->irq = 0; } free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); kvfree(priv->ntfy_blocks); @@ -490,9 +572,12 @@ static int gve_setup_device_resources(struct gve_priv *priv) { int err; - err = gve_alloc_counter_array(priv); + err = gve_alloc_flow_rule_caches(priv); if (err) return err; + err = gve_alloc_counter_array(priv); + if (err) + goto abort_with_flow_rule_caches; err = gve_alloc_notify_blocks(priv); if (err) goto abort_with_counter; @@ -544,6 +629,8 @@ abort_with_ntfy_blocks: gve_free_notify_blocks(priv); abort_with_counter: gve_free_counter_array(priv); +abort_with_flow_rule_caches: + gve_free_flow_rule_caches(priv); return err; } @@ -556,6 +643,12 @@ static void gve_teardown_device_resources(struct gve_priv *priv) /* Tell device its resources are being freed */ if (gve_get_device_resources_ok(priv)) { + err = gve_flow_rules_reset(priv); + if (err) { + dev_err(&priv->pdev->dev, + "Failed to reset flow rules: err=%d\n", err); + gve_trigger_reset(priv); + } /* detach the stats report */ err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD); if (err) { @@ -575,25 +668,82 @@ static void gve_teardown_device_resources(struct gve_priv *priv) kvfree(priv->ptype_lut_dqo); priv->ptype_lut_dqo = NULL; + gve_free_flow_rule_caches(priv); gve_free_counter_array(priv); gve_free_notify_blocks(priv); gve_free_stats_report(priv); gve_clear_device_resources_ok(priv); } -static void gve_add_napi(struct gve_priv *priv, int ntfy_idx, - int (*gve_poll)(struct napi_struct *, int)) +static int gve_unregister_qpl(struct gve_priv *priv, + struct gve_queue_page_list *qpl) +{ + int err; + + if (!qpl) + return 0; + + err = gve_adminq_unregister_page_list(priv, qpl->id); + if (err) { + netif_err(priv, drv, priv->dev, + "Failed to unregister queue page list %d\n", + qpl->id); + return err; + } + + priv->num_registered_pages -= qpl->num_entries; + return 0; +} + +static int gve_register_qpl(struct gve_priv *priv, + struct gve_queue_page_list *qpl) +{ + int pages; + int err; + + if (!qpl) + return 0; + + pages = qpl->num_entries; + + if (pages + priv->num_registered_pages > priv->max_registered_pages) { + netif_err(priv, drv, priv->dev, + "Reached max number of registered pages %llu > %llu\n", + pages + priv->num_registered_pages, + priv->max_registered_pages); + return -EINVAL; + } + + err = gve_adminq_register_page_list(priv, qpl); + if (err) { + netif_err(priv, drv, priv->dev, + "failed to register queue page list %d\n", + qpl->id); + return err; + } + + priv->num_registered_pages += pages; + return 0; +} + +static struct gve_queue_page_list *gve_tx_get_qpl(struct gve_priv *priv, int idx) { - struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + struct gve_tx_ring *tx = &priv->tx[idx]; - netif_napi_add(priv->dev, &block->napi, gve_poll); + if (gve_is_gqi(priv)) + return tx->tx_fifo.qpl; + else + return tx->dqo.qpl; } -static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx) +static struct gve_queue_page_list *gve_rx_get_qpl(struct gve_priv *priv, int idx) { - struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + struct gve_rx_ring *rx = &priv->rx[idx]; - netif_napi_del(&block->napi); + if (gve_is_gqi(priv)) + return rx->data.qpl; + else + return rx->dqo.qpl; } static int gve_register_xdp_qpls(struct gve_priv *priv) @@ -602,55 +752,38 @@ static int gve_register_xdp_qpls(struct gve_priv *priv) int err; int i; - start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); + start_id = gve_xdp_tx_start_queue_id(priv); for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { - err = gve_adminq_register_page_list(priv, &priv->qpls[i]); - if (err) { - netif_err(priv, drv, priv->dev, - "failed to register queue page list %d\n", - priv->qpls[i].id); - /* This failure will trigger a reset - no need to clean - * up - */ + err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i)); + /* This failure will trigger a reset - no need to clean up */ + if (err) return err; - } } return 0; } static int gve_register_qpls(struct gve_priv *priv) { - int start_id; + int num_tx_qpls, num_rx_qpls; int err; int i; - start_id = gve_tx_start_qpl_id(priv); - for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) { - err = gve_adminq_register_page_list(priv, &priv->qpls[i]); - if (err) { - netif_err(priv, drv, priv->dev, - "failed to register queue page list %d\n", - priv->qpls[i].id); - /* This failure will trigger a reset - no need to clean - * up - */ + num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv), + gve_is_qpl(priv)); + num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); + + for (i = 0; i < num_tx_qpls; i++) { + err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i)); + if (err) return err; - } } - start_id = gve_rx_start_qpl_id(priv); - for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) { - err = gve_adminq_register_page_list(priv, &priv->qpls[i]); - if (err) { - netif_err(priv, drv, priv->dev, - "failed to register queue page list %d\n", - priv->qpls[i].id); - /* This failure will trigger a reset - no need to clean - * up - */ + for (i = 0; i < num_rx_qpls; i++) { + err = gve_register_qpl(priv, gve_rx_get_qpl(priv, i)); + if (err) return err; - } } + return 0; } @@ -660,48 +793,38 @@ static int gve_unregister_xdp_qpls(struct gve_priv *priv) int err; int i; - start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); + start_id = gve_xdp_tx_start_queue_id(priv); for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { - err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id); - /* This failure will trigger a reset - no need to clean up */ - if (err) { - netif_err(priv, drv, priv->dev, - "Failed to unregister queue page list %d\n", - priv->qpls[i].id); + err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i)); + /* This failure will trigger a reset - no need to clean */ + if (err) return err; - } } return 0; } static int gve_unregister_qpls(struct gve_priv *priv) { - int start_id; + int num_tx_qpls, num_rx_qpls; int err; int i; - start_id = gve_tx_start_qpl_id(priv); - for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) { - err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id); - /* This failure will trigger a reset - no need to clean up */ - if (err) { - netif_err(priv, drv, priv->dev, - "Failed to unregister queue page list %d\n", - priv->qpls[i].id); + num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv), + gve_is_qpl(priv)); + num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); + + for (i = 0; i < num_tx_qpls; i++) { + err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i)); + /* This failure will trigger a reset - no need to clean */ + if (err) return err; - } } - start_id = gve_rx_start_qpl_id(priv); - for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) { - err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id); - /* This failure will trigger a reset - no need to clean up */ - if (err) { - netif_err(priv, drv, priv->dev, - "Failed to unregister queue page list %d\n", - priv->qpls[i].id); + for (i = 0; i < num_rx_qpls; i++) { + err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, i)); + /* This failure will trigger a reset - no need to clean */ + if (err) return err; - } } return 0; } @@ -776,120 +899,124 @@ static int gve_create_rings(struct gve_priv *priv) return 0; } -static void add_napi_init_xdp_sync_stats(struct gve_priv *priv, - int (*napi_poll)(struct napi_struct *napi, - int budget)) +static void init_xdp_sync_stats(struct gve_priv *priv) { int start_id = gve_xdp_tx_start_queue_id(priv); int i; - /* Add xdp tx napi & init sync stats*/ + /* Init stats */ for (i = start_id; i < start_id + priv->num_xdp_queues; i++) { int ntfy_idx = gve_tx_idx_to_ntfy(priv, i); u64_stats_init(&priv->tx[i].statss); priv->tx[i].ntfy_id = ntfy_idx; - gve_add_napi(priv, ntfy_idx, napi_poll); } } -static void add_napi_init_sync_stats(struct gve_priv *priv, - int (*napi_poll)(struct napi_struct *napi, - int budget)) +static void gve_init_sync_stats(struct gve_priv *priv) { int i; - /* Add tx napi & init sync stats*/ - for (i = 0; i < gve_num_tx_queues(priv); i++) { - int ntfy_idx = gve_tx_idx_to_ntfy(priv, i); - + for (i = 0; i < priv->tx_cfg.num_queues; i++) u64_stats_init(&priv->tx[i].statss); - priv->tx[i].ntfy_id = ntfy_idx; - gve_add_napi(priv, ntfy_idx, napi_poll); - } - /* Add rx napi & init sync stats*/ - for (i = 0; i < priv->rx_cfg.num_queues; i++) { - int ntfy_idx = gve_rx_idx_to_ntfy(priv, i); + /* Init stats for XDP TX queues */ + init_xdp_sync_stats(priv); + + for (i = 0; i < priv->rx_cfg.num_queues; i++) u64_stats_init(&priv->rx[i].statss); - priv->rx[i].ntfy_id = ntfy_idx; - gve_add_napi(priv, ntfy_idx, napi_poll); +} + +static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *cfg) +{ + int num_xdp_queues = priv->xdp_prog ? priv->rx_cfg.num_queues : 0; + + cfg->qcfg = &priv->tx_cfg; + cfg->raw_addressing = !gve_is_qpl(priv); + cfg->ring_size = priv->tx_desc_cnt; + cfg->start_idx = 0; + cfg->num_rings = priv->tx_cfg.num_queues + num_xdp_queues; + cfg->tx = priv->tx; +} + +static void gve_tx_stop_rings(struct gve_priv *priv, int start_id, int num_rings) +{ + int i; + + if (!priv->tx) + return; + + for (i = start_id; i < start_id + num_rings; i++) { + if (gve_is_gqi(priv)) + gve_tx_stop_ring_gqi(priv, i); + else + gve_tx_stop_ring_dqo(priv, i); } } -static void gve_tx_free_rings(struct gve_priv *priv, int start_id, int num_rings) +static void gve_tx_start_rings(struct gve_priv *priv, int start_id, + int num_rings) { - if (gve_is_gqi(priv)) { - gve_tx_free_rings_gqi(priv, start_id, num_rings); - } else { - gve_tx_free_rings_dqo(priv); + int i; + + for (i = start_id; i < start_id + num_rings; i++) { + if (gve_is_gqi(priv)) + gve_tx_start_ring_gqi(priv, i); + else + gve_tx_start_ring_dqo(priv, i); } } static int gve_alloc_xdp_rings(struct gve_priv *priv) { - int start_id; + struct gve_tx_alloc_rings_cfg cfg = {0}; int err = 0; if (!priv->num_xdp_queues) return 0; - start_id = gve_xdp_tx_start_queue_id(priv); - err = gve_tx_alloc_rings(priv, start_id, priv->num_xdp_queues); + gve_tx_get_curr_alloc_cfg(priv, &cfg); + cfg.start_idx = gve_xdp_tx_start_queue_id(priv); + cfg.num_rings = priv->num_xdp_queues; + + err = gve_tx_alloc_rings_gqi(priv, &cfg); if (err) return err; - add_napi_init_xdp_sync_stats(priv, gve_napi_poll); + + gve_tx_start_rings(priv, cfg.start_idx, cfg.num_rings); + init_xdp_sync_stats(priv); return 0; } -static int gve_alloc_rings(struct gve_priv *priv) +static int gve_queues_mem_alloc(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) { int err; - /* Setup tx rings */ - priv->tx = kvcalloc(priv->tx_cfg.max_queues, sizeof(*priv->tx), - GFP_KERNEL); - if (!priv->tx) - return -ENOMEM; - if (gve_is_gqi(priv)) - err = gve_tx_alloc_rings(priv, 0, gve_num_tx_queues(priv)); + err = gve_tx_alloc_rings_gqi(priv, tx_alloc_cfg); else - err = gve_tx_alloc_rings_dqo(priv); + err = gve_tx_alloc_rings_dqo(priv, tx_alloc_cfg); if (err) - goto free_tx; - - /* Setup rx rings */ - priv->rx = kvcalloc(priv->rx_cfg.max_queues, sizeof(*priv->rx), - GFP_KERNEL); - if (!priv->rx) { - err = -ENOMEM; - goto free_tx_queue; - } + return err; if (gve_is_gqi(priv)) - err = gve_rx_alloc_rings(priv); + err = gve_rx_alloc_rings_gqi(priv, rx_alloc_cfg); else - err = gve_rx_alloc_rings_dqo(priv); + err = gve_rx_alloc_rings_dqo(priv, rx_alloc_cfg); if (err) - goto free_rx; - - if (gve_is_gqi(priv)) - add_napi_init_sync_stats(priv, gve_napi_poll); - else - add_napi_init_sync_stats(priv, gve_napi_poll_dqo); + goto free_tx; return 0; -free_rx: - kvfree(priv->rx); - priv->rx = NULL; -free_tx_queue: - gve_tx_free_rings(priv, 0, gve_num_tx_queues(priv)); free_tx: - kvfree(priv->tx); - priv->tx = NULL; + if (gve_is_gqi(priv)) + gve_tx_free_rings_gqi(priv, tx_alloc_cfg); + else + gve_tx_free_rings_dqo(priv, tx_alloc_cfg); return err; } @@ -937,52 +1064,30 @@ static int gve_destroy_rings(struct gve_priv *priv) return 0; } -static void gve_rx_free_rings(struct gve_priv *priv) -{ - if (gve_is_gqi(priv)) - gve_rx_free_rings_gqi(priv); - else - gve_rx_free_rings_dqo(priv); -} - static void gve_free_xdp_rings(struct gve_priv *priv) { - int ntfy_idx, start_id; - int i; + struct gve_tx_alloc_rings_cfg cfg = {0}; + + gve_tx_get_curr_alloc_cfg(priv, &cfg); + cfg.start_idx = gve_xdp_tx_start_queue_id(priv); + cfg.num_rings = priv->num_xdp_queues; - start_id = gve_xdp_tx_start_queue_id(priv); if (priv->tx) { - for (i = start_id; i < start_id + priv->num_xdp_queues; i++) { - ntfy_idx = gve_tx_idx_to_ntfy(priv, i); - gve_remove_napi(priv, ntfy_idx); - } - gve_tx_free_rings(priv, start_id, priv->num_xdp_queues); + gve_tx_stop_rings(priv, cfg.start_idx, cfg.num_rings); + gve_tx_free_rings_gqi(priv, &cfg); } } -static void gve_free_rings(struct gve_priv *priv) +static void gve_queues_mem_free(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_cfg, + struct gve_rx_alloc_rings_cfg *rx_cfg) { - int num_tx_queues = gve_num_tx_queues(priv); - int ntfy_idx; - int i; - - if (priv->tx) { - for (i = 0; i < num_tx_queues; i++) { - ntfy_idx = gve_tx_idx_to_ntfy(priv, i); - gve_remove_napi(priv, ntfy_idx); - } - gve_tx_free_rings(priv, 0, num_tx_queues); - kvfree(priv->tx); - priv->tx = NULL; - } - if (priv->rx) { - for (i = 0; i < priv->rx_cfg.num_queues; i++) { - ntfy_idx = gve_rx_idx_to_ntfy(priv, i); - gve_remove_napi(priv, ntfy_idx); - } - gve_rx_free_rings(priv); - kvfree(priv->rx); - priv->rx = NULL; + if (gve_is_gqi(priv)) { + gve_tx_free_rings_gqi(priv, tx_cfg); + gve_rx_free_rings_gqi(priv, rx_cfg); + } else { + gve_tx_free_rings_dqo(priv, tx_cfg); + gve_rx_free_rings_dqo(priv, rx_cfg); } } @@ -1004,44 +1109,41 @@ int gve_alloc_page(struct gve_priv *priv, struct device *dev, return 0; } -static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id, - int pages) +struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv, + u32 id, int pages) { - struct gve_queue_page_list *qpl = &priv->qpls[id]; + struct gve_queue_page_list *qpl; int err; int i; - if (pages + priv->num_registered_pages > priv->max_registered_pages) { - netif_err(priv, drv, priv->dev, - "Reached max number of registered pages %llu > %llu\n", - pages + priv->num_registered_pages, - priv->max_registered_pages); - return -EINVAL; - } + qpl = kvzalloc(sizeof(*qpl), GFP_KERNEL); + if (!qpl) + return NULL; qpl->id = id; qpl->num_entries = 0; qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL); - /* caller handles clean up */ if (!qpl->pages) - return -ENOMEM; + goto abort; + qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL); - /* caller handles clean up */ if (!qpl->page_buses) - return -ENOMEM; + goto abort; for (i = 0; i < pages; i++) { err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i], &qpl->page_buses[i], gve_qpl_dma_dir(priv, id), GFP_KERNEL); - /* caller handles clean up */ if (err) - return -ENOMEM; + goto abort; qpl->num_entries++; } - priv->num_registered_pages += pages; - return 0; + return qpl; + +abort: + gve_free_queue_page_list(priv, qpl, id); + return NULL; } void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, @@ -1053,13 +1155,16 @@ void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, put_page(page); } -static void gve_free_queue_page_list(struct gve_priv *priv, u32 id) +void gve_free_queue_page_list(struct gve_priv *priv, + struct gve_queue_page_list *qpl, + u32 id) { - struct gve_queue_page_list *qpl = &priv->qpls[id]; int i; - if (!qpl->pages) + if (!qpl) return; + if (!qpl->pages) + goto free_qpl; if (!qpl->page_buses) goto free_pages; @@ -1072,115 +1177,8 @@ static void gve_free_queue_page_list(struct gve_priv *priv, u32 id) free_pages: kvfree(qpl->pages); qpl->pages = NULL; - priv->num_registered_pages -= qpl->num_entries; -} - -static int gve_alloc_xdp_qpls(struct gve_priv *priv) -{ - int start_id; - int i, j; - int err; - - start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); - for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { - err = gve_alloc_queue_page_list(priv, i, - priv->tx_pages_per_qpl); - if (err) - goto free_qpls; - } - - return 0; - -free_qpls: - for (j = start_id; j <= i; j++) - gve_free_queue_page_list(priv, j); - return err; -} - -static int gve_alloc_qpls(struct gve_priv *priv) -{ - int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues; - int page_count; - int start_id; - int i, j; - int err; - - if (!gve_is_qpl(priv)) - return 0; - - priv->qpls = kvcalloc(max_queues, sizeof(*priv->qpls), GFP_KERNEL); - if (!priv->qpls) - return -ENOMEM; - - start_id = gve_tx_start_qpl_id(priv); - page_count = priv->tx_pages_per_qpl; - for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) { - err = gve_alloc_queue_page_list(priv, i, - page_count); - if (err) - goto free_qpls; - } - - start_id = gve_rx_start_qpl_id(priv); - - /* For GQI_QPL number of pages allocated have 1:1 relationship with - * number of descriptors. For DQO, number of pages required are - * more than descriptors (because of out of order completions). - */ - page_count = priv->queue_format == GVE_GQI_QPL_FORMAT ? - priv->rx_data_slot_cnt : priv->rx_pages_per_qpl; - for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) { - err = gve_alloc_queue_page_list(priv, i, - page_count); - if (err) - goto free_qpls; - } - - priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(max_queues) * - sizeof(unsigned long) * BITS_PER_BYTE; - priv->qpl_cfg.qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues), - sizeof(unsigned long), GFP_KERNEL); - if (!priv->qpl_cfg.qpl_id_map) { - err = -ENOMEM; - goto free_qpls; - } - - return 0; - -free_qpls: - for (j = 0; j <= i; j++) - gve_free_queue_page_list(priv, j); - kvfree(priv->qpls); - priv->qpls = NULL; - return err; -} - -static void gve_free_xdp_qpls(struct gve_priv *priv) -{ - int start_id; - int i; - - start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv)); - for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) - gve_free_queue_page_list(priv, i); -} - -static void gve_free_qpls(struct gve_priv *priv) -{ - int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues; - int i; - - if (!priv->qpls) - return; - - kvfree(priv->qpl_cfg.qpl_id_map); - priv->qpl_cfg.qpl_id_map = NULL; - - for (i = 0; i < max_queues; i++) - gve_free_queue_page_list(priv, i); - - kvfree(priv->qpls); - priv->qpls = NULL; +free_qpl: + kvfree(qpl); } /* Use this to schedule a reset when the device is capable of continuing @@ -1278,58 +1276,127 @@ static void gve_unreg_xdp_info(struct gve_priv *priv) static void gve_drain_page_cache(struct gve_priv *priv) { - struct page_frag_cache *nc; int i; - for (i = 0; i < priv->rx_cfg.num_queues; i++) { - nc = &priv->rx[i].page_cache; - if (nc->va) { - __page_frag_cache_drain(virt_to_page(nc->va), - nc->pagecnt_bias); - nc->va = NULL; - } - } + for (i = 0; i < priv->rx_cfg.num_queues; i++) + page_frag_cache_drain(&priv->rx[i].page_cache); } -static int gve_open(struct net_device *dev) +static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg) { - struct gve_priv *priv = netdev_priv(dev); + cfg->qcfg = &priv->rx_cfg; + cfg->qcfg_tx = &priv->tx_cfg; + cfg->raw_addressing = !gve_is_qpl(priv); + cfg->enable_header_split = priv->header_split_enabled; + cfg->ring_size = priv->rx_desc_cnt; + cfg->packet_buffer_size = gve_is_gqi(priv) ? + GVE_DEFAULT_RX_BUFFER_SIZE : + priv->data_buffer_size_dqo; + cfg->rx = priv->rx; +} + +void gve_get_curr_alloc_cfgs(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) +{ + gve_tx_get_curr_alloc_cfg(priv, tx_alloc_cfg); + gve_rx_get_curr_alloc_cfg(priv, rx_alloc_cfg); +} + +static void gve_rx_start_ring(struct gve_priv *priv, int i) +{ + if (gve_is_gqi(priv)) + gve_rx_start_ring_gqi(priv, i); + else + gve_rx_start_ring_dqo(priv, i); +} + +static void gve_rx_start_rings(struct gve_priv *priv, int num_rings) +{ + int i; + + for (i = 0; i < num_rings; i++) + gve_rx_start_ring(priv, i); +} + +static void gve_rx_stop_ring(struct gve_priv *priv, int i) +{ + if (gve_is_gqi(priv)) + gve_rx_stop_ring_gqi(priv, i); + else + gve_rx_stop_ring_dqo(priv, i); +} + +static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings) +{ + int i; + + if (!priv->rx) + return; + + for (i = 0; i < num_rings; i++) + gve_rx_stop_ring(priv, i); +} + +static void gve_queues_mem_remove(struct gve_priv *priv) +{ + struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; + struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; + + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); + gve_queues_mem_free(priv, &tx_alloc_cfg, &rx_alloc_cfg); + priv->tx = NULL; + priv->rx = NULL; +} + +/* The passed-in queue memory is stored into priv and the queues are made live. + * No memory is allocated. Passed-in memory is freed on errors. + */ +static int gve_queues_start(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) +{ + struct net_device *dev = priv->dev; int err; + /* Record new resources into priv */ + priv->tx = tx_alloc_cfg->tx; + priv->rx = rx_alloc_cfg->rx; + + /* Record new configs into priv */ + priv->tx_cfg = *tx_alloc_cfg->qcfg; + priv->rx_cfg = *rx_alloc_cfg->qcfg; + priv->tx_desc_cnt = tx_alloc_cfg->ring_size; + priv->rx_desc_cnt = rx_alloc_cfg->ring_size; + if (priv->xdp_prog) priv->num_xdp_queues = priv->rx_cfg.num_queues; else priv->num_xdp_queues = 0; - err = gve_alloc_qpls(priv); - if (err) - return err; - - err = gve_alloc_rings(priv); - if (err) - goto free_qpls; + gve_tx_start_rings(priv, 0, tx_alloc_cfg->num_rings); + gve_rx_start_rings(priv, rx_alloc_cfg->qcfg->num_queues); + gve_init_sync_stats(priv); err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues); if (err) - goto free_rings; + goto stop_and_free_rings; err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues); if (err) - goto free_rings; + goto stop_and_free_rings; err = gve_reg_xdp_info(priv, dev); if (err) - goto free_rings; + goto stop_and_free_rings; err = gve_register_qpls(priv); if (err) goto reset; - if (!gve_is_gqi(priv)) { - /* Hard code this for now. This may be tuned in the future for - * performance. - */ - priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE; - } + priv->header_split_enabled = rx_alloc_cfg->enable_header_split; + priv->data_buffer_size_dqo = rx_alloc_cfg->packet_buffer_size; + err = gve_create_rings(priv); if (err) goto reset; @@ -1346,32 +1413,49 @@ static int gve_open(struct net_device *dev) priv->interface_up_cnt++; return 0; -free_rings: - gve_free_rings(priv); -free_qpls: - gve_free_qpls(priv); - return err; - reset: - /* This must have been called from a reset due to the rtnl lock - * so just return at this point. - */ if (gve_get_reset_in_progress(priv)) - return err; - /* Otherwise reset before returning */ + goto stop_and_free_rings; gve_reset_and_teardown(priv, true); /* if this fails there is nothing we can do so just ignore the return */ gve_reset_recovery(priv, false); /* return the original error */ return err; +stop_and_free_rings: + gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv)); + gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); + gve_queues_mem_remove(priv); + return err; } -static int gve_close(struct net_device *dev) +static int gve_open(struct net_device *dev) { + struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; + struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; struct gve_priv *priv = netdev_priv(dev); int err; - netif_carrier_off(dev); + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); + + err = gve_queues_mem_alloc(priv, &tx_alloc_cfg, &rx_alloc_cfg); + if (err) + return err; + + /* No need to free on error: ownership of resources is lost after + * calling gve_queues_start. + */ + err = gve_queues_start(priv, &tx_alloc_cfg, &rx_alloc_cfg); + if (err) + return err; + + return 0; +} + +static int gve_queues_stop(struct gve_priv *priv) +{ + int err; + + netif_carrier_off(priv->dev); if (gve_get_device_rings_ok(priv)) { gve_turndown(priv); gve_drain_page_cache(priv); @@ -1386,8 +1470,10 @@ static int gve_close(struct net_device *dev) del_timer_sync(&priv->stats_report_timer); gve_unreg_xdp_info(priv); - gve_free_rings(priv); - gve_free_qpls(priv); + + gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv)); + gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); + priv->interface_down_cnt++; return 0; @@ -1402,6 +1488,19 @@ err: return gve_reset_recovery(priv, false); } +static int gve_close(struct net_device *dev) +{ + struct gve_priv *priv = netdev_priv(dev); + int err; + + err = gve_queues_stop(priv); + if (err) + return err; + + gve_queues_mem_remove(priv); + return 0; +} + static int gve_remove_xdp_queues(struct gve_priv *priv) { int err; @@ -1416,7 +1515,7 @@ static int gve_remove_xdp_queues(struct gve_priv *priv) gve_unreg_xdp_info(priv); gve_free_xdp_rings(priv); - gve_free_xdp_qpls(priv); + priv->num_xdp_queues = 0; return 0; } @@ -1425,15 +1524,11 @@ static int gve_add_xdp_queues(struct gve_priv *priv) { int err; - priv->num_xdp_queues = priv->tx_cfg.num_queues; - - err = gve_alloc_xdp_qpls(priv); - if (err) - goto err; + priv->num_xdp_queues = priv->rx_cfg.num_queues; err = gve_alloc_xdp_rings(priv); if (err) - goto free_xdp_qpls; + goto err; err = gve_reg_xdp_info(priv, priv->dev); if (err) @@ -1451,8 +1546,6 @@ static int gve_add_xdp_queues(struct gve_priv *priv) free_xdp_rings: gve_free_xdp_rings(priv); -free_xdp_qpls: - gve_free_xdp_qpls(priv); err: priv->num_xdp_queues = 0; return err; @@ -1483,7 +1576,7 @@ static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, u32 status; old_prog = READ_ONCE(priv->xdp_prog); - if (!netif_carrier_ok(priv->dev)) { + if (!netif_running(priv->dev)) { WRITE_ONCE(priv->xdp_prog, prog); if (old_prog) bpf_prog_put(old_prog); @@ -1540,8 +1633,8 @@ static int gve_xsk_pool_enable(struct net_device *dev, if (err) return err; - /* If XDP prog is not installed, return */ - if (!priv->xdp_prog) + /* If XDP prog is not installed or interface is down, return. */ + if (!priv->xdp_prog || !netif_running(dev)) return 0; rx = &priv->rx[qid]; @@ -1586,21 +1679,16 @@ static int gve_xsk_pool_disable(struct net_device *dev, if (qid >= priv->rx_cfg.num_queues) return -EINVAL; - /* If XDP prog is not installed, unmap DMA and return */ - if (!priv->xdp_prog) - goto done; - - tx_qid = gve_xdp_tx_queue_id(priv, qid); - if (!netif_running(dev)) { - priv->rx[qid].xsk_pool = NULL; - xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq); - priv->tx[tx_qid].xsk_pool = NULL; + /* If XDP prog is not installed or interface is down, unmap DMA and + * return. + */ + if (!priv->xdp_prog || !netif_running(dev)) goto done; - } napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi; napi_disable(napi_rx); /* make sure current rx poll is done */ + tx_qid = gve_xdp_tx_queue_id(priv, qid); napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi; napi_disable(napi_tx); /* make sure current tx poll is done */ @@ -1626,24 +1714,20 @@ done: static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) { struct gve_priv *priv = netdev_priv(dev); - int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id); + struct napi_struct *napi; + + if (!gve_get_napi_enabled(priv)) + return -ENETDOWN; if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog) return -EINVAL; - if (flags & XDP_WAKEUP_TX) { - struct gve_tx_ring *tx = &priv->tx[tx_queue_id]; - struct napi_struct *napi = - &priv->ntfy_blocks[tx->ntfy_id].napi; - - if (!napi_if_scheduled_mark_missed(napi)) { - /* Call local_bh_enable to trigger SoftIRQ processing */ - local_bh_disable(); - napi_schedule(napi); - local_bh_enable(); - } - - tx->xdp_xsk_wakeup++; + napi = &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_id)].napi; + if (!napi_if_scheduled_mark_missed(napi)) { + /* Call local_bh_enable to trigger SoftIRQ processing */ + local_bh_disable(); + napi_schedule(napi); + local_bh_enable(); } return 0; @@ -1702,42 +1786,82 @@ static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp) } } +int gve_flow_rules_reset(struct gve_priv *priv) +{ + if (!priv->max_flow_rules) + return 0; + + return gve_adminq_reset_flow_rules(priv); +} + +int gve_adjust_config(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) +{ + int err; + + /* Allocate resources for the new confiugration */ + err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg); + if (err) { + netif_err(priv, drv, priv->dev, + "Adjust config failed to alloc new queues"); + return err; + } + + /* Teardown the device and free existing resources */ + err = gve_close(priv->dev); + if (err) { + netif_err(priv, drv, priv->dev, + "Adjust config failed to close old queues"); + gve_queues_mem_free(priv, tx_alloc_cfg, rx_alloc_cfg); + return err; + } + + /* Bring the device back up again with the new resources. */ + err = gve_queues_start(priv, tx_alloc_cfg, rx_alloc_cfg); + if (err) { + netif_err(priv, drv, priv->dev, + "Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n"); + /* No need to free on error: ownership of resources is lost after + * calling gve_queues_start. + */ + gve_turndown(priv); + return err; + } + + return 0; +} + int gve_adjust_queues(struct gve_priv *priv, struct gve_queue_config new_rx_config, struct gve_queue_config new_tx_config) { + struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; + struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; + int num_xdp_queues; int err; - if (netif_carrier_ok(priv->dev)) { - /* To make this process as simple as possible we teardown the - * device, set the new configuration, and then bring the device - * up again. - */ - err = gve_close(priv->dev); - /* we have already tried to reset in close, - * just fail at this point - */ - if (err) - return err; - priv->tx_cfg = new_tx_config; - priv->rx_cfg = new_rx_config; + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); - err = gve_open(priv->dev); - if (err) - goto err; + /* Relay the new config from ethtool */ + tx_alloc_cfg.qcfg = &new_tx_config; + rx_alloc_cfg.qcfg_tx = &new_tx_config; + rx_alloc_cfg.qcfg = &new_rx_config; + tx_alloc_cfg.num_rings = new_tx_config.num_queues; - return 0; + /* Add dedicated XDP TX queues if enabled. */ + num_xdp_queues = priv->xdp_prog ? new_rx_config.num_queues : 0; + tx_alloc_cfg.num_rings += num_xdp_queues; + + if (netif_running(priv->dev)) { + err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); + return err; } /* Set the config for the next up. */ priv->tx_cfg = new_tx_config; priv->rx_cfg = new_rx_config; return 0; -err: - netif_err(priv, drv, priv->dev, - "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n"); - gve_turndown(priv); - return err; } static void gve_turndown(struct gve_priv *priv) @@ -1755,20 +1879,37 @@ static void gve_turndown(struct gve_priv *priv) int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + if (!gve_tx_was_added_to_block(priv, idx)) + continue; + + if (idx < priv->tx_cfg.num_queues) + netif_queue_set_napi(priv->dev, idx, + NETDEV_QUEUE_TYPE_TX, NULL); + napi_disable(&block->napi); } for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + if (!gve_rx_was_added_to_block(priv, idx)) + continue; + + netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, + NULL); napi_disable(&block->napi); } /* Stop tx queues */ netif_tx_disable(priv->dev); + xdp_features_clear_redirect_target(priv->dev); + gve_clear_napi_enabled(priv); gve_clear_report_stats(priv); + + /* Make sure that all traffic is finished processing. */ + synchronize_net(); } static void gve_turnup(struct gve_priv *priv) @@ -1783,30 +1924,71 @@ static void gve_turnup(struct gve_priv *priv) int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + if (!gve_tx_was_added_to_block(priv, idx)) + continue; + napi_enable(&block->napi); + + if (idx < priv->tx_cfg.num_queues) + netif_queue_set_napi(priv->dev, idx, + NETDEV_QUEUE_TYPE_TX, + &block->napi); + if (gve_is_gqi(priv)) { iowrite32be(0, gve_irq_doorbell(priv, block)); } else { gve_set_itr_coalesce_usecs_dqo(priv, block, priv->tx_coalesce_usecs); } + + /* Any descs written by the NIC before this barrier will be + * handled by the one-off napi schedule below. Whereas any + * descs after the barrier will generate interrupts. + */ + mb(); + napi_schedule(&block->napi); } for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + if (!gve_rx_was_added_to_block(priv, idx)) + continue; + napi_enable(&block->napi); + netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, + &block->napi); + if (gve_is_gqi(priv)) { iowrite32be(0, gve_irq_doorbell(priv, block)); } else { gve_set_itr_coalesce_usecs_dqo(priv, block, priv->rx_coalesce_usecs); } + + /* Any descs written by the NIC before this barrier will be + * handled by the one-off napi schedule below. Whereas any + * descs after the barrier will generate interrupts. + */ + mb(); + napi_schedule(&block->napi); } + if (priv->num_xdp_queues && gve_supports_xdp_xmit(priv)) + xdp_features_set_redirect_target(priv->dev, false); + gve_set_napi_enabled(priv); } +static void gve_turnup_and_check_status(struct gve_priv *priv) +{ + u32 status; + + gve_turnup(priv); + status = ioread32be(&priv->reg_bar0->device_status); + gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); +} + static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct gve_notify_block *block; @@ -1853,39 +2035,82 @@ out: priv->tx_timeo_cnt++; } +u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit) +{ + if (enable_hsplit && priv->max_rx_buffer_size >= GVE_MAX_RX_BUFFER_SIZE) + return GVE_MAX_RX_BUFFER_SIZE; + else + return GVE_DEFAULT_RX_BUFFER_SIZE; +} + +/* header-split is not supported on non-DQO_RDA yet even if device advertises it */ +bool gve_header_split_supported(const struct gve_priv *priv) +{ + return priv->header_buf_size && priv->queue_format == GVE_DQO_RDA_FORMAT; +} + +int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split) +{ + struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; + struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; + bool enable_hdr_split; + int err = 0; + + if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) + return 0; + + if (!gve_header_split_supported(priv)) { + dev_err(&priv->pdev->dev, "Header-split not supported\n"); + return -EOPNOTSUPP; + } + + if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED) + enable_hdr_split = true; + else + enable_hdr_split = false; + + if (enable_hdr_split == priv->header_split_enabled) + return 0; + + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); + + rx_alloc_cfg.enable_header_split = enable_hdr_split; + rx_alloc_cfg.packet_buffer_size = gve_get_pkt_buf_size(priv, enable_hdr_split); + + if (netif_running(priv->dev)) + err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); + return err; +} + static int gve_set_features(struct net_device *netdev, netdev_features_t features) { const netdev_features_t orig_features = netdev->features; + struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; + struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; struct gve_priv *priv = netdev_priv(netdev); int err; + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); + if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) { netdev->features ^= NETIF_F_LRO; - if (netif_carrier_ok(netdev)) { - /* To make this process as simple as possible we - * teardown the device, set the new configuration, - * and then bring the device up again. - */ - err = gve_close(netdev); - /* We have already tried to reset in close, just fail - * at this point. - */ - if (err) - goto err; - - err = gve_open(netdev); + if (netif_running(netdev)) { + err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); if (err) - goto err; + goto revert_features; } } + if ((netdev->features & NETIF_F_NTUPLE) && !(features & NETIF_F_NTUPLE)) { + err = gve_flow_rules_reset(priv); + if (err) + goto revert_features; + } return 0; -err: - /* Reverts the change on error. */ + +revert_features: netdev->features = orig_features; - netif_err(priv, drv, netdev, - "Set features failed! !!! DISABLING ALL QUEUES !!!\n"); return err; } @@ -2021,14 +2246,17 @@ static void gve_service_task(struct work_struct *work) static void gve_set_netdev_xdp_features(struct gve_priv *priv) { + xdp_features_t xdp_features; + if (priv->queue_format == GVE_GQI_QPL_FORMAT) { - priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC; - priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT; - priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT; - priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; + xdp_features = NETDEV_XDP_ACT_BASIC; + xdp_features |= NETDEV_XDP_ACT_REDIRECT; + xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; } else { - priv->dev->xdp_features = 0; + xdp_features = 0; } + + xdp_set_features_flag(priv->dev, xdp_features); } static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) @@ -2051,6 +2279,8 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) goto err; } + priv->num_registered_pages = 0; + if (skip_describe_device) goto setup_device; @@ -2080,7 +2310,6 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) if (!gve_is_gqi(priv)) netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX); - priv->num_registered_pages = 0; priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK; /* gvnic has one Notification Block per MSI-x vector, except for the * management vector @@ -2164,7 +2393,7 @@ err: int gve_reset(struct gve_priv *priv, bool attempt_teardown) { - bool was_up = netif_carrier_ok(priv->dev); + bool was_up = netif_running(priv->dev); int err; dev_info(&priv->pdev->dev, "Performing reset\n"); @@ -2215,6 +2444,188 @@ static void gve_write_version(u8 __iomem *driver_version_register) writeb('\n', driver_version_register); } +static int gve_rx_queue_stop(struct net_device *dev, void *per_q_mem, int idx) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_rx_ring *gve_per_q_mem; + int err; + + if (!priv->rx) + return -EAGAIN; + + /* Destroying queue 0 while other queues exist is not supported in DQO */ + if (!gve_is_gqi(priv) && idx == 0) + return -ERANGE; + + /* Single-queue destruction requires quiescence on all queues */ + gve_turndown(priv); + + /* This failure will trigger a reset - no need to clean up */ + err = gve_adminq_destroy_single_rx_queue(priv, idx); + if (err) + return err; + + if (gve_is_qpl(priv)) { + /* This failure will trigger a reset - no need to clean up */ + err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, idx)); + if (err) + return err; + } + + gve_rx_stop_ring(priv, idx); + + /* Turn the unstopped queues back up */ + gve_turnup_and_check_status(priv); + + gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; + *gve_per_q_mem = priv->rx[idx]; + memset(&priv->rx[idx], 0, sizeof(priv->rx[idx])); + return 0; +} + +static void gve_rx_queue_mem_free(struct net_device *dev, void *per_q_mem) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_rx_alloc_rings_cfg cfg = {0}; + struct gve_rx_ring *gve_per_q_mem; + + gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; + gve_rx_get_curr_alloc_cfg(priv, &cfg); + + if (gve_is_gqi(priv)) + gve_rx_free_ring_gqi(priv, gve_per_q_mem, &cfg); + else + gve_rx_free_ring_dqo(priv, gve_per_q_mem, &cfg); +} + +static int gve_rx_queue_mem_alloc(struct net_device *dev, void *per_q_mem, + int idx) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_rx_alloc_rings_cfg cfg = {0}; + struct gve_rx_ring *gve_per_q_mem; + int err; + + if (!priv->rx) + return -EAGAIN; + + gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; + gve_rx_get_curr_alloc_cfg(priv, &cfg); + + if (gve_is_gqi(priv)) + err = gve_rx_alloc_ring_gqi(priv, &cfg, gve_per_q_mem, idx); + else + err = gve_rx_alloc_ring_dqo(priv, &cfg, gve_per_q_mem, idx); + + return err; +} + +static int gve_rx_queue_start(struct net_device *dev, void *per_q_mem, int idx) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_rx_ring *gve_per_q_mem; + int err; + + if (!priv->rx) + return -EAGAIN; + + gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; + priv->rx[idx] = *gve_per_q_mem; + + /* Single-queue creation requires quiescence on all queues */ + gve_turndown(priv); + + gve_rx_start_ring(priv, idx); + + if (gve_is_qpl(priv)) { + /* This failure will trigger a reset - no need to clean up */ + err = gve_register_qpl(priv, gve_rx_get_qpl(priv, idx)); + if (err) + goto abort; + } + + /* This failure will trigger a reset - no need to clean up */ + err = gve_adminq_create_single_rx_queue(priv, idx); + if (err) + goto abort; + + if (gve_is_gqi(priv)) + gve_rx_write_doorbell(priv, &priv->rx[idx]); + else + gve_rx_post_buffers_dqo(&priv->rx[idx]); + + /* Turn the unstopped queues back up */ + gve_turnup_and_check_status(priv); + return 0; + +abort: + gve_rx_stop_ring(priv, idx); + + /* All failures in this func result in a reset, by clearing the struct + * at idx, we prevent a double free when that reset runs. The reset, + * which needs the rtnl lock, will not run till this func returns and + * its caller gives up the lock. + */ + memset(&priv->rx[idx], 0, sizeof(priv->rx[idx])); + return err; +} + +static const struct netdev_queue_mgmt_ops gve_queue_mgmt_ops = { + .ndo_queue_mem_size = sizeof(struct gve_rx_ring), + .ndo_queue_mem_alloc = gve_rx_queue_mem_alloc, + .ndo_queue_mem_free = gve_rx_queue_mem_free, + .ndo_queue_start = gve_rx_queue_start, + .ndo_queue_stop = gve_rx_queue_stop, +}; + +static void gve_get_rx_queue_stats(struct net_device *dev, int idx, + struct netdev_queue_stats_rx *rx_stats) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_rx_ring *rx = &priv->rx[idx]; + unsigned int start; + + do { + start = u64_stats_fetch_begin(&rx->statss); + rx_stats->packets = rx->rpackets; + rx_stats->bytes = rx->rbytes; + rx_stats->alloc_fail = rx->rx_skb_alloc_fail + + rx->rx_buf_alloc_fail; + } while (u64_stats_fetch_retry(&rx->statss, start)); +} + +static void gve_get_tx_queue_stats(struct net_device *dev, int idx, + struct netdev_queue_stats_tx *tx_stats) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_tx_ring *tx = &priv->tx[idx]; + unsigned int start; + + do { + start = u64_stats_fetch_begin(&tx->statss); + tx_stats->packets = tx->pkt_done; + tx_stats->bytes = tx->bytes_done; + } while (u64_stats_fetch_retry(&tx->statss, start)); +} + +static void gve_get_base_stats(struct net_device *dev, + struct netdev_queue_stats_rx *rx, + struct netdev_queue_stats_tx *tx) +{ + rx->packets = 0; + rx->bytes = 0; + rx->alloc_fail = 0; + + tx->packets = 0; + tx->bytes = 0; +} + +static const struct netdev_stat_ops gve_stat_ops = { + .get_queue_stats_rx = gve_get_rx_queue_stats, + .get_queue_stats_tx = gve_get_tx_queue_stats, + .get_base_stats = gve_get_base_stats, +}; + static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { int max_tx_queues, max_rx_queues; @@ -2269,6 +2680,8 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, dev); dev->ethtool_ops = &gve_ethtool_ops; dev->netdev_ops = &gve_netdev_ops; + dev->queue_mgmt_ops = &gve_queue_mgmt_ops; + dev->stat_ops = &gve_stat_ops; /* Set default and supported features. * @@ -2297,6 +2710,8 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) priv->service_task_flags = 0x0; priv->state_flags = 0x0; priv->ethtool_flags = 0x0; + priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE; + priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; gve_set_probe_in_progress(priv); priv->gve_wq = alloc_ordered_workqueue("gve", 0); @@ -2368,7 +2783,7 @@ static void gve_shutdown(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct gve_priv *priv = netdev_priv(netdev); - bool was_up = netif_carrier_ok(priv->dev); + bool was_up = netif_running(priv->dev); rtnl_lock(); if (was_up && gve_close(priv->dev)) { @@ -2386,7 +2801,7 @@ static int gve_suspend(struct pci_dev *pdev, pm_message_t state) { struct net_device *netdev = pci_get_drvdata(pdev); struct gve_priv *priv = netdev_priv(netdev); - bool was_up = netif_carrier_ok(priv->dev); + bool was_up = netif_running(priv->dev); priv->suspend_cnt++; rtnl_lock(); |