Diffstat (limited to 'drivers/net/ethernet/google/gve/gve_main.c')
 drivers/net/ethernet/google/gve/gve_main.c | 2162
 1 file changed, 1757 insertions(+), 405 deletions(-)
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 867e87af3432..a5a2b18d309b 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1,22 +1,31 @@ // SPDX-License-Identifier: (GPL-2.0 OR MIT) /* Google virtual Ethernet (gve) driver * - * Copyright (C) 2015-2021 Google, Inc. + * Copyright (C) 2015-2024 Google LLC */ +#include <linux/bitmap.h> +#include <linux/bpf.h> #include <linux/cpumask.h> #include <linux/etherdevice.h> +#include <linux/filter.h> #include <linux/interrupt.h> +#include <linux/irq.h> #include <linux/module.h> #include <linux/pci.h> #include <linux/sched.h> #include <linux/timer.h> #include <linux/workqueue.h> +#include <linux/utsname.h> +#include <linux/version.h> +#include <net/netdev_queues.h> #include <net/sch_generic.h> +#include <net/xdp_sock_drv.h> #include "gve.h" #include "gve_dqo.h" #include "gve_adminq.h" #include "gve_register.h" +#include "gve_utils.h" #define GVE_DEFAULT_RX_COPYBREAK (256) @@ -24,9 +33,68 @@ #define GVE_VERSION "1.0.0" #define GVE_VERSION_PREFIX "GVE-" +// Minimum amount of time between queue kicks in msec (10 seconds) +#define MIN_TX_TIMEOUT_GAP (1000 * 10) + +char gve_driver_name[] = "gve"; const char gve_version_str[] = GVE_VERSION; static const char gve_version_prefix[] = GVE_VERSION_PREFIX; +static int gve_verify_driver_compatibility(struct gve_priv *priv) +{ + int err; + struct gve_driver_info *driver_info; + dma_addr_t driver_info_bus; + + driver_info = dma_alloc_coherent(&priv->pdev->dev, + sizeof(struct gve_driver_info), + &driver_info_bus, GFP_KERNEL); + if (!driver_info) + return -ENOMEM; + + *driver_info = (struct gve_driver_info) { + .os_type = 1, /* Linux */ + .os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR), + .os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL), + .os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL), + .driver_capability_flags = { + cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1), + cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2), + cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3), + cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4), + }, + }; + strscpy(driver_info->os_version_str1, utsname()->release, + sizeof(driver_info->os_version_str1)); + strscpy(driver_info->os_version_str2, utsname()->version, + sizeof(driver_info->os_version_str2)); + + err = gve_adminq_verify_driver_compatibility(priv, + sizeof(struct gve_driver_info), + driver_info_bus); + + /* It's ok if the device doesn't support this */ + if (err == -EOPNOTSUPP) + err = 0; + + dma_free_coherent(&priv->pdev->dev, + sizeof(struct gve_driver_info), + driver_info, driver_info_bus); + return err; +} + +static netdev_features_t gve_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + struct gve_priv *priv = netdev_priv(dev); + + if (!gve_is_gqi(priv)) + return gve_features_check_dqo(skb, dev, features); + + return features; +} + static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct gve_priv *priv = netdev_priv(dev); @@ -41,32 +109,119 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) { struct gve_priv *priv = netdev_priv(dev); unsigned int start; + u64 packets, bytes; + int num_tx_queues; int ring; + num_tx_queues = gve_num_tx_queues(priv); if (priv->rx) { for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) { do { start = u64_stats_fetch_begin(&priv->rx[ring].statss); - s->rx_packets += priv->rx[ring].rpackets; - s->rx_bytes += priv->rx[ring].rbytes; 
+ packets = priv->rx[ring].rpackets; + bytes = priv->rx[ring].rbytes; } while (u64_stats_fetch_retry(&priv->rx[ring].statss, start)); + s->rx_packets += packets; + s->rx_bytes += bytes; } } if (priv->tx) { - for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) { + for (ring = 0; ring < num_tx_queues; ring++) { do { start = u64_stats_fetch_begin(&priv->tx[ring].statss); - s->tx_packets += priv->tx[ring].pkt_done; - s->tx_bytes += priv->tx[ring].bytes_done; + packets = priv->tx[ring].pkt_done; + bytes = priv->tx[ring].bytes_done; } while (u64_stats_fetch_retry(&priv->tx[ring].statss, start)); + s->tx_packets += packets; + s->tx_bytes += bytes; } } } +static int gve_alloc_flow_rule_caches(struct gve_priv *priv) +{ + struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache; + int err = 0; + + if (!priv->max_flow_rules) + return 0; + + flow_rules_cache->rules_cache = + kvcalloc(GVE_FLOW_RULES_CACHE_SIZE, sizeof(*flow_rules_cache->rules_cache), + GFP_KERNEL); + if (!flow_rules_cache->rules_cache) { + dev_err(&priv->pdev->dev, "Cannot alloc flow rules cache\n"); + return -ENOMEM; + } + + flow_rules_cache->rule_ids_cache = + kvcalloc(GVE_FLOW_RULE_IDS_CACHE_SIZE, sizeof(*flow_rules_cache->rule_ids_cache), + GFP_KERNEL); + if (!flow_rules_cache->rule_ids_cache) { + dev_err(&priv->pdev->dev, "Cannot alloc flow rule ids cache\n"); + err = -ENOMEM; + goto free_rules_cache; + } + + return 0; + +free_rules_cache: + kvfree(flow_rules_cache->rules_cache); + flow_rules_cache->rules_cache = NULL; + return err; +} + +static void gve_free_flow_rule_caches(struct gve_priv *priv) +{ + struct gve_flow_rules_cache *flow_rules_cache = &priv->flow_rules_cache; + + kvfree(flow_rules_cache->rule_ids_cache); + flow_rules_cache->rule_ids_cache = NULL; + kvfree(flow_rules_cache->rules_cache); + flow_rules_cache->rules_cache = NULL; +} + +static int gve_alloc_rss_config_cache(struct gve_priv *priv) +{ + struct gve_rss_config *rss_config = &priv->rss_config; + + if (!priv->cache_rss_config) + return 0; + + rss_config->hash_key = kcalloc(priv->rss_key_size, + sizeof(rss_config->hash_key[0]), + GFP_KERNEL); + if (!rss_config->hash_key) + return -ENOMEM; + + rss_config->hash_lut = kcalloc(priv->rss_lut_size, + sizeof(rss_config->hash_lut[0]), + GFP_KERNEL); + if (!rss_config->hash_lut) + goto free_rss_key_cache; + + return 0; + +free_rss_key_cache: + kfree(rss_config->hash_key); + rss_config->hash_key = NULL; + return -ENOMEM; +} + +static void gve_free_rss_config_cache(struct gve_priv *priv) +{ + struct gve_rss_config *rss_config = &priv->rss_config; + + kfree(rss_config->hash_key); + kfree(rss_config->hash_lut); + + memset(rss_config, 0, sizeof(*rss_config)); +} + static int gve_alloc_counter_array(struct gve_priv *priv) { priv->counter_array = @@ -82,6 +237,9 @@ static int gve_alloc_counter_array(struct gve_priv *priv) static void gve_free_counter_array(struct gve_priv *priv) { + if (!priv->counter_array) + return; + dma_free_coherent(&priv->pdev->dev, priv->num_event_counters * sizeof(*priv->counter_array), @@ -111,7 +269,8 @@ static void gve_stats_report_schedule(struct gve_priv *priv) static void gve_stats_report_timer(struct timer_list *t) { - struct gve_priv *priv = from_timer(priv, t, stats_report_timer); + struct gve_priv *priv = timer_container_of(priv, t, + stats_report_timer); mod_timer(&priv->stats_report_timer, round_jiffies(jiffies + @@ -124,11 +283,11 @@ static int gve_alloc_stats_report(struct gve_priv *priv) int tx_stats_num, rx_stats_num; tx_stats_num = (GVE_TX_STATS_REPORT_NUM + 
NIC_TX_STATS_REPORT_NUM) * - priv->tx_cfg.num_queues; + gve_num_tx_queues(priv); rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) * priv->rx_cfg.num_queues; priv->stats_report_len = struct_size(priv->stats_report, stats, - tx_stats_num + rx_stats_num); + size_add(tx_stats_num, rx_stats_num)); priv->stats_report = dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len, &priv->stats_report_bus, GFP_KERNEL); @@ -142,7 +301,10 @@ static int gve_alloc_stats_report(struct gve_priv *priv) static void gve_free_stats_report(struct gve_priv *priv) { - del_timer_sync(&priv->stats_report_timer); + if (!priv->stats_report) + return; + + timer_delete_sync(&priv->stats_report_timer); dma_free_coherent(&priv->pdev->dev, priv->stats_report_len, priv->stats_report, priv->stats_report_bus); priv->stats_report = NULL; @@ -175,43 +337,77 @@ static irqreturn_t gve_intr_dqo(int irq, void *arg) return IRQ_HANDLED; } -static int gve_napi_poll(struct napi_struct *napi, int budget) +static int gve_is_napi_on_home_cpu(struct gve_priv *priv, u32 irq) +{ + int cpu_curr = smp_processor_id(); + const struct cpumask *aff_mask; + + aff_mask = irq_get_effective_affinity_mask(irq); + if (unlikely(!aff_mask)) + return 1; + + return cpumask_test_cpu(cpu_curr, aff_mask); +} + +int gve_napi_poll(struct napi_struct *napi, int budget) { struct gve_notify_block *block; __be32 __iomem *irq_doorbell; bool reschedule = false; struct gve_priv *priv; + int work_done = 0; block = container_of(napi, struct gve_notify_block, napi); priv = block->priv; - if (block->tx) - reschedule |= gve_tx_poll(block, budget); - if (block->rx) - reschedule |= gve_rx_poll(block, budget); + if (block->tx) { + if (block->tx->q_num < priv->tx_cfg.num_queues) + reschedule |= gve_tx_poll(block, budget); + else if (budget) + reschedule |= gve_xdp_poll(block, budget); + } + + if (!budget) + return 0; + + if (block->rx) { + work_done = gve_rx_poll(block, budget); + + /* Poll XSK TX as part of RX NAPI. Setup re-poll based on max of + * TX and RX work done. + */ + if (priv->xdp_prog) + work_done = max_t(int, work_done, + gve_xsk_tx_poll(block, budget)); + + reschedule |= work_done == budget; + } if (reschedule) return budget; - napi_complete(napi); - irq_doorbell = gve_irq_doorbell(priv, block); - iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell); + /* Complete processing - don't unmask irq if busy polling is enabled */ + if (likely(napi_complete_done(napi, work_done))) { + irq_doorbell = gve_irq_doorbell(priv, block); + iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell); - /* Double check we have no extra work. - * Ensure unmask synchronizes with checking for work. - */ - mb(); - if (block->tx) - reschedule |= gve_tx_poll(block, -1); - if (block->rx) - reschedule |= gve_rx_poll(block, -1); - if (reschedule && napi_reschedule(napi)) - iowrite32be(GVE_IRQ_MASK, irq_doorbell); + /* Ensure IRQ ACK is visible before we check pending work. + * If queue had issued updates, it would be truly visible. 
+ */ + mb(); - return 0; + if (block->tx) + reschedule |= gve_tx_clean_pending(priv, block->tx); + if (block->rx) + reschedule |= gve_rx_work_pending(block->rx); + + if (reschedule && napi_schedule(napi)) + iowrite32be(GVE_IRQ_MASK, irq_doorbell); + } + return work_done; } -static int gve_napi_poll_dqo(struct napi_struct *napi, int budget) +int gve_napi_poll_dqo(struct napi_struct *napi, int budget) { struct gve_notify_block *block = container_of(napi, struct gve_notify_block, napi); @@ -219,29 +415,42 @@ static int gve_napi_poll_dqo(struct napi_struct *napi, int budget) bool reschedule = false; int work_done = 0; - /* Clear PCI MSI-X Pending Bit Array (PBA) - * - * This bit is set if an interrupt event occurs while the vector is - * masked. If this bit is set and we reenable the interrupt, it will - * fire again. Since we're just about to poll the queue state, we don't - * need it to fire again. - * - * Under high softirq load, it's possible that the interrupt condition - * is triggered twice before we got the chance to process it. - */ - gve_write_irq_doorbell_dqo(priv, block, - GVE_ITR_NO_UPDATE_DQO | GVE_ITR_CLEAR_PBA_BIT_DQO); + if (block->tx) { + if (block->tx->q_num < priv->tx_cfg.num_queues) + reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true); + else + reschedule |= gve_xdp_poll_dqo(block); + } - if (block->tx) - reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true); + if (!budget) + return 0; if (block->rx) { work_done = gve_rx_poll_dqo(block, budget); + + /* Poll XSK TX as part of RX NAPI. Setup re-poll based on if + * either datapath has more work to do. + */ + if (priv->xdp_prog) + reschedule |= gve_xsk_tx_poll_dqo(block, budget); reschedule |= work_done == budget; } - if (reschedule) - return budget; + if (reschedule) { + /* Reschedule by returning budget only if already on the correct + * cpu. + */ + if (likely(gve_is_napi_on_home_cpu(priv, block->irq))) + return budget; + + /* If not on the cpu with which this queue's irq has affinity + * with, we avoid rescheduling napi and arm the irq instead so + * that napi gets rescheduled back eventually onto the right + * cpu. + */ + if (work_done == budget) + work_done--; + } if (likely(napi_complete_done(napi, work_done))) { /* Enable interrupts again. 
@@ -259,16 +468,24 @@ static int gve_napi_poll_dqo(struct napi_struct *napi, int budget) return work_done; } +static const struct cpumask *gve_get_node_mask(struct gve_priv *priv) +{ + if (priv->numa_node == NUMA_NO_NODE) + return cpu_all_mask; + else + return cpumask_of_node(priv->numa_node); +} + static int gve_alloc_notify_blocks(struct gve_priv *priv) { int num_vecs_requested = priv->num_ntfy_blks + 1; - char *name = priv->dev->name; - unsigned int active_cpus; + const struct cpumask *node_mask; + unsigned int cur_cpu; int vecs_enabled; int i, j; int err; - priv->msix_vectors = kvzalloc(num_vecs_requested * + priv->msix_vectors = kvcalloc(num_vecs_requested, sizeof(*priv->msix_vectors), GFP_KERNEL); if (!priv->msix_vectors) return -ENOMEM; @@ -302,34 +519,42 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv) if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues) priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; } - /* Half the notification blocks go to TX and half to RX */ - active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus()); /* Setup Management Vector - the last vector */ - snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "%s-mgmnt", - name); + snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s", + pci_name(priv->pdev)); err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv); if (err) { dev_err(&priv->pdev->dev, "Did not receive management vector.\n"); goto abort_with_msix_enabled; } - priv->ntfy_blocks = + priv->irq_db_indices = dma_alloc_coherent(&priv->pdev->dev, priv->num_ntfy_blks * - sizeof(*priv->ntfy_blocks), - &priv->ntfy_block_bus, GFP_KERNEL); - if (!priv->ntfy_blocks) { + sizeof(*priv->irq_db_indices), + &priv->irq_db_indices_bus, GFP_KERNEL); + if (!priv->irq_db_indices) { err = -ENOMEM; goto abort_with_mgmt_vector; } + + priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks * + sizeof(*priv->ntfy_blocks), GFP_KERNEL); + if (!priv->ntfy_blocks) { + err = -ENOMEM; + goto abort_with_irq_db_indices; + } + /* Setup the other blocks - the first n-1 vectors */ + node_mask = gve_get_node_mask(priv); + cur_cpu = cpumask_first(node_mask); for (i = 0; i < priv->num_ntfy_blks; i++) { struct gve_notify_block *block = &priv->ntfy_blocks[i]; int msix_idx = i; - snprintf(block->name, sizeof(block->name), "%s-ntfy-block.%d", - name, i); + snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s", + i, pci_name(priv->pdev)); block->priv = priv; err = request_irq(priv->msix_vectors[msix_idx].vector, gve_is_gqi(priv) ? gve_intr : gve_intr_dqo, @@ -339,8 +564,18 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv) "Failed to receive msix vector %d\n", i); goto abort_with_some_ntfy_blocks; } - irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, - get_cpu_mask(i % active_cpus)); + block->irq = priv->msix_vectors[msix_idx].vector; + irq_set_affinity_and_hint(block->irq, + cpumask_of(cur_cpu)); + block->irq_db_index = &priv->irq_db_indices[i].index; + + cur_cpu = cpumask_next(cur_cpu, node_mask); + /* Wrap once CPUs in the node have been exhausted, or when + * starting RX queue affinities. TX and RX queues of the same + * index share affinity. 
+ */ + if (cur_cpu >= nr_cpu_ids || (i + 1) == priv->tx_cfg.max_queues) + cur_cpu = cpumask_first(node_mask); } return 0; abort_with_some_ntfy_blocks: @@ -351,11 +586,15 @@ abort_with_some_ntfy_blocks: irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, NULL); free_irq(priv->msix_vectors[msix_idx].vector, block); + block->irq = 0; } - dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * - sizeof(*priv->ntfy_blocks), - priv->ntfy_blocks, priv->ntfy_block_bus); + kvfree(priv->ntfy_blocks); priv->ntfy_blocks = NULL; +abort_with_irq_db_indices: + dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * + sizeof(*priv->irq_db_indices), + priv->irq_db_indices, priv->irq_db_indices_bus); + priv->irq_db_indices = NULL; abort_with_mgmt_vector: free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); abort_with_msix_enabled: @@ -370,22 +609,26 @@ static void gve_free_notify_blocks(struct gve_priv *priv) { int i; - if (priv->msix_vectors) { - /* Free the irqs */ - for (i = 0; i < priv->num_ntfy_blks; i++) { - struct gve_notify_block *block = &priv->ntfy_blocks[i]; - int msix_idx = i; + if (!priv->msix_vectors) + return; - irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, - NULL); - free_irq(priv->msix_vectors[msix_idx].vector, block); - } - free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); + /* Free the irqs */ + for (i = 0; i < priv->num_ntfy_blks; i++) { + struct gve_notify_block *block = &priv->ntfy_blocks[i]; + int msix_idx = i; + + irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, + NULL); + free_irq(priv->msix_vectors[msix_idx].vector, block); + block->irq = 0; } - dma_free_coherent(&priv->pdev->dev, - priv->num_ntfy_blks * sizeof(*priv->ntfy_blocks), - priv->ntfy_blocks, priv->ntfy_block_bus); + free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); + kvfree(priv->ntfy_blocks); priv->ntfy_blocks = NULL; + dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * + sizeof(*priv->irq_db_indices), + priv->irq_db_indices, priv->irq_db_indices_bus); + priv->irq_db_indices = NULL; pci_disable_msix(priv->pdev); kvfree(priv->msix_vectors); priv->msix_vectors = NULL; @@ -395,19 +638,28 @@ static int gve_setup_device_resources(struct gve_priv *priv) { int err; - err = gve_alloc_counter_array(priv); + err = gve_alloc_flow_rule_caches(priv); if (err) return err; - err = gve_alloc_notify_blocks(priv); + err = gve_alloc_rss_config_cache(priv); + if (err) + goto abort_with_flow_rule_caches; + err = gve_alloc_counter_array(priv); + if (err) + goto abort_with_rss_config_cache; + err = gve_init_clock(priv); if (err) goto abort_with_counter; + err = gve_alloc_notify_blocks(priv); + if (err) + goto abort_with_clock; err = gve_alloc_stats_report(priv); if (err) goto abort_with_ntfy_blocks; err = gve_adminq_configure_device_resources(priv, priv->counter_array_bus, priv->num_event_counters, - priv->ntfy_block_bus, + priv->irq_db_indices_bus, priv->num_ntfy_blks); if (unlikely(err)) { dev_err(&priv->pdev->dev, @@ -416,7 +668,7 @@ static int gve_setup_device_resources(struct gve_priv *priv) goto abort_with_stats_report; } - if (priv->queue_format == GVE_DQO_RDA_FORMAT) { + if (!gve_is_gqi(priv)) { priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo), GFP_KERNEL); if (!priv->ptype_lut_dqo) { @@ -431,6 +683,12 @@ static int gve_setup_device_resources(struct gve_priv *priv) } } + err = gve_init_rss_config(priv, priv->rx_cfg.num_queues); + if (err) { + dev_err(&priv->pdev->dev, "Failed to init RSS config"); + goto abort_with_ptype_lut; + } + err = 
gve_adminq_report_stats(priv, priv->stats_report_len, priv->stats_report_bus, GVE_STATS_REPORT_TIMER_PERIOD); @@ -447,8 +705,14 @@ abort_with_stats_report: gve_free_stats_report(priv); abort_with_ntfy_blocks: gve_free_notify_blocks(priv); +abort_with_clock: + gve_teardown_clock(priv); abort_with_counter: gve_free_counter_array(priv); +abort_with_rss_config_cache: + gve_free_rss_config_cache(priv); +abort_with_flow_rule_caches: + gve_free_flow_rule_caches(priv); return err; } @@ -461,6 +725,12 @@ static void gve_teardown_device_resources(struct gve_priv *priv) /* Tell device its resources are being freed */ if (gve_get_device_resources_ok(priv)) { + err = gve_flow_rules_reset(priv); + if (err) { + dev_err(&priv->pdev->dev, + "Failed to reset flow rules: err=%d\n", err); + gve_trigger_reset(priv); + } /* detach the stats report */ err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD); if (err) { @@ -480,84 +750,152 @@ static void gve_teardown_device_resources(struct gve_priv *priv) kvfree(priv->ptype_lut_dqo); priv->ptype_lut_dqo = NULL; + gve_free_flow_rule_caches(priv); + gve_free_rss_config_cache(priv); gve_free_counter_array(priv); gve_free_notify_blocks(priv); gve_free_stats_report(priv); + gve_teardown_clock(priv); gve_clear_device_resources_ok(priv); } -static void gve_add_napi(struct gve_priv *priv, int ntfy_idx, - int (*gve_poll)(struct napi_struct *, int)) +static int gve_unregister_qpl(struct gve_priv *priv, + struct gve_queue_page_list *qpl) +{ + int err; + + if (!qpl) + return 0; + + err = gve_adminq_unregister_page_list(priv, qpl->id); + if (err) { + netif_err(priv, drv, priv->dev, + "Failed to unregister queue page list %d\n", + qpl->id); + return err; + } + + priv->num_registered_pages -= qpl->num_entries; + return 0; +} + +static int gve_register_qpl(struct gve_priv *priv, + struct gve_queue_page_list *qpl) +{ + int pages; + int err; + + if (!qpl) + return 0; + + pages = qpl->num_entries; + + if (pages + priv->num_registered_pages > priv->max_registered_pages) { + netif_err(priv, drv, priv->dev, + "Reached max number of registered pages %llu > %llu\n", + pages + priv->num_registered_pages, + priv->max_registered_pages); + return -EINVAL; + } + + err = gve_adminq_register_page_list(priv, qpl); + if (err) { + netif_err(priv, drv, priv->dev, + "failed to register queue page list %d\n", + qpl->id); + return err; + } + + priv->num_registered_pages += pages; + return 0; +} + +static struct gve_queue_page_list *gve_tx_get_qpl(struct gve_priv *priv, int idx) { - struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + struct gve_tx_ring *tx = &priv->tx[idx]; - netif_napi_add(priv->dev, &block->napi, gve_poll, - NAPI_POLL_WEIGHT); + if (gve_is_gqi(priv)) + return tx->tx_fifo.qpl; + else + return tx->dqo.qpl; } -static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx) +static struct gve_queue_page_list *gve_rx_get_qpl(struct gve_priv *priv, int idx) { - struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + struct gve_rx_ring *rx = &priv->rx[idx]; - netif_napi_del(&block->napi); + if (gve_is_gqi(priv)) + return rx->data.qpl; + else + return rx->dqo.qpl; } static int gve_register_qpls(struct gve_priv *priv) { - int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv); + int num_tx_qpls, num_rx_qpls; int err; int i; - for (i = 0; i < num_qpls; i++) { - err = gve_adminq_register_page_list(priv, &priv->qpls[i]); - if (err) { - netif_err(priv, drv, priv->dev, - "failed to register queue page list %d\n", - priv->qpls[i].id); - /* This 
failure will trigger a reset - no need to clean - * up - */ + num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv)); + num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); + + for (i = 0; i < num_tx_qpls; i++) { + err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i)); + if (err) return err; - } } + + for (i = 0; i < num_rx_qpls; i++) { + err = gve_register_qpl(priv, gve_rx_get_qpl(priv, i)); + if (err) + return err; + } + return 0; } static int gve_unregister_qpls(struct gve_priv *priv) { - int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv); + int num_tx_qpls, num_rx_qpls; int err; int i; - for (i = 0; i < num_qpls; i++) { - err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id); - /* This failure will trigger a reset - no need to clean up */ - if (err) { - netif_err(priv, drv, priv->dev, - "Failed to unregister queue page list %d\n", - priv->qpls[i].id); + num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv)); + num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); + + for (i = 0; i < num_tx_qpls; i++) { + err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i)); + /* This failure will trigger a reset - no need to clean */ + if (err) + return err; + } + + for (i = 0; i < num_rx_qpls; i++) { + err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, i)); + /* This failure will trigger a reset - no need to clean */ + if (err) return err; - } } return 0; } static int gve_create_rings(struct gve_priv *priv) { + int num_tx_queues = gve_num_tx_queues(priv); int err; int i; - err = gve_adminq_create_tx_queues(priv, priv->tx_cfg.num_queues); + err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues); if (err) { netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n", - priv->tx_cfg.num_queues); + num_tx_queues); /* This failure will trigger a reset - no need to clean * up */ return err; } netif_dbg(priv, drv, priv->dev, "created %d tx queues\n", - priv->tx_cfg.num_queues); + num_tx_queues); err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues); if (err) { @@ -590,94 +928,107 @@ static int gve_create_rings(struct gve_priv *priv) return 0; } -static void add_napi_init_sync_stats(struct gve_priv *priv, - int (*napi_poll)(struct napi_struct *napi, - int budget)) +static void init_xdp_sync_stats(struct gve_priv *priv) { + int start_id = gve_xdp_tx_start_queue_id(priv); int i; - /* Add tx napi & init sync stats*/ - for (i = 0; i < priv->tx_cfg.num_queues; i++) { + /* Init stats */ + for (i = start_id; i < start_id + priv->tx_cfg.num_xdp_queues; i++) { int ntfy_idx = gve_tx_idx_to_ntfy(priv, i); u64_stats_init(&priv->tx[i].statss); priv->tx[i].ntfy_id = ntfy_idx; - gve_add_napi(priv, ntfy_idx, napi_poll); } - /* Add rx napi & init sync stats*/ - for (i = 0; i < priv->rx_cfg.num_queues; i++) { - int ntfy_idx = gve_rx_idx_to_ntfy(priv, i); +} + +static void gve_init_sync_stats(struct gve_priv *priv) +{ + int i; + + for (i = 0; i < priv->tx_cfg.num_queues; i++) + u64_stats_init(&priv->tx[i].statss); + + /* Init stats for XDP TX queues */ + init_xdp_sync_stats(priv); + for (i = 0; i < priv->rx_cfg.num_queues; i++) u64_stats_init(&priv->rx[i].statss); - priv->rx[i].ntfy_id = ntfy_idx; - gve_add_napi(priv, ntfy_idx, napi_poll); - } } -static void gve_tx_free_rings(struct gve_priv *priv) +static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *cfg) { - if (gve_is_gqi(priv)) { - gve_tx_free_rings_gqi(priv); - } else { - gve_tx_free_rings_dqo(priv); - } + cfg->qcfg = &priv->tx_cfg; + cfg->raw_addressing 
= !gve_is_qpl(priv); + cfg->ring_size = priv->tx_desc_cnt; + cfg->num_xdp_rings = cfg->qcfg->num_xdp_queues; + cfg->tx = priv->tx; } -static int gve_alloc_rings(struct gve_priv *priv) +static void gve_tx_stop_rings(struct gve_priv *priv, int num_rings) { - int err; + int i; - /* Setup tx rings */ - priv->tx = kvzalloc(priv->tx_cfg.num_queues * sizeof(*priv->tx), - GFP_KERNEL); if (!priv->tx) - return -ENOMEM; + return; - if (gve_is_gqi(priv)) - err = gve_tx_alloc_rings(priv); - else - err = gve_tx_alloc_rings_dqo(priv); - if (err) - goto free_tx; + for (i = 0; i < num_rings; i++) { + if (gve_is_gqi(priv)) + gve_tx_stop_ring_gqi(priv, i); + else + gve_tx_stop_ring_dqo(priv, i); + } +} - /* Setup rx rings */ - priv->rx = kvzalloc(priv->rx_cfg.num_queues * sizeof(*priv->rx), - GFP_KERNEL); - if (!priv->rx) { - err = -ENOMEM; - goto free_tx_queue; +static void gve_tx_start_rings(struct gve_priv *priv, int num_rings) +{ + int i; + + for (i = 0; i < num_rings; i++) { + if (gve_is_gqi(priv)) + gve_tx_start_ring_gqi(priv, i); + else + gve_tx_start_ring_dqo(priv, i); } +} + +static int gve_queues_mem_alloc(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) +{ + int err; if (gve_is_gqi(priv)) - err = gve_rx_alloc_rings(priv); + err = gve_tx_alloc_rings_gqi(priv, tx_alloc_cfg); else - err = gve_rx_alloc_rings_dqo(priv); + err = gve_tx_alloc_rings_dqo(priv, tx_alloc_cfg); if (err) - goto free_rx; + return err; if (gve_is_gqi(priv)) - add_napi_init_sync_stats(priv, gve_napi_poll); + err = gve_rx_alloc_rings_gqi(priv, rx_alloc_cfg); else - add_napi_init_sync_stats(priv, gve_napi_poll_dqo); + err = gve_rx_alloc_rings_dqo(priv, rx_alloc_cfg); + if (err) + goto free_tx; return 0; -free_rx: - kvfree(priv->rx); - priv->rx = NULL; -free_tx_queue: - gve_tx_free_rings(priv); free_tx: - kvfree(priv->tx); - priv->tx = NULL; + if (gve_is_gqi(priv)) + gve_tx_free_rings_gqi(priv, tx_alloc_cfg); + else + gve_tx_free_rings_dqo(priv, tx_alloc_cfg); return err; } static int gve_destroy_rings(struct gve_priv *priv) { + int num_tx_queues = gve_num_tx_queues(priv); int err; - err = gve_adminq_destroy_tx_queues(priv, priv->tx_cfg.num_queues); + err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues); if (err) { netif_err(priv, drv, priv->dev, "failed to destroy tx queues\n"); @@ -696,44 +1047,24 @@ static int gve_destroy_rings(struct gve_priv *priv) return 0; } -static void gve_rx_free_rings(struct gve_priv *priv) -{ - if (gve_is_gqi(priv)) - gve_rx_free_rings_gqi(priv); - else - gve_rx_free_rings_dqo(priv); -} - -static void gve_free_rings(struct gve_priv *priv) +static void gve_queues_mem_free(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_cfg, + struct gve_rx_alloc_rings_cfg *rx_cfg) { - int ntfy_idx; - int i; - - if (priv->tx) { - for (i = 0; i < priv->tx_cfg.num_queues; i++) { - ntfy_idx = gve_tx_idx_to_ntfy(priv, i); - gve_remove_napi(priv, ntfy_idx); - } - gve_tx_free_rings(priv); - kvfree(priv->tx); - priv->tx = NULL; - } - if (priv->rx) { - for (i = 0; i < priv->rx_cfg.num_queues; i++) { - ntfy_idx = gve_rx_idx_to_ntfy(priv, i); - gve_remove_napi(priv, ntfy_idx); - } - gve_rx_free_rings(priv); - kvfree(priv->rx); - priv->rx = NULL; + if (gve_is_gqi(priv)) { + gve_tx_free_rings_gqi(priv, tx_cfg); + gve_rx_free_rings_gqi(priv, rx_cfg); + } else { + gve_tx_free_rings_dqo(priv, tx_cfg); + gve_rx_free_rings_dqo(priv, rx_cfg); } } int gve_alloc_page(struct gve_priv *priv, struct device *dev, struct page **page, dma_addr_t *dma, - enum 
dma_data_direction dir) + enum dma_data_direction dir, gfp_t gfp_flags) { - *page = alloc_page(GFP_KERNEL); + *page = alloc_pages_node(priv->numa_node, gfp_flags, 0); if (!*page) { priv->page_alloc_fail++; return -ENOMEM; @@ -747,45 +1078,41 @@ int gve_alloc_page(struct gve_priv *priv, struct device *dev, return 0; } -static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id, - int pages) +struct gve_queue_page_list *gve_alloc_queue_page_list(struct gve_priv *priv, + u32 id, int pages) { - struct gve_queue_page_list *qpl = &priv->qpls[id]; + struct gve_queue_page_list *qpl; int err; int i; - if (pages + priv->num_registered_pages > priv->max_registered_pages) { - netif_err(priv, drv, priv->dev, - "Reached max number of registered pages %llu > %llu\n", - pages + priv->num_registered_pages, - priv->max_registered_pages); - return -EINVAL; - } + qpl = kvzalloc(sizeof(*qpl), GFP_KERNEL); + if (!qpl) + return NULL; qpl->id = id; qpl->num_entries = 0; - qpl->pages = kvzalloc(pages * sizeof(*qpl->pages), GFP_KERNEL); - /* caller handles clean up */ + qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL); if (!qpl->pages) - return -ENOMEM; - qpl->page_buses = kvzalloc(pages * sizeof(*qpl->page_buses), - GFP_KERNEL); - /* caller handles clean up */ + goto abort; + + qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL); if (!qpl->page_buses) - return -ENOMEM; + goto abort; for (i = 0; i < pages; i++) { err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i], &qpl->page_buses[i], - gve_qpl_dma_dir(priv, id)); - /* caller handles clean up */ + gve_qpl_dma_dir(priv, id), GFP_KERNEL); if (err) - return -ENOMEM; + goto abort; qpl->num_entries++; } - priv->num_registered_pages += pages; - return 0; + return qpl; + +abort: + gve_free_queue_page_list(priv, qpl, id); + return NULL; } void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, @@ -797,14 +1124,16 @@ void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, put_page(page); } -static void gve_free_queue_page_list(struct gve_priv *priv, - int id) +void gve_free_queue_page_list(struct gve_priv *priv, + struct gve_queue_page_list *qpl, + u32 id) { - struct gve_queue_page_list *qpl = &priv->qpls[id]; int i; - if (!qpl->pages) + if (!qpl) return; + if (!qpl->pages) + goto free_qpl; if (!qpl->page_buses) goto free_pages; @@ -813,118 +1142,261 @@ static void gve_free_queue_page_list(struct gve_priv *priv, qpl->page_buses[i], gve_qpl_dma_dir(priv, id)); kvfree(qpl->page_buses); + qpl->page_buses = NULL; free_pages: kvfree(qpl->pages); - priv->num_registered_pages -= qpl->num_entries; + qpl->pages = NULL; +free_qpl: + kvfree(qpl); } -static int gve_alloc_qpls(struct gve_priv *priv) +/* Use this to schedule a reset when the device is capable of continuing + * to handle other requests in its current state. If it is not, do a reset + * in thread instead. 
+ */ +void gve_schedule_reset(struct gve_priv *priv) { - int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv); - int i, j; + gve_set_do_reset(priv); + queue_work(priv->gve_wq, &priv->service_task); +} + +static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up); +static int gve_reset_recovery(struct gve_priv *priv, bool was_up); +static void gve_turndown(struct gve_priv *priv); +static void gve_turnup(struct gve_priv *priv); + +static void gve_unreg_xsk_pool(struct gve_priv *priv, u16 qid) +{ + struct gve_rx_ring *rx; + + if (!priv->rx) + return; + + rx = &priv->rx[qid]; + rx->xsk_pool = NULL; + if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) + xdp_rxq_info_unreg_mem_model(&rx->xdp_rxq); + + if (!priv->tx) + return; + priv->tx[gve_xdp_tx_queue_id(priv, qid)].xsk_pool = NULL; +} + +static int gve_reg_xsk_pool(struct gve_priv *priv, struct net_device *dev, + struct xsk_buff_pool *pool, u16 qid) +{ + struct gve_rx_ring *rx; + u16 tx_qid; int err; - /* Raw addressing means no QPLs */ - if (priv->queue_format == GVE_GQI_RDA_FORMAT) - return 0; + rx = &priv->rx[qid]; + err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, + MEM_TYPE_XSK_BUFF_POOL, pool); + if (err) { + gve_unreg_xsk_pool(priv, qid); + return err; + } - priv->qpls = kvzalloc(num_qpls * sizeof(*priv->qpls), GFP_KERNEL); - if (!priv->qpls) - return -ENOMEM; + rx->xsk_pool = pool; - for (i = 0; i < gve_num_tx_qpls(priv); i++) { - err = gve_alloc_queue_page_list(priv, i, - priv->tx_pages_per_qpl); - if (err) - goto free_qpls; + tx_qid = gve_xdp_tx_queue_id(priv, qid); + priv->tx[tx_qid].xsk_pool = pool; + + return 0; +} + +static void gve_unreg_xdp_info(struct gve_priv *priv) +{ + int i; + + if (!priv->tx_cfg.num_xdp_queues || !priv->rx) + return; + + for (i = 0; i < priv->rx_cfg.num_queues; i++) { + struct gve_rx_ring *rx = &priv->rx[i]; + + if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) + xdp_rxq_info_unreg(&rx->xdp_rxq); + + gve_unreg_xsk_pool(priv, i); } - for (; i < num_qpls; i++) { - err = gve_alloc_queue_page_list(priv, i, - priv->rx_data_slot_cnt); +} + +static struct xsk_buff_pool *gve_get_xsk_pool(struct gve_priv *priv, int qid) +{ + if (!test_bit(qid, priv->xsk_pools)) + return NULL; + + return xsk_get_pool_from_qid(priv->dev, qid); +} + +static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) +{ + struct napi_struct *napi; + struct gve_rx_ring *rx; + int err = 0; + int i; + + if (!priv->tx_cfg.num_xdp_queues) + return 0; + + for (i = 0; i < priv->rx_cfg.num_queues; i++) { + struct xsk_buff_pool *xsk_pool; + + rx = &priv->rx[i]; + napi = &priv->ntfy_blocks[rx->ntfy_id].napi; + + err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i, + napi->napi_id); if (err) - goto free_qpls; - } + goto err; - priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(num_qpls) * - sizeof(unsigned long) * BITS_PER_BYTE; - priv->qpl_cfg.qpl_id_map = kvzalloc(BITS_TO_LONGS(num_qpls) * - sizeof(unsigned long), GFP_KERNEL); - if (!priv->qpl_cfg.qpl_id_map) { - err = -ENOMEM; - goto free_qpls; + xsk_pool = gve_get_xsk_pool(priv, i); + if (xsk_pool) + err = gve_reg_xsk_pool(priv, dev, xsk_pool, i); + else if (gve_is_qpl(priv)) + err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, + MEM_TYPE_PAGE_SHARED, + NULL); + else + err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, + MEM_TYPE_PAGE_POOL, + rx->dqo.page_pool); + if (err) + goto err; } - return 0; -free_qpls: - for (j = 0; j <= i; j++) - gve_free_queue_page_list(priv, j); - kvfree(priv->qpls); +err: + gve_unreg_xdp_info(priv); return err; } -static void gve_free_qpls(struct gve_priv *priv) + +static void 
gve_drain_page_cache(struct gve_priv *priv) { - int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv); int i; - /* Raw addressing means no QPLs */ - if (priv->queue_format == GVE_GQI_RDA_FORMAT) - return; + for (i = 0; i < priv->rx_cfg.num_queues; i++) + page_frag_cache_drain(&priv->rx[i].page_cache); +} - kvfree(priv->qpl_cfg.qpl_id_map); +static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv, + struct gve_rx_alloc_rings_cfg *cfg) +{ + cfg->qcfg_rx = &priv->rx_cfg; + cfg->qcfg_tx = &priv->tx_cfg; + cfg->raw_addressing = !gve_is_qpl(priv); + cfg->enable_header_split = priv->header_split_enabled; + cfg->ring_size = priv->rx_desc_cnt; + cfg->packet_buffer_size = priv->rx_cfg.packet_buffer_size; + cfg->rx = priv->rx; + cfg->xdp = !!cfg->qcfg_tx->num_xdp_queues; +} - for (i = 0; i < num_qpls; i++) - gve_free_queue_page_list(priv, i); +void gve_get_curr_alloc_cfgs(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) +{ + gve_tx_get_curr_alloc_cfg(priv, tx_alloc_cfg); + gve_rx_get_curr_alloc_cfg(priv, rx_alloc_cfg); +} - kvfree(priv->qpls); +static void gve_rx_start_ring(struct gve_priv *priv, int i) +{ + if (gve_is_gqi(priv)) + gve_rx_start_ring_gqi(priv, i); + else + gve_rx_start_ring_dqo(priv, i); } -/* Use this to schedule a reset when the device is capable of continuing - * to handle other requests in its current state. If it is not, do a reset - * in thread instead. - */ -void gve_schedule_reset(struct gve_priv *priv) +static void gve_rx_start_rings(struct gve_priv *priv, int num_rings) { - gve_set_do_reset(priv); - queue_work(priv->gve_wq, &priv->service_task); + int i; + + for (i = 0; i < num_rings; i++) + gve_rx_start_ring(priv, i); } -static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up); -static int gve_reset_recovery(struct gve_priv *priv, bool was_up); -static void gve_turndown(struct gve_priv *priv); -static void gve_turnup(struct gve_priv *priv); +static void gve_rx_stop_ring(struct gve_priv *priv, int i) +{ + if (gve_is_gqi(priv)) + gve_rx_stop_ring_gqi(priv, i); + else + gve_rx_stop_ring_dqo(priv, i); +} -static int gve_open(struct net_device *dev) +static void gve_rx_stop_rings(struct gve_priv *priv, int num_rings) { - struct gve_priv *priv = netdev_priv(dev); + int i; + + if (!priv->rx) + return; + + for (i = 0; i < num_rings; i++) + gve_rx_stop_ring(priv, i); +} + +static void gve_queues_mem_remove(struct gve_priv *priv) +{ + struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; + struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; + + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); + gve_queues_mem_free(priv, &tx_alloc_cfg, &rx_alloc_cfg); + priv->tx = NULL; + priv->rx = NULL; +} + +/* The passed-in queue memory is stored into priv and the queues are made live. + * No memory is allocated. Passed-in memory is freed on errors. 
+ */ +static int gve_queues_start(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) +{ + struct net_device *dev = priv->dev; int err; - err = gve_alloc_qpls(priv); - if (err) - return err; + /* Record new resources into priv */ + priv->tx = tx_alloc_cfg->tx; + priv->rx = rx_alloc_cfg->rx; - err = gve_alloc_rings(priv); - if (err) - goto free_qpls; + /* Record new configs into priv */ + priv->tx_cfg = *tx_alloc_cfg->qcfg; + priv->tx_cfg.num_xdp_queues = tx_alloc_cfg->num_xdp_rings; + priv->rx_cfg = *rx_alloc_cfg->qcfg_rx; + priv->tx_desc_cnt = tx_alloc_cfg->ring_size; + priv->rx_desc_cnt = rx_alloc_cfg->ring_size; + + gve_tx_start_rings(priv, gve_num_tx_queues(priv)); + gve_rx_start_rings(priv, rx_alloc_cfg->qcfg_rx->num_queues); + gve_init_sync_stats(priv); err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues); if (err) - goto free_rings; + goto stop_and_free_rings; err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues); if (err) - goto free_rings; + goto stop_and_free_rings; + + err = gve_reg_xdp_info(priv, dev); + if (err) + goto stop_and_free_rings; + + if (rx_alloc_cfg->reset_rss) { + err = gve_init_rss_config(priv, priv->rx_cfg.num_queues); + if (err) + goto reset; + } err = gve_register_qpls(priv); if (err) goto reset; - if (!gve_is_gqi(priv)) { - /* Hard code this for now. This may be tuned in the future for - * performance. - */ - priv->data_buffer_size_dqo = GVE_RX_BUFFER_SIZE_DQO; - } + priv->header_split_enabled = rx_alloc_cfg->enable_header_split; + priv->rx_cfg.packet_buffer_size = rx_alloc_cfg->packet_buffer_size; + err = gve_create_rings(priv); if (err) goto reset; @@ -941,34 +1413,52 @@ static int gve_open(struct net_device *dev) priv->interface_up_cnt++; return 0; -free_rings: - gve_free_rings(priv); -free_qpls: - gve_free_qpls(priv); - return err; - reset: - /* This must have been called from a reset due to the rtnl lock - * so just return at this point. - */ if (gve_get_reset_in_progress(priv)) - return err; - /* Otherwise reset before returning */ + goto stop_and_free_rings; gve_reset_and_teardown(priv, true); /* if this fails there is nothing we can do so just ignore the return */ gve_reset_recovery(priv, false); /* return the original error */ return err; +stop_and_free_rings: + gve_tx_stop_rings(priv, gve_num_tx_queues(priv)); + gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); + gve_queues_mem_remove(priv); + return err; } -static int gve_close(struct net_device *dev) +static int gve_open(struct net_device *dev) { + struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; + struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; struct gve_priv *priv = netdev_priv(dev); int err; - netif_carrier_off(dev); + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); + + err = gve_queues_mem_alloc(priv, &tx_alloc_cfg, &rx_alloc_cfg); + if (err) + return err; + + /* No need to free on error: ownership of resources is lost after + * calling gve_queues_start. 
+ */ + err = gve_queues_start(priv, &tx_alloc_cfg, &rx_alloc_cfg); + if (err) + return err; + + return 0; +} + +static int gve_queues_stop(struct gve_priv *priv) +{ + int err; + + netif_carrier_off(priv->dev); if (gve_get_device_rings_ok(priv)) { gve_turndown(priv); + gve_drain_page_cache(priv); err = gve_destroy_rings(priv); if (err) goto err; @@ -977,10 +1467,13 @@ static int gve_close(struct net_device *dev) goto err; gve_clear_device_rings_ok(priv); } - del_timer_sync(&priv->stats_report_timer); + timer_delete_sync(&priv->stats_report_timer); + + gve_unreg_xdp_info(priv); + + gve_tx_stop_rings(priv, gve_num_tx_queues(priv)); + gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); - gve_free_rings(priv); - gve_free_qpls(priv); priv->interface_down_cnt++; return 0; @@ -995,42 +1488,392 @@ err: return gve_reset_recovery(priv, false); } -int gve_adjust_queues(struct gve_priv *priv, - struct gve_queue_config new_rx_config, - struct gve_queue_config new_tx_config) +static int gve_close(struct net_device *dev) { + struct gve_priv *priv = netdev_priv(dev); int err; - if (netif_carrier_ok(priv->dev)) { - /* To make this process as simple as possible we teardown the - * device, set the new configuration, and then bring the device - * up again. - */ - err = gve_close(priv->dev); - /* we have already tried to reset in close, - * just fail at this point - */ + err = gve_queues_stop(priv); + if (err) + return err; + + gve_queues_mem_remove(priv); + return 0; +} + +static void gve_handle_link_status(struct gve_priv *priv, bool link_status) +{ + if (!gve_get_napi_enabled(priv)) + return; + + if (link_status == netif_carrier_ok(priv->dev)) + return; + + if (link_status) { + netdev_info(priv->dev, "Device link is up.\n"); + netif_carrier_on(priv->dev); + } else { + netdev_info(priv->dev, "Device link is down.\n"); + netif_carrier_off(priv->dev); + } +} + +static int gve_configure_rings_xdp(struct gve_priv *priv, + u16 num_xdp_rings) +{ + struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; + struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; + + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); + tx_alloc_cfg.num_xdp_rings = num_xdp_rings; + + rx_alloc_cfg.xdp = !!num_xdp_rings; + return gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); +} + +static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, + struct netlink_ext_ack *extack) +{ + struct bpf_prog *old_prog; + int err = 0; + u32 status; + + old_prog = READ_ONCE(priv->xdp_prog); + if (!netif_running(priv->dev)) { + WRITE_ONCE(priv->xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + + /* Update priv XDP queue configuration */ + priv->tx_cfg.num_xdp_queues = priv->xdp_prog ? 
+ priv->rx_cfg.num_queues : 0; + return 0; + } + + if (!old_prog && prog) + err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues); + else if (old_prog && !prog) + err = gve_configure_rings_xdp(priv, 0); + + if (err) + goto out; + + WRITE_ONCE(priv->xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + +out: + status = ioread32be(&priv->reg_bar0->device_status); + gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); + return err; +} + +static int gve_xdp_xmit(struct net_device *dev, int n, + struct xdp_frame **frames, u32 flags) +{ + struct gve_priv *priv = netdev_priv(dev); + + if (priv->queue_format == GVE_GQI_QPL_FORMAT) + return gve_xdp_xmit_gqi(dev, n, frames, flags); + else if (priv->queue_format == GVE_DQO_RDA_FORMAT) + return gve_xdp_xmit_dqo(dev, n, frames, flags); + + return -EOPNOTSUPP; +} + +static int gve_xsk_pool_enable(struct net_device *dev, + struct xsk_buff_pool *pool, + u16 qid) +{ + struct gve_priv *priv = netdev_priv(dev); + int err; + + if (qid >= priv->rx_cfg.num_queues) { + dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid); + return -EINVAL; + } + if (xsk_pool_get_rx_frame_size(pool) < + priv->dev->max_mtu + sizeof(struct ethhdr)) { + dev_err(&priv->pdev->dev, "xsk pool frame_len too small"); + return -EINVAL; + } + + err = xsk_pool_dma_map(pool, &priv->pdev->dev, + DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); + if (err) + return err; + + set_bit(qid, priv->xsk_pools); + + /* If XDP prog is not installed or interface is down, return. */ + if (!priv->xdp_prog || !netif_running(dev)) + return 0; + + err = gve_reg_xsk_pool(priv, dev, pool, qid); + if (err) + goto err_xsk_pool_dma_mapped; + + /* Stop and start RDA queues to repost buffers. */ + if (!gve_is_qpl(priv)) { + err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues); if (err) - return err; - priv->tx_cfg = new_tx_config; - priv->rx_cfg = new_rx_config; + goto err_xsk_pool_registered; + } + return 0; - err = gve_open(priv->dev); +err_xsk_pool_registered: + gve_unreg_xsk_pool(priv, qid); +err_xsk_pool_dma_mapped: + clear_bit(qid, priv->xsk_pools); + xsk_pool_dma_unmap(pool, + DMA_ATTR_SKIP_CPU_SYNC | + DMA_ATTR_WEAK_ORDERING); + return err; +} + +static int gve_xsk_pool_disable(struct net_device *dev, + u16 qid) +{ + struct gve_priv *priv = netdev_priv(dev); + struct napi_struct *napi_rx; + struct napi_struct *napi_tx; + struct xsk_buff_pool *pool; + int tx_qid; + int err; + + if (qid >= priv->rx_cfg.num_queues) + return -EINVAL; + + clear_bit(qid, priv->xsk_pools); + + pool = xsk_get_pool_from_qid(dev, qid); + if (pool) + xsk_pool_dma_unmap(pool, + DMA_ATTR_SKIP_CPU_SYNC | + DMA_ATTR_WEAK_ORDERING); + + if (!netif_running(dev) || !priv->tx_cfg.num_xdp_queues) + return 0; + + /* Stop and start RDA queues to repost buffers. 
*/ + if (!gve_is_qpl(priv) && priv->xdp_prog) { + err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues); if (err) - goto err; + return err; + } + + napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi; + napi_disable(napi_rx); /* make sure current rx poll is done */ + + tx_qid = gve_xdp_tx_queue_id(priv, qid); + napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi; + napi_disable(napi_tx); /* make sure current tx poll is done */ + + gve_unreg_xsk_pool(priv, qid); + smp_mb(); /* Make sure it is visible to the workers on datapath */ + + napi_enable(napi_rx); + napi_enable(napi_tx); + if (gve_is_gqi(priv)) { + if (gve_rx_work_pending(&priv->rx[qid])) + napi_schedule(napi_rx); + + if (gve_tx_clean_pending(priv, &priv->tx[tx_qid])) + napi_schedule(napi_tx); + } + + return 0; +} + +static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) +{ + struct gve_priv *priv = netdev_priv(dev); + struct napi_struct *napi; + + if (!gve_get_napi_enabled(priv)) + return -ENETDOWN; + + if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog) + return -EINVAL; + + napi = &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_id)].napi; + if (!napi_if_scheduled_mark_missed(napi)) { + /* Call local_bh_enable to trigger SoftIRQ processing */ + local_bh_disable(); + napi_schedule(napi); + local_bh_enable(); + } + + return 0; +} + +static int gve_verify_xdp_configuration(struct net_device *dev, + struct netlink_ext_ack *extack) +{ + struct gve_priv *priv = netdev_priv(dev); + u16 max_xdp_mtu; + + if (dev->features & NETIF_F_LRO) { + NL_SET_ERR_MSG_MOD(extack, + "XDP is not supported when LRO is on."); + return -EOPNOTSUPP; + } + if (priv->header_split_enabled) { + NL_SET_ERR_MSG_MOD(extack, + "XDP is not supported when header-data split is enabled."); + return -EOPNOTSUPP; + } + + if (priv->rx_cfg.packet_buffer_size != SZ_2K) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "XDP is not supported for Rx buf len %d, only %d supported.", + priv->rx_cfg.packet_buffer_size, SZ_2K); + return -EOPNOTSUPP; + } + + max_xdp_mtu = priv->rx_cfg.packet_buffer_size - sizeof(struct ethhdr); + if (priv->queue_format == GVE_GQI_QPL_FORMAT) + max_xdp_mtu -= GVE_RX_PAD; + + if (dev->mtu > max_xdp_mtu) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "XDP is not supported for mtu %d.", + dev->mtu); + return -EOPNOTSUPP; + } + + if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues || + (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) { + netdev_warn(dev, + "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d.", + priv->rx_cfg.num_queues, priv->tx_cfg.num_queues, + priv->tx_cfg.max_queues); + NL_SET_ERR_MSG_MOD(extack, + "XDP load failed: The number of configured RX queues should be equal to the number of configured TX queues and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues"); + return -EINVAL; + } + return 0; +} + +static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp) +{ + struct gve_priv *priv = netdev_priv(dev); + int err; + + err = gve_verify_xdp_configuration(dev, xdp->extack); + if (err) + return err; + switch (xdp->command) { + case XDP_SETUP_PROG: + return gve_set_xdp(priv, xdp->prog, xdp->extack); + case XDP_SETUP_XSK_POOL: + if (xdp->xsk.pool) + return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id); + else + return gve_xsk_pool_disable(dev, 
xdp->xsk.queue_id); + default: + return -EINVAL; + } +} + +int gve_init_rss_config(struct gve_priv *priv, u16 num_queues) +{ + struct gve_rss_config *rss_config = &priv->rss_config; + struct ethtool_rxfh_param rxfh = {0}; + u16 i; + + if (!priv->cache_rss_config) return 0; + + for (i = 0; i < priv->rss_lut_size; i++) + rss_config->hash_lut[i] = + ethtool_rxfh_indir_default(i, num_queues); + + netdev_rss_key_fill(rss_config->hash_key, priv->rss_key_size); + + rxfh.hfunc = ETH_RSS_HASH_TOP; + + return gve_adminq_configure_rss(priv, &rxfh); +} + +int gve_flow_rules_reset(struct gve_priv *priv) +{ + if (!priv->max_flow_rules) + return 0; + + return gve_adminq_reset_flow_rules(priv); +} + +int gve_adjust_config(struct gve_priv *priv, + struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) +{ + int err; + + /* Allocate resources for the new configuration */ + err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg); + if (err) { + netif_err(priv, drv, priv->dev, + "Adjust config failed to alloc new queues"); + return err; + } + + /* Teardown the device and free existing resources */ + err = gve_close(priv->dev); + if (err) { + netif_err(priv, drv, priv->dev, + "Adjust config failed to close old queues"); + gve_queues_mem_free(priv, tx_alloc_cfg, rx_alloc_cfg); + return err; + } + + /* Bring the device back up again with the new resources. */ + err = gve_queues_start(priv, tx_alloc_cfg, rx_alloc_cfg); + if (err) { + netif_err(priv, drv, priv->dev, + "Adjust config failed to start new queues, !!! DISABLING ALL QUEUES !!!\n"); + /* No need to free on error: ownership of resources is lost after + * calling gve_queues_start. + */ + gve_turndown(priv); + return err; + } + + return 0; +} + +int gve_adjust_queues(struct gve_priv *priv, + struct gve_rx_queue_config new_rx_config, + struct gve_tx_queue_config new_tx_config, + bool reset_rss) +{ + struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; + struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; + int err; + + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); + + /* Relay the new config from ethtool */ + tx_alloc_cfg.qcfg = &new_tx_config; + rx_alloc_cfg.qcfg_tx = &new_tx_config; + rx_alloc_cfg.qcfg_rx = &new_rx_config; + rx_alloc_cfg.reset_rss = reset_rss; + + if (netif_running(priv->dev)) { + err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); + return err; } /* Set the config for the next up. */ + if (reset_rss) { + err = gve_init_rss_config(priv, new_rx_config.num_queues); + if (err) + return err; + } priv->tx_cfg = new_tx_config; priv->rx_cfg = new_rx_config; return 0; -err: - netif_err(priv, drv, priv->dev, - "Adjust queues failed! !!! 
DISABLING ALL QUEUES !!!\n"); - gve_turndown(priv); - return err; } static void gve_turndown(struct gve_priv *priv) @@ -1044,24 +1887,41 @@ static void gve_turndown(struct gve_priv *priv) return; /* Disable napi to prevent more work from coming in */ - for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) { + for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; - napi_disable(&block->napi); + if (!gve_tx_was_added_to_block(priv, idx)) + continue; + + if (idx < priv->tx_cfg.num_queues) + netif_queue_set_napi(priv->dev, idx, + NETDEV_QUEUE_TYPE_TX, NULL); + + napi_disable_locked(&block->napi); } for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; - napi_disable(&block->napi); + if (!gve_rx_was_added_to_block(priv, idx)) + continue; + + netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, + NULL); + napi_disable_locked(&block->napi); } /* Stop tx queues */ netif_tx_disable(priv->dev); + xdp_features_clear_redirect_target_locked(priv->dev); + gve_clear_napi_enabled(priv); gve_clear_report_stats(priv); + + /* Make sure that all traffic is finished processing. */ + synchronize_net(); } static void gve_turnup(struct gve_priv *priv) @@ -1072,87 +1932,282 @@ static void gve_turnup(struct gve_priv *priv) netif_tx_start_all_queues(priv->dev); /* Enable napi and unmask interrupts for all queues */ - for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) { + for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; - napi_enable(&block->napi); + if (!gve_tx_was_added_to_block(priv, idx)) + continue; + + napi_enable_locked(&block->napi); + + if (idx < priv->tx_cfg.num_queues) + netif_queue_set_napi(priv->dev, idx, + NETDEV_QUEUE_TYPE_TX, + &block->napi); + if (gve_is_gqi(priv)) { iowrite32be(0, gve_irq_doorbell(priv, block)); } else { - u32 val = gve_set_itr_ratelimit_dqo(GVE_TX_IRQ_RATELIMIT_US_DQO); - - gve_write_irq_doorbell_dqo(priv, block, val); + gve_set_itr_coalesce_usecs_dqo(priv, block, + priv->tx_coalesce_usecs); } + + /* Any descs written by the NIC before this barrier will be + * handled by the one-off napi schedule below. Whereas any + * descs after the barrier will generate interrupts. + */ + mb(); + napi_schedule(&block->napi); } for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; - napi_enable(&block->napi); + if (!gve_rx_was_added_to_block(priv, idx)) + continue; + + napi_enable_locked(&block->napi); + netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, + &block->napi); + if (gve_is_gqi(priv)) { iowrite32be(0, gve_irq_doorbell(priv, block)); } else { - u32 val = gve_set_itr_ratelimit_dqo(GVE_RX_IRQ_RATELIMIT_US_DQO); - - gve_write_irq_doorbell_dqo(priv, block, val); + gve_set_itr_coalesce_usecs_dqo(priv, block, + priv->rx_coalesce_usecs); } + + /* Any descs written by the NIC before this barrier will be + * handled by the one-off napi schedule below. Whereas any + * descs after the barrier will generate interrupts. 
+ */ + mb(); + napi_schedule(&block->napi); } + if (priv->tx_cfg.num_xdp_queues && gve_supports_xdp_xmit(priv)) + xdp_features_set_redirect_target_locked(priv->dev, false); + gve_set_napi_enabled(priv); } +static void gve_turnup_and_check_status(struct gve_priv *priv) +{ + u32 status; + + gve_turnup(priv); + status = ioread32be(&priv->reg_bar0->device_status); + gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); +} + +static struct gve_notify_block *gve_get_tx_notify_block(struct gve_priv *priv, + unsigned int txqueue) +{ + u32 ntfy_idx; + + if (txqueue > priv->tx_cfg.num_queues) + return NULL; + + ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue); + if (ntfy_idx >= priv->num_ntfy_blks) + return NULL; + + return &priv->ntfy_blocks[ntfy_idx]; +} + +static bool gve_tx_timeout_try_q_kick(struct gve_priv *priv, + unsigned int txqueue) +{ + struct gve_notify_block *block; + u32 current_time; + + block = gve_get_tx_notify_block(priv, txqueue); + + if (!block) + return false; + + current_time = jiffies_to_msecs(jiffies); + if (block->tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time) + return false; + + netdev_info(priv->dev, "Kicking queue %d", txqueue); + napi_schedule(&block->napi); + block->tx->last_kick_msec = current_time; + return true; +} + static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) { - struct gve_priv *priv = netdev_priv(dev); + struct gve_notify_block *block; + struct gve_priv *priv; + + netdev_info(dev, "Timeout on tx queue, %d", txqueue); + priv = netdev_priv(dev); + + if (!gve_tx_timeout_try_q_kick(priv, txqueue)) + gve_schedule_reset(priv); - gve_schedule_reset(priv); + block = gve_get_tx_notify_block(priv, txqueue); + if (block) + block->tx->queue_timeout++; priv->tx_timeo_cnt++; } +/* Header split is only supported on DQ RDA queue format. If XDP is enabled, + * header split is not allowed. 
+ */ +bool gve_header_split_supported(const struct gve_priv *priv) +{ + return priv->header_buf_size && + priv->queue_format == GVE_DQO_RDA_FORMAT && !priv->xdp_prog; +} + +int gve_set_rx_buf_len_config(struct gve_priv *priv, u32 rx_buf_len, + struct netlink_ext_ack *extack, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) +{ + u32 old_rx_buf_len = rx_alloc_cfg->packet_buffer_size; + + if (rx_buf_len == old_rx_buf_len) + return 0; + + /* device options may not always contain support for 4K buffers */ + if (!gve_is_dqo(priv) || priv->max_rx_buffer_size < SZ_4K) { + NL_SET_ERR_MSG_MOD(extack, + "Modifying Rx buf len is not supported"); + return -EOPNOTSUPP; + } + + if (priv->xdp_prog && rx_buf_len != SZ_2K) { + NL_SET_ERR_MSG_MOD(extack, + "Rx buf len can only be 2048 when XDP is on"); + return -EINVAL; + } + + if (rx_buf_len != SZ_2K && rx_buf_len != SZ_4K) { + NL_SET_ERR_MSG_MOD(extack, + "Rx buf len can only be 2048 or 4096"); + return -EINVAL; + } + rx_alloc_cfg->packet_buffer_size = rx_buf_len; + + return 0; +} + +int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split, + struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) +{ + bool enable_hdr_split; + + if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) + return 0; + + if (!gve_header_split_supported(priv)) { + dev_err(&priv->pdev->dev, "Header-split not supported\n"); + return -EOPNOTSUPP; + } + + if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED) + enable_hdr_split = true; + else + enable_hdr_split = false; + + if (enable_hdr_split == priv->header_split_enabled) + return 0; + + rx_alloc_cfg->enable_header_split = enable_hdr_split; + + return 0; +} + static int gve_set_features(struct net_device *netdev, netdev_features_t features) { const netdev_features_t orig_features = netdev->features; + struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; + struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; struct gve_priv *priv = netdev_priv(netdev); int err; + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); + if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) { netdev->features ^= NETIF_F_LRO; - if (netif_carrier_ok(netdev)) { - /* To make this process as simple as possible we - * teardown the device, set the new configuration, - * and then bring the device up again. - */ - err = gve_close(netdev); - /* We have already tried to reset in close, just fail - * at this point. - */ - if (err) - goto err; - - err = gve_open(netdev); + if (priv->xdp_prog && (netdev->features & NETIF_F_LRO)) { + netdev_warn(netdev, + "XDP is not supported when LRO is on.\n"); + err = -EOPNOTSUPP; + goto revert_features; + } + if (netif_running(netdev)) { + err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); if (err) - goto err; + goto revert_features; } } + if ((netdev->features & NETIF_F_NTUPLE) && !(features & NETIF_F_NTUPLE)) { + err = gve_flow_rules_reset(priv); + if (err) + goto revert_features; + } return 0; -err: - /* Reverts the change on error. */ + +revert_features: netdev->features = orig_features; - netif_err(priv, drv, netdev, - "Set features failed! !!! 
DISABLING ALL QUEUES !!!\n"); return err; } +static int gve_get_ts_config(struct net_device *dev, + struct kernel_hwtstamp_config *kernel_config) +{ + struct gve_priv *priv = netdev_priv(dev); + + *kernel_config = priv->ts_config; + return 0; +} + +static int gve_set_ts_config(struct net_device *dev, + struct kernel_hwtstamp_config *kernel_config, + struct netlink_ext_ack *extack) +{ + struct gve_priv *priv = netdev_priv(dev); + + if (kernel_config->tx_type != HWTSTAMP_TX_OFF) { + NL_SET_ERR_MSG_MOD(extack, "TX timestamping is not supported"); + return -ERANGE; + } + + if (kernel_config->rx_filter != HWTSTAMP_FILTER_NONE) { + if (!priv->nic_ts_report) { + NL_SET_ERR_MSG_MOD(extack, + "RX timestamping is not supported"); + kernel_config->rx_filter = HWTSTAMP_FILTER_NONE; + return -EOPNOTSUPP; + } + + kernel_config->rx_filter = HWTSTAMP_FILTER_ALL; + } + + priv->ts_config.rx_filter = kernel_config->rx_filter; + + return 0; +} + static const struct net_device_ops gve_netdev_ops = { .ndo_start_xmit = gve_start_xmit, + .ndo_features_check = gve_features_check, .ndo_open = gve_open, .ndo_stop = gve_close, .ndo_get_stats64 = gve_get_stats, .ndo_tx_timeout = gve_tx_timeout, .ndo_set_features = gve_set_features, + .ndo_bpf = gve_xdp, + .ndo_xdp_xmit = gve_xdp_xmit, + .ndo_xsk_wakeup = gve_xsk_wakeup, + .ndo_hwtstamp_get = gve_get_ts_config, + .ndo_hwtstamp_set = gve_set_ts_config, }; static void gve_handle_status(struct gve_priv *priv, u32 status) @@ -1178,16 +2233,19 @@ static void gve_handle_reset(struct gve_priv *priv) if (gve_get_do_reset(priv)) { rtnl_lock(); + netdev_lock(priv->dev); gve_reset(priv, false); + netdev_unlock(priv->dev); rtnl_unlock(); } } void gve_handle_report_stats(struct gve_priv *priv) { - int idx, stats_idx = 0, tx_bytes; - unsigned int start = 0; struct stats *stats = priv->stats_report->stats; + int idx, stats_idx = 0; + unsigned int start = 0; + u64 tx_bytes; if (!gve_get_report_stats(priv)) return; @@ -1195,7 +2253,7 @@ void gve_handle_report_stats(struct gve_priv *priv) be64_add_cpu(&priv->stats_report->written_count, 1); /* tx stats */ if (priv->tx) { - for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) { + for (idx = 0; idx < gve_num_tx_queues(priv); idx++) { u32 last_completion = 0; u32 tx_frames = 0; @@ -1234,6 +2292,11 @@ void gve_handle_report_stats(struct gve_priv *priv) .value = cpu_to_be64(last_completion), .queue_id = cpu_to_be32(idx), }; + stats[stats_idx++] = (struct stats) { + .stat_name = cpu_to_be32(TX_TIMEOUT_CNT), + .value = cpu_to_be64(priv->tx[idx].queue_timeout), + .queue_id = cpu_to_be32(idx), + }; } } /* rx stats */ @@ -1246,30 +2309,13 @@ void gve_handle_report_stats(struct gve_priv *priv) }; stats[stats_idx++] = (struct stats) { .stat_name = cpu_to_be32(RX_BUFFERS_POSTED), - .value = cpu_to_be64(priv->rx[0].fill_cnt), + .value = cpu_to_be64(priv->rx[idx].fill_cnt), .queue_id = cpu_to_be32(idx), }; } } } -static void gve_handle_link_status(struct gve_priv *priv, bool link_status) -{ - if (!gve_get_napi_enabled(priv)) - return; - - if (link_status == netif_carrier_ok(priv->dev)) - return; - - if (link_status) { - netdev_info(priv->dev, "Device link is up.\n"); - netif_carrier_on(priv->dev); - } else { - netdev_info(priv->dev, "Device link is down.\n"); - netif_carrier_off(priv->dev); - } -} - /* Handle NIC status register changes, reset requests and report stats */ static void gve_service_task(struct work_struct *work) { @@ -1283,6 +2329,29 @@ static void gve_service_task(struct work_struct *work) gve_handle_link_status(priv, 
GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); } +static void gve_set_netdev_xdp_features(struct gve_priv *priv) +{ + xdp_features_t xdp_features; + + if (priv->queue_format == GVE_GQI_QPL_FORMAT) { + xdp_features = NETDEV_XDP_ACT_BASIC; + xdp_features |= NETDEV_XDP_ACT_REDIRECT; + xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; + } else if (priv->queue_format == GVE_DQO_RDA_FORMAT) { + xdp_features = NETDEV_XDP_ACT_BASIC; + xdp_features |= NETDEV_XDP_ACT_REDIRECT; + xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; + } else { + xdp_features = 0; + } + + xdp_set_features_flag_locked(priv->dev, xdp_features); +} + +static const struct xdp_metadata_ops gve_xdp_metadata_ops = { + .xmo_rx_timestamp = gve_xdp_rx_timestamp, +}; + static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) { int num_ntfy; @@ -1296,6 +2365,15 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) return err; } + err = gve_verify_driver_compatibility(priv); + if (err) { + dev_err(&priv->pdev->dev, + "Could not verify driver compatibility: err=%d\n", err); + goto err; + } + + priv->num_registered_pages = 0; + if (skip_describe_device) goto setup_device; @@ -1307,14 +2385,6 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) "Could not get device information: err=%d\n", err); goto err; } - if (gve_is_gqi(priv) && priv->dev->max_mtu > PAGE_SIZE) { - priv->dev->max_mtu = PAGE_SIZE; - err = gve_adminq_set_mtu(priv, priv->dev->mtu); - if (err) { - dev_err(&priv->pdev->dev, "Could not set mtu"); - goto err; - } - } priv->dev->mtu = priv->dev->max_mtu; num_ntfy = pci_msix_vec_count(priv->pdev); if (num_ntfy <= 0) { @@ -1329,13 +2399,17 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) goto err; } - priv->num_registered_pages = 0; + /* Big TCP is only supported on DQO */ + if (!gve_is_gqi(priv)) + netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX); + priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK; /* gvnic has one Notification Block per MSI-x vector, except for the * management vector */ priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1; priv->mgmt_msix_idx = priv->num_ntfy_blks; + priv->numa_node = dev_to_node(&priv->pdev->dev); priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2); @@ -1350,16 +2424,41 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues, priv->rx_cfg.num_queues); } + priv->tx_cfg.num_xdp_queues = 0; dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n", priv->tx_cfg.num_queues, priv->rx_cfg.num_queues); dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n", priv->tx_cfg.max_queues, priv->rx_cfg.max_queues); + if (!gve_is_gqi(priv)) { + priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO; + priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO; + } + + priv->ts_config.tx_type = HWTSTAMP_TX_OFF; + priv->ts_config.rx_filter = HWTSTAMP_FILTER_NONE; + setup_device: + priv->xsk_pools = bitmap_zalloc(priv->rx_cfg.max_queues, GFP_KERNEL); + if (!priv->xsk_pools) { + err = -ENOMEM; + goto err; + } + + gve_set_netdev_xdp_features(priv); + if (!gve_is_gqi(priv)) + priv->dev->xdp_metadata_ops = &gve_xdp_metadata_ops; + err = gve_setup_device_resources(priv); - if (!err) - return 0; + if (err) + goto err_free_xsk_bitmap; + + return 0; + +err_free_xsk_bitmap: + bitmap_free(priv->xsk_pools); + priv->xsk_pools = NULL; err: gve_adminq_free(&priv->pdev->dev, priv); return err; @@ -1369,6 +2468,8 @@ static void 
gve_teardown_priv_resources(struct gve_priv *priv) { gve_teardown_device_resources(priv); gve_adminq_free(&priv->pdev->dev, priv); + bitmap_free(priv->xsk_pools); + priv->xsk_pools = NULL; } static void gve_trigger_reset(struct gve_priv *priv) @@ -1407,7 +2508,7 @@ err: int gve_reset(struct gve_priv *priv, bool attempt_teardown) { - bool was_up = netif_carrier_ok(priv->dev); + bool was_up = netif_running(priv->dev); int err; dev_info(&priv->pdev->dev, "Performing reset\n"); @@ -1458,6 +2559,188 @@ static void gve_write_version(u8 __iomem *driver_version_register) writeb('\n', driver_version_register); } +static int gve_rx_queue_stop(struct net_device *dev, void *per_q_mem, int idx) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_rx_ring *gve_per_q_mem; + int err; + + if (!priv->rx) + return -EAGAIN; + + /* Destroying queue 0 while other queues exist is not supported in DQO */ + if (!gve_is_gqi(priv) && idx == 0) + return -ERANGE; + + /* Single-queue destruction requires quiescence on all queues */ + gve_turndown(priv); + + /* This failure will trigger a reset - no need to clean up */ + err = gve_adminq_destroy_single_rx_queue(priv, idx); + if (err) + return err; + + if (gve_is_qpl(priv)) { + /* This failure will trigger a reset - no need to clean up */ + err = gve_unregister_qpl(priv, gve_rx_get_qpl(priv, idx)); + if (err) + return err; + } + + gve_rx_stop_ring(priv, idx); + + /* Turn the unstopped queues back up */ + gve_turnup_and_check_status(priv); + + gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; + *gve_per_q_mem = priv->rx[idx]; + memset(&priv->rx[idx], 0, sizeof(priv->rx[idx])); + return 0; +} + +static void gve_rx_queue_mem_free(struct net_device *dev, void *per_q_mem) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_rx_alloc_rings_cfg cfg = {0}; + struct gve_rx_ring *gve_per_q_mem; + + gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; + gve_rx_get_curr_alloc_cfg(priv, &cfg); + + if (gve_is_gqi(priv)) + gve_rx_free_ring_gqi(priv, gve_per_q_mem, &cfg); + else + gve_rx_free_ring_dqo(priv, gve_per_q_mem, &cfg); +} + +static int gve_rx_queue_mem_alloc(struct net_device *dev, void *per_q_mem, + int idx) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_rx_alloc_rings_cfg cfg = {0}; + struct gve_rx_ring *gve_per_q_mem; + int err; + + if (!priv->rx) + return -EAGAIN; + + gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; + gve_rx_get_curr_alloc_cfg(priv, &cfg); + + if (gve_is_gqi(priv)) + err = gve_rx_alloc_ring_gqi(priv, &cfg, gve_per_q_mem, idx); + else + err = gve_rx_alloc_ring_dqo(priv, &cfg, gve_per_q_mem, idx); + + return err; +} + +static int gve_rx_queue_start(struct net_device *dev, void *per_q_mem, int idx) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_rx_ring *gve_per_q_mem; + int err; + + if (!priv->rx) + return -EAGAIN; + + gve_per_q_mem = (struct gve_rx_ring *)per_q_mem; + priv->rx[idx] = *gve_per_q_mem; + + /* Single-queue creation requires quiescence on all queues */ + gve_turndown(priv); + + gve_rx_start_ring(priv, idx); + + if (gve_is_qpl(priv)) { + /* This failure will trigger a reset - no need to clean up */ + err = gve_register_qpl(priv, gve_rx_get_qpl(priv, idx)); + if (err) + goto abort; + } + + /* This failure will trigger a reset - no need to clean up */ + err = gve_adminq_create_single_rx_queue(priv, idx); + if (err) + goto abort; + + if (gve_is_gqi(priv)) + gve_rx_write_doorbell(priv, &priv->rx[idx]); + else + gve_rx_post_buffers_dqo(&priv->rx[idx]); + + /* Turn the unstopped queues back up */ + 
gve_turnup_and_check_status(priv); + return 0; + +abort: + gve_rx_stop_ring(priv, idx); + + /* All failures in this func result in a reset, by clearing the struct + * at idx, we prevent a double free when that reset runs. The reset, + * which needs the rtnl lock, will not run till this func returns and + * its caller gives up the lock. + */ + memset(&priv->rx[idx], 0, sizeof(priv->rx[idx])); + return err; +} + +static const struct netdev_queue_mgmt_ops gve_queue_mgmt_ops = { + .ndo_queue_mem_size = sizeof(struct gve_rx_ring), + .ndo_queue_mem_alloc = gve_rx_queue_mem_alloc, + .ndo_queue_mem_free = gve_rx_queue_mem_free, + .ndo_queue_start = gve_rx_queue_start, + .ndo_queue_stop = gve_rx_queue_stop, +}; + +static void gve_get_rx_queue_stats(struct net_device *dev, int idx, + struct netdev_queue_stats_rx *rx_stats) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_rx_ring *rx = &priv->rx[idx]; + unsigned int start; + + do { + start = u64_stats_fetch_begin(&rx->statss); + rx_stats->packets = rx->rpackets; + rx_stats->bytes = rx->rbytes; + rx_stats->alloc_fail = rx->rx_skb_alloc_fail + + rx->rx_buf_alloc_fail; + } while (u64_stats_fetch_retry(&rx->statss, start)); +} + +static void gve_get_tx_queue_stats(struct net_device *dev, int idx, + struct netdev_queue_stats_tx *tx_stats) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_tx_ring *tx = &priv->tx[idx]; + unsigned int start; + + do { + start = u64_stats_fetch_begin(&tx->statss); + tx_stats->packets = tx->pkt_done; + tx_stats->bytes = tx->bytes_done; + } while (u64_stats_fetch_retry(&tx->statss, start)); +} + +static void gve_get_base_stats(struct net_device *dev, + struct netdev_queue_stats_rx *rx, + struct netdev_queue_stats_tx *tx) +{ + rx->packets = 0; + rx->bytes = 0; + rx->alloc_fail = 0; + + tx->packets = 0; + tx->bytes = 0; +} + +static const struct netdev_stat_ops gve_stat_ops = { + .get_queue_stats_rx = gve_get_rx_queue_stats, + .get_queue_stats_tx = gve_get_tx_queue_stats, + .get_base_stats = gve_get_base_stats, +}; + static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { int max_tx_queues, max_rx_queues; @@ -1469,27 +2752,20 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = pci_enable_device(pdev); if (err) - return -ENXIO; + return err; - err = pci_request_regions(pdev, "gvnic-cfg"); + err = pci_request_regions(pdev, gve_driver_name); if (err) goto abort_with_enabled; pci_set_master(pdev); - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (err) { dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err); goto abort_with_pci_region; } - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (err) { - dev_err(&pdev->dev, - "Failed to set consistent dma mask: err=%d\n", err); - goto abort_with_pci_region; - } - reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0); if (!reg_bar) { dev_err(&pdev->dev, "Failed to map pci bar!\n"); @@ -1512,12 +2788,15 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues); if (!dev) { dev_err(&pdev->dev, "could not allocate netdev\n"); + err = -ENOMEM; goto abort_with_db_bar; } SET_NETDEV_DEV(dev, &pdev->dev); pci_set_drvdata(pdev, dev); dev->ethtool_ops = &gve_ethtool_ops; dev->netdev_ops = &gve_netdev_ops; + dev->queue_mgmt_ops = &gve_queue_mgmt_ops; + dev->stat_ops = &gve_stat_ops; /* Set default and supported features. 
* @@ -1546,6 +2825,8 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) priv->service_task_flags = 0x0; priv->state_flags = 0x0; priv->ethtool_flags = 0x0; + priv->rx_cfg.packet_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; + priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; gve_set_probe_in_progress(priv); priv->gve_wq = alloc_ordered_workqueue("gve", 0); @@ -1563,9 +2844,12 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto abort_with_wq; + if (!gve_is_gqi(priv) && !gve_is_qpl(priv)) + dev->netmem_tx = true; + err = register_netdev(dev); if (err) - goto abort_with_wq; + goto abort_with_gve_init; dev_info(&pdev->dev, "GVE version %s\n", gve_version_str); dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format); @@ -1573,6 +2857,9 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) queue_work(priv->gve_wq, &priv->service_task); return 0; +abort_with_gve_init: + gve_teardown_priv_resources(priv); + abort_with_wq: destroy_workqueue(priv->gve_wq); @@ -1590,7 +2877,7 @@ abort_with_pci_region: abort_with_enabled: pci_disable_device(pdev); - return -ENXIO; + return err; } static void gve_remove(struct pci_dev *pdev) @@ -1610,22 +2897,87 @@ static void gve_remove(struct pci_dev *pdev) pci_disable_device(pdev); } +static void gve_shutdown(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct gve_priv *priv = netdev_priv(netdev); + bool was_up = netif_running(priv->dev); + + netif_device_detach(netdev); + + rtnl_lock(); + netdev_lock(netdev); + if (was_up && gve_close(priv->dev)) { + /* If the dev was up, attempt to close, if close fails, reset */ + gve_reset_and_teardown(priv, was_up); + } else { + /* If the dev wasn't up or close worked, finish tearing down */ + gve_teardown_priv_resources(priv); + } + netdev_unlock(netdev); + rtnl_unlock(); +} + +#ifdef CONFIG_PM +static int gve_suspend(struct pci_dev *pdev, pm_message_t state) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct gve_priv *priv = netdev_priv(netdev); + bool was_up = netif_running(priv->dev); + + priv->suspend_cnt++; + rtnl_lock(); + netdev_lock(netdev); + if (was_up && gve_close(priv->dev)) { + /* If the dev was up, attempt to close, if close fails, reset */ + gve_reset_and_teardown(priv, was_up); + } else { + /* If the dev wasn't up or close worked, finish tearing down */ + gve_teardown_priv_resources(priv); + } + priv->up_before_suspend = was_up; + netdev_unlock(netdev); + rtnl_unlock(); + return 0; +} + +static int gve_resume(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct gve_priv *priv = netdev_priv(netdev); + int err; + + priv->resume_cnt++; + rtnl_lock(); + netdev_lock(netdev); + err = gve_reset_recovery(priv, priv->up_before_suspend); + netdev_unlock(netdev); + rtnl_unlock(); + return err; +} +#endif /* CONFIG_PM */ + static const struct pci_device_id gve_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) }, { } }; -static struct pci_driver gvnic_driver = { - .name = "gvnic", +static struct pci_driver gve_driver = { + .name = gve_driver_name, .id_table = gve_id_table, .probe = gve_probe, .remove = gve_remove, + .shutdown = gve_shutdown, +#ifdef CONFIG_PM + .suspend = gve_suspend, + .resume = gve_resume, +#endif }; -module_pci_driver(gvnic_driver); +module_pci_driver(gve_driver); MODULE_DEVICE_TABLE(pci, gve_id_table); MODULE_AUTHOR("Google, Inc."); -MODULE_DESCRIPTION("gVNIC Driver"); +MODULE_DESCRIPTION("Google 
Virtual NIC Driver"); MODULE_LICENSE("Dual MIT/GPL"); MODULE_VERSION(GVE_VERSION);
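
A note on the new .ndo_hwtstamp_get/.ndo_hwtstamp_set callbacks wired up in this patch: gve_set_ts_config() rejects any TX timestamping request and, when the device advertises RX timestamp support (priv->nic_ts_report), coerces every requested RX filter to HWTSTAMP_FILTER_ALL, which is then stored in priv->ts_config and reported back by gve_get_ts_config(). The following is a minimal userspace sketch (not part of the patch) showing how this path is exercised through the standard SIOCSHWTSTAMP ioctl; the interface name "eth0" is a placeholder.

/* Hypothetical userspace sketch: request RX timestamping on a gve netdev.
 * Assumes the interface name "eth0"; adjust as needed.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <net/if.h>             /* struct ifreq, IFNAMSIZ */
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/net_tstamp.h>   /* struct hwtstamp_config, HWTSTAMP_* */
#include <linux/sockios.h>      /* SIOCSHWTSTAMP */

int main(void)
{
	struct hwtstamp_config cfg = {
		.tx_type = HWTSTAMP_TX_OFF,       /* TX stamping is rejected by gve */
		.rx_filter = HWTSTAMP_FILTER_ALL, /* granted only if the NIC reports RX timestamps */
	};
	struct ifreq ifr;
	int fd, ret;

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0) {
		perror("socket");
		return 1;
	}

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1); /* placeholder ifname */
	ifr.ifr_data = (char *)&cfg;

	ret = ioctl(fd, SIOCSHWTSTAMP, &ifr);
	if (ret < 0)
		perror("SIOCSHWTSTAMP");
	else
		printf("granted rx_filter: %d\n", cfg.rx_filter);

	close(fd);
	return ret < 0;
}

Because the granted configuration is cached in priv->ts_config, a subsequent SIOCGHWTSTAMP on the same interface should report the coerced HWTSTAMP_FILTER_ALL rather than the originally requested filter.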
