Diffstat (limited to 'drivers/net/ethernet/google/gve/gve_main.c')
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_main.c | 362
1 file changed, 253 insertions(+), 109 deletions(-)
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index d1aeb722d48f..1f411d7c4373 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -4,6 +4,7 @@
  * Copyright (C) 2015-2024 Google LLC
  */
 
+#include <linux/bitmap.h>
 #include <linux/bpf.h>
 #include <linux/cpumask.h>
 #include <linux/etherdevice.h>
@@ -414,14 +415,24 @@ int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
 	bool reschedule = false;
 	int work_done = 0;
 
-	if (block->tx)
-		reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
+	if (block->tx) {
+		if (block->tx->q_num < priv->tx_cfg.num_queues)
+			reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
+		else
+			reschedule |= gve_xdp_poll_dqo(block);
+	}
 
 	if (!budget)
 		return 0;
 
 	if (block->rx) {
 		work_done = gve_rx_poll_dqo(block, budget);
+
+		/* Poll XSK TX as part of RX NAPI. Setup re-poll based on if
+		 * either datapath has more work to do.
+		 */
+		if (priv->xdp_prog)
+			reschedule |= gve_xsk_tx_poll_dqo(block, budget);
 		reschedule |= work_done == budget;
 	}
 
@@ -457,10 +468,19 @@ int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
 	return work_done;
 }
 
+static const struct cpumask *gve_get_node_mask(struct gve_priv *priv)
+{
+	if (priv->numa_node == NUMA_NO_NODE)
+		return cpu_all_mask;
+	else
+		return cpumask_of_node(priv->numa_node);
+}
+
 static int gve_alloc_notify_blocks(struct gve_priv *priv)
 {
 	int num_vecs_requested = priv->num_ntfy_blks + 1;
-	unsigned int active_cpus;
+	const struct cpumask *node_mask;
+	unsigned int cur_cpu;
 	int vecs_enabled;
 	int i, j;
 	int err;
@@ -499,8 +519,6 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
 		if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
 			priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
 	}
-	/* Half the notification blocks go to TX and half to RX */
-	active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());
 
 	/* Setup Management Vector - the last vector */
 	snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s",
@@ -529,6 +547,8 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
 	}
 
 	/* Setup the other blocks - the first n-1 vectors */
+	node_mask = gve_get_node_mask(priv);
+	cur_cpu = cpumask_first(node_mask);
 	for (i = 0; i < priv->num_ntfy_blks; i++) {
 		struct gve_notify_block *block = &priv->ntfy_blocks[i];
 		int msix_idx = i;
@@ -545,9 +565,17 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
 			goto abort_with_some_ntfy_blocks;
 		}
 		block->irq = priv->msix_vectors[msix_idx].vector;
-		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
-				      get_cpu_mask(i % active_cpus));
+		irq_set_affinity_and_hint(block->irq,
+					  cpumask_of(cur_cpu));
 		block->irq_db_index = &priv->irq_db_indices[i].index;
+
+		cur_cpu = cpumask_next(cur_cpu, node_mask);
+		/* Wrap once CPUs in the node have been exhausted, or when
+		 * starting RX queue affinities. TX and RX queues of the same
+		 * index share affinity.
+		 */
+		if (cur_cpu >= nr_cpu_ids || (i + 1) == priv->tx_cfg.max_queues)
+			cur_cpu = cpumask_first(node_mask);
 	}
 	return 0;
 abort_with_some_ntfy_blocks:
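The affinity hunk above spreads MSI-X vectors round-robin over the CPUs of the device's NUMA node, wrapping both when the node's CPUs run out and when the loop crosses from TX to RX vectors, so that TX queue i and RX queue i land on the same CPU. A standalone sketch of just that wrap logic, with a stand-in CPU list in place of the cpumask_of_node()/cpumask_next() walk the driver actually does:

    #include <stdio.h>

    /* Stand-in for the CPUs of one NUMA node; the driver gets this from
     * cpumask_of_node() and walks it with cpumask_first()/cpumask_next(). */
    static const int node_cpus[] = { 0, 2, 4, 6 };
    #define NODE_NCPUS 4

    int main(void)
    {
            int max_tx = 6, num_blks = 12;  /* 6 TX + 6 RX vectors */
            int cur = 0;

            for (int i = 0; i < num_blks; i++) {
                    printf("vector %2d -> cpu %d (%s)\n", i, node_cpus[cur],
                           i < max_tx ? "tx" : "rx");
                    cur++;
                    /* Wrap when the node's CPUs are exhausted, or when the
                     * loop moves from TX to RX vectors, so TX and RX queues
                     * of the same index land on the same CPU. */
                    if (cur >= NODE_NCPUS || (i + 1) == max_tx)
                            cur = 0;
            }
            return 0;
    }

Whatever the node size, the RX half always restarts at the node's first CPU, mirroring the (i + 1) == priv->tx_cfg.max_queues check above.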
@@ -619,9 +647,12 @@ static int gve_setup_device_resources(struct gve_priv *priv)
 	err = gve_alloc_counter_array(priv);
 	if (err)
 		goto abort_with_rss_config_cache;
-	err = gve_alloc_notify_blocks(priv);
+	err = gve_init_clock(priv);
 	if (err)
 		goto abort_with_counter;
+	err = gve_alloc_notify_blocks(priv);
+	if (err)
+		goto abort_with_clock;
 	err = gve_alloc_stats_report(priv);
 	if (err)
 		goto abort_with_ntfy_blocks;
@@ -674,6 +705,8 @@ abort_with_stats_report:
 	gve_free_stats_report(priv);
 abort_with_ntfy_blocks:
 	gve_free_notify_blocks(priv);
+abort_with_clock:
+	gve_teardown_clock(priv);
 abort_with_counter:
 	gve_free_counter_array(priv);
 abort_with_rss_config_cache:
@@ -722,6 +755,7 @@ static void gve_teardown_device_resources(struct gve_priv *priv)
 	gve_free_counter_array(priv);
 	gve_free_notify_blocks(priv);
 	gve_free_stats_report(priv);
+	gve_teardown_clock(priv);
 	gve_clear_device_resources_ok(priv);
 }
 
@@ -1030,7 +1064,7 @@ int gve_alloc_page(struct gve_priv *priv, struct device *dev,
 		   struct page **page, dma_addr_t *dma,
 		   enum dma_data_direction dir, gfp_t gfp_flags)
 {
-	*page = alloc_page(gfp_flags);
+	*page = alloc_pages_node(priv->numa_node, gfp_flags, 0);
 	if (!*page) {
 		priv->page_alloc_fail++;
 		return -ENOMEM;
 	}
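gve_alloc_page() above switches from alloc_page() to alloc_pages_node() so data pages land on the device's NUMA node (cached from dev_to_node() in the gve_init_priv hunk further down). A hedged kernel-style sketch of the pattern; the helper name is invented, but alloc_pages_node() and dev_to_node() are the stock APIs:

    #include <linux/device.h>
    #include <linux/gfp.h>

    /* Allocate an order-0 page on the device's home node. dev_to_node()
     * may return NUMA_NO_NODE, which alloc_pages_node() treats as "use
     * the current node", so no special-casing is needed here. */
    static struct page *alloc_rx_page_near(struct device *dev, gfp_t gfp)
    {
            return alloc_pages_node(dev_to_node(dev), gfp, /*order=*/0);
    }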
@@ -1131,18 +1165,84 @@ static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
 static void gve_turndown(struct gve_priv *priv);
 static void gve_turnup(struct gve_priv *priv);
 
+static void gve_unreg_xsk_pool(struct gve_priv *priv, u16 qid)
+{
+	struct gve_rx_ring *rx;
+
+	if (!priv->rx)
+		return;
+
+	rx = &priv->rx[qid];
+	rx->xsk_pool = NULL;
+	if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
+		xdp_rxq_info_unreg_mem_model(&rx->xdp_rxq);
+
+	if (!priv->tx)
+		return;
+	priv->tx[gve_xdp_tx_queue_id(priv, qid)].xsk_pool = NULL;
+}
+
+static int gve_reg_xsk_pool(struct gve_priv *priv, struct net_device *dev,
+			    struct xsk_buff_pool *pool, u16 qid)
+{
+	struct gve_rx_ring *rx;
+	u16 tx_qid;
+	int err;
+
+	rx = &priv->rx[qid];
+	err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
+					 MEM_TYPE_XSK_BUFF_POOL, pool);
+	if (err) {
+		gve_unreg_xsk_pool(priv, qid);
+		return err;
+	}
+
+	rx->xsk_pool = pool;
+
+	tx_qid = gve_xdp_tx_queue_id(priv, qid);
+	priv->tx[tx_qid].xsk_pool = pool;
+
+	return 0;
+}
+
+static void gve_unreg_xdp_info(struct gve_priv *priv)
+{
+	int i;
+
+	if (!priv->tx_cfg.num_xdp_queues || !priv->rx)
+		return;
+
+	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+		struct gve_rx_ring *rx = &priv->rx[i];
+
+		if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
+			xdp_rxq_info_unreg(&rx->xdp_rxq);
+
+		gve_unreg_xsk_pool(priv, i);
+	}
+}
+
+static struct xsk_buff_pool *gve_get_xsk_pool(struct gve_priv *priv, int qid)
+{
+	if (!test_bit(qid, priv->xsk_pools))
+		return NULL;
+
+	return xsk_get_pool_from_qid(priv->dev, qid);
+}
+
 static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
 {
 	struct napi_struct *napi;
 	struct gve_rx_ring *rx;
 	int err = 0;
-	int i, j;
-	u32 tx_qid;
+	int i;
 
 	if (!priv->tx_cfg.num_xdp_queues)
 		return 0;
 
 	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+		struct xsk_buff_pool *xsk_pool;
+
 		rx = &priv->rx[i];
 		napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
 
@@ -1150,7 +1250,11 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
 				       napi->napi_id);
 		if (err)
 			goto err;
-		if (gve_is_qpl(priv))
+
+		xsk_pool = gve_get_xsk_pool(priv, i);
+		if (xsk_pool)
+			err = gve_reg_xsk_pool(priv, dev, xsk_pool, i);
+		else if (gve_is_qpl(priv))
 			err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
 							 MEM_TYPE_PAGE_SHARED,
 							 NULL);
@@ -1160,60 +1264,14 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
 							 rx->dqo.page_pool);
 		if (err)
 			goto err;
-		rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
-		if (rx->xsk_pool) {
-			err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i,
-					       napi->napi_id);
-			if (err)
-				goto err;
-			err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
-							 MEM_TYPE_XSK_BUFF_POOL, NULL);
-			if (err)
-				goto err;
-			xsk_pool_set_rxq_info(rx->xsk_pool,
-					      &rx->xsk_rxq);
-		}
-	}
-
-	for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) {
-		tx_qid = gve_xdp_tx_queue_id(priv, i);
-		priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i);
 	}
 
 	return 0;
 
 err:
-	for (j = i; j >= 0; j--) {
-		rx = &priv->rx[j];
-		if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
-			xdp_rxq_info_unreg(&rx->xdp_rxq);
-		if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
-			xdp_rxq_info_unreg(&rx->xsk_rxq);
-	}
+	gve_unreg_xdp_info(priv);
 	return err;
 }
 
-static void gve_unreg_xdp_info(struct gve_priv *priv)
-{
-	int i, tx_qid;
-
-	if (!priv->tx_cfg.num_xdp_queues || !priv->rx || !priv->tx)
-		return;
-
-	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
-		struct gve_rx_ring *rx = &priv->rx[i];
-
-		xdp_rxq_info_unreg(&rx->xdp_rxq);
-		if (rx->xsk_pool) {
-			xdp_rxq_info_unreg(&rx->xsk_rxq);
-			rx->xsk_pool = NULL;
-		}
-	}
-
-	for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) {
-		tx_qid = gve_xdp_tx_queue_id(priv, i);
-		priv->tx[tx_qid].xsk_pool = NULL;
-	}
-}
 
 static void gve_drain_page_cache(struct gve_priv *priv)
 {
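The new xsk_pools bitmap gives the driver its own record of which RX queue ids have an AF_XDP pool attached, so gve_get_xsk_pool() can decide during queue (re)registration whether to bind a pool for that queue. A compact sketch of that bookkeeping using the stock bitmap/bitops API; the struct and function names here are hypothetical, not the driver's:

    #include <linux/bitmap.h>
    #include <linux/bitops.h>

    /* One bit per RX queue id: set on pool enable, cleared on disable,
     * tested when queues are torn down and rebuilt. */
    struct xsk_tracker {
            unsigned long *map;     /* bitmap_zalloc(max_queues, GFP_KERNEL) */
    };

    static bool xsk_queue_has_pool(const struct xsk_tracker *t, unsigned int qid)
    {
            return test_bit(qid, t->map);
    }

    static void xsk_queue_mark(struct xsk_tracker *t, unsigned int qid, bool on)
    {
            if (on)
                    set_bit(qid, t->map);
            else
                    clear_bit(qid, t->map);
    }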
@@ -1510,14 +1568,24 @@ out:
 	return err;
 }
 
+static int gve_xdp_xmit(struct net_device *dev, int n,
+			struct xdp_frame **frames, u32 flags)
+{
+	struct gve_priv *priv = netdev_priv(dev);
+
+	if (priv->queue_format == GVE_GQI_QPL_FORMAT)
+		return gve_xdp_xmit_gqi(dev, n, frames, flags);
+	else if (priv->queue_format == GVE_DQO_RDA_FORMAT)
+		return gve_xdp_xmit_dqo(dev, n, frames, flags);
+
+	return -EOPNOTSUPP;
+}
+
 static int gve_xsk_pool_enable(struct net_device *dev,
 			       struct xsk_buff_pool *pool,
 			       u16 qid)
 {
 	struct gve_priv *priv = netdev_priv(dev);
-	struct napi_struct *napi;
-	struct gve_rx_ring *rx;
-	int tx_qid;
 	int err;
 
 	if (qid >= priv->rx_cfg.num_queues) {
@@ -1535,34 +1603,31 @@ static int gve_xsk_pool_enable(struct net_device *dev,
 	if (err)
 		return err;
 
+	set_bit(qid, priv->xsk_pools);
+
 	/* If XDP prog is not installed or interface is down, return. */
 	if (!priv->xdp_prog || !netif_running(dev))
 		return 0;
 
-	rx = &priv->rx[qid];
-	napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
-	err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id);
+	err = gve_reg_xsk_pool(priv, dev, pool, qid);
 	if (err)
-		goto err;
-
-	err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
-					 MEM_TYPE_XSK_BUFF_POOL, NULL);
-	if (err)
-		goto err;
-
-	xsk_pool_set_rxq_info(pool, &rx->xsk_rxq);
-	rx->xsk_pool = pool;
-
-	tx_qid = gve_xdp_tx_queue_id(priv, qid);
-	priv->tx[tx_qid].xsk_pool = pool;
+		goto err_xsk_pool_dma_mapped;
+
+	/* Stop and start RDA queues to repost buffers. */
+	if (!gve_is_qpl(priv)) {
+		err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues);
+		if (err)
+			goto err_xsk_pool_registered;
+	}
 
 	return 0;
-err:
-	if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
-		xdp_rxq_info_unreg(&rx->xsk_rxq);
+
+err_xsk_pool_registered:
+	gve_unreg_xsk_pool(priv, qid);
+err_xsk_pool_dma_mapped:
+	clear_bit(qid, priv->xsk_pools);
 	xsk_pool_dma_unmap(pool,
-			   DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
+			   DMA_ATTR_SKIP_CPU_SYNC |
+			   DMA_ATTR_WEAK_ORDERING);
 	return err;
 }
 
@@ -1574,18 +1639,28 @@ static int gve_xsk_pool_disable(struct net_device *dev,
 	struct napi_struct *napi_tx;
 	struct xsk_buff_pool *pool;
 	int tx_qid;
+	int err;
 
-	pool = xsk_get_pool_from_qid(dev, qid);
-	if (!pool)
-		return -EINVAL;
 	if (qid >= priv->rx_cfg.num_queues)
 		return -EINVAL;
 
-	/* If XDP prog is not installed or interface is down, unmap DMA and
-	 * return.
-	 */
-	if (!priv->xdp_prog || !netif_running(dev))
-		goto done;
+	clear_bit(qid, priv->xsk_pools);
+
+	pool = xsk_get_pool_from_qid(dev, qid);
+	if (pool)
+		xsk_pool_dma_unmap(pool,
+				   DMA_ATTR_SKIP_CPU_SYNC |
+				   DMA_ATTR_WEAK_ORDERING);
+
+	if (!netif_running(dev) || !priv->tx_cfg.num_xdp_queues)
+		return 0;
+
+	/* Stop and start RDA queues to repost buffers. */
+	if (!gve_is_qpl(priv) && priv->xdp_prog) {
+		err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues);
+		if (err)
+			return err;
+	}
 
 	napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
 	napi_disable(napi_rx); /* make sure current rx poll is done */
@@ -1594,22 +1669,19 @@ static int gve_xsk_pool_disable(struct net_device *dev,
 	napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi;
 	napi_disable(napi_tx); /* make sure current tx poll is done */
 
-	priv->rx[qid].xsk_pool = NULL;
-	xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
-	priv->tx[tx_qid].xsk_pool = NULL;
+	gve_unreg_xsk_pool(priv, qid);
 	smp_mb(); /* Make sure it is visible to the workers on datapath */
 
 	napi_enable(napi_rx);
-	if (gve_rx_work_pending(&priv->rx[qid]))
-		napi_schedule(napi_rx);
-
 	napi_enable(napi_tx);
-	if (gve_tx_clean_pending(priv, &priv->tx[tx_qid]))
-		napi_schedule(napi_tx);
+	if (gve_is_gqi(priv)) {
+		if (gve_rx_work_pending(&priv->rx[qid]))
+			napi_schedule(napi_rx);
+
+		if (gve_tx_clean_pending(priv, &priv->tx[tx_qid]))
+			napi_schedule(napi_tx);
+	}
 
-done:
-	xsk_pool_dma_unmap(pool,
-			   DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
 	return 0;
 }
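The reworked gve_xsk_pool_enable() above follows the usual kernel unwind idiom: each error label is named after the last step that succeeded, and each undoes exactly that much state in reverse order (unregister the pool, then clear the bitmap bit and unmap DMA). A generic, self-contained sketch of the idiom; the three steps merely stand in for DMA-map, register, and ring reconfigure:

    #include <stdio.h>

    static int step_map(void)       { return 0; }   /* e.g. DMA-map the pool */
    static int step_register(void)  { return 0; }   /* e.g. register the pool */
    static int step_rings(void)     { return -1; }  /* e.g. restart RDA rings */
    static void undo_register(void) { puts("unregister pool"); }
    static void undo_map(void)      { puts("unmap pool"); }

    static int enable(void)
    {
            int err;

            err = step_map();
            if (err)
                    return err;
            err = step_register();
            if (err)
                    goto err_mapped;
            err = step_rings();
            if (err)
                    goto err_registered;
            return 0;

    err_registered:
            undo_register();
    err_mapped:
            undo_map();
            return err;
    }

    int main(void)
    {
            return enable() ? 1 : 0;
    }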
@@ -1645,9 +1717,8 @@ static int verify_xdp_configuration(struct net_device *dev)
 		return -EOPNOTSUPP;
 	}
 
-	if (priv->queue_format != GVE_GQI_QPL_FORMAT) {
-		netdev_warn(dev, "XDP is not supported in mode %d.\n",
-			    priv->queue_format);
+	if (priv->header_split_enabled) {
+		netdev_warn(dev, "XDP is not supported when header-data split is enabled.\n");
 		return -EOPNOTSUPP;
 	}
 
@@ -1727,7 +1798,7 @@ int gve_adjust_config(struct gve_priv *priv,
 {
 	int err;
 
-	/* Allocate resources for the new confiugration */
+	/* Allocate resources for the new configuration */
 	err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg);
 	if (err) {
 		netif_err(priv, drv, priv->dev,
@@ -1978,10 +2049,13 @@ u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit)
 	return GVE_DEFAULT_RX_BUFFER_SIZE;
 }
 
-/* header-split is not supported on non-DQO_RDA yet even if device advertises it */
+/* Header split is only supported on DQ RDA queue format. If XDP is enabled,
+ * header split is not allowed.
+ */
 bool gve_header_split_supported(const struct gve_priv *priv)
 {
-	return priv->header_buf_size && priv->queue_format == GVE_DQO_RDA_FORMAT;
+	return priv->header_buf_size &&
+		priv->queue_format == GVE_DQO_RDA_FORMAT && !priv->xdp_prog;
 }
 
 int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split)
@@ -2030,6 +2104,12 @@ static int gve_set_features(struct net_device *netdev,
 
 	if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
 		netdev->features ^= NETIF_F_LRO;
+		if (priv->xdp_prog && (netdev->features & NETIF_F_LRO)) {
+			netdev_warn(netdev,
+				    "XDP is not supported when LRO is on.\n");
+			err = -EOPNOTSUPP;
+			goto revert_features;
+		}
 		if (netif_running(netdev)) {
 			err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
 			if (err)
@@ -2049,6 +2129,46 @@ revert_features:
 	return err;
 }
 
+static int gve_get_ts_config(struct net_device *dev,
+			     struct kernel_hwtstamp_config *kernel_config)
+{
+	struct gve_priv *priv = netdev_priv(dev);
+
+	*kernel_config = priv->ts_config;
+	return 0;
+}
+
+static int gve_set_ts_config(struct net_device *dev,
+			     struct kernel_hwtstamp_config *kernel_config,
+			     struct netlink_ext_ack *extack)
+{
+	struct gve_priv *priv = netdev_priv(dev);
+
+	if (kernel_config->tx_type != HWTSTAMP_TX_OFF) {
+		NL_SET_ERR_MSG_MOD(extack, "TX timestamping is not supported");
+		return -ERANGE;
+	}
+
+	if (kernel_config->rx_filter != HWTSTAMP_FILTER_NONE) {
+		if (!priv->nic_ts_report) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "RX timestamping is not supported");
+			kernel_config->rx_filter = HWTSTAMP_FILTER_NONE;
+			return -EOPNOTSUPP;
+		}
+
+		kernel_config->rx_filter = HWTSTAMP_FILTER_ALL;
+		gve_clock_nic_ts_read(priv);
+		ptp_schedule_worker(priv->ptp->clock, 0);
+	} else {
+		ptp_cancel_worker_sync(priv->ptp->clock);
+	}
+
+	priv->ts_config.rx_filter = kernel_config->rx_filter;
+
+	return 0;
+}
+
 static const struct net_device_ops gve_netdev_ops = {
 	.ndo_start_xmit		=	gve_start_xmit,
 	.ndo_features_check	=	gve_features_check,
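The new gve_set_ts_config() rejects TX timestamping, coerces any requested RX filter to HWTSTAMP_FILTER_ALL, and kicks the PTP worker so the NIC clock gets read. From userspace this is reached through the standard SIOCSHWTSTAMP ioctl; a minimal example using only stock UAPI (interface name assumed, error handling trimmed):

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <net/if.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <linux/net_tstamp.h>
    #include <linux/sockios.h>

    int main(void)
    {
            struct hwtstamp_config cfg = {
                    .tx_type = HWTSTAMP_TX_OFF,       /* anything else gets -ERANGE */
                    .rx_filter = HWTSTAMP_FILTER_ALL, /* driver forces ALL anyway */
            };
            struct ifreq ifr;
            int fd = socket(AF_INET, SOCK_DGRAM, 0);

            if (fd < 0)
                    return 1;
            memset(&ifr, 0, sizeof(ifr));
            strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1); /* assumed ifname */
            ifr.ifr_data = (char *)&cfg;

            if (ioctl(fd, SIOCSHWTSTAMP, &ifr))
                    perror("SIOCSHWTSTAMP");
            close(fd);
            return 0;
    }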
@@ -2060,6 +2180,8 @@ static const struct net_device_ops gve_netdev_ops = {
 	.ndo_bpf		=	gve_xdp,
 	.ndo_xdp_xmit		=	gve_xdp_xmit,
 	.ndo_xsk_wakeup		=	gve_xsk_wakeup,
+	.ndo_hwtstamp_get	=	gve_get_ts_config,
+	.ndo_hwtstamp_set	=	gve_set_ts_config,
 };
 
 static void gve_handle_status(struct gve_priv *priv, u32 status)
@@ -2189,6 +2311,10 @@ static void gve_set_netdev_xdp_features(struct gve_priv *priv)
 		xdp_features = NETDEV_XDP_ACT_BASIC;
 		xdp_features |= NETDEV_XDP_ACT_REDIRECT;
 		xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
+	} else if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
+		xdp_features = NETDEV_XDP_ACT_BASIC;
+		xdp_features |= NETDEV_XDP_ACT_REDIRECT;
+		xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
 	} else {
 		xdp_features = 0;
 	}
@@ -2243,7 +2369,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
 		goto err;
 	}
 
-	/* Big TCP is only supported on DQ*/
+	/* Big TCP is only supported on DQO */
 	if (!gve_is_gqi(priv))
 		netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);
 
@@ -2253,6 +2379,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
 	 */
 	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
 	priv->mgmt_msix_idx = priv->num_ntfy_blks;
+	priv->numa_node = dev_to_node(&priv->pdev->dev);
 
 	priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
 					priv->num_ntfy_blks / 2);
@@ -2279,11 +2406,26 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
 		priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
 	}
 
+	priv->ts_config.tx_type = HWTSTAMP_TX_OFF;
+	priv->ts_config.rx_filter = HWTSTAMP_FILTER_NONE;
+
 setup_device:
+	priv->xsk_pools = bitmap_zalloc(priv->rx_cfg.max_queues, GFP_KERNEL);
+	if (!priv->xsk_pools) {
+		err = -ENOMEM;
+		goto err;
+	}
+
 	gve_set_netdev_xdp_features(priv);
 	err = gve_setup_device_resources(priv);
-	if (!err)
-		return 0;
+	if (err)
+		goto err_free_xsk_bitmap;
+
+	return 0;
+
+err_free_xsk_bitmap:
+	bitmap_free(priv->xsk_pools);
+	priv->xsk_pools = NULL;
 err:
 	gve_adminq_free(&priv->pdev->dev, priv);
 	return err;
@@ -2293,6 +2435,8 @@ static void gve_teardown_priv_resources(struct gve_priv *priv)
 {
 	gve_teardown_device_resources(priv);
 	gve_adminq_free(&priv->pdev->dev, priv);
+	bitmap_free(priv->xsk_pools);
+	priv->xsk_pools = NULL;
 }
 
 static void gve_trigger_reset(struct gve_priv *priv)
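Finally, gve_set_netdev_xdp_features() gains a GVE_DQO_RDA_FORMAT branch advertising the same three capabilities as GQI QPL. A hedged sketch of the advertisement pattern using the stock xdp_set_features_flag() helper; the condition is paraphrased, and how the driver ultimately applies the mask lies outside this hunk's context:

    #include <linux/netdevice.h>
    #include <net/xdp.h>

    /* Advertise basic XDP, redirect and AF_XDP zero-copy when the queue
     * format supports them; an empty mask withdraws XDP support. */
    static void example_set_xdp_features(struct net_device *dev, bool supported)
    {
            xdp_features_t feat = 0;

            if (supported)
                    feat = NETDEV_XDP_ACT_BASIC |
                           NETDEV_XDP_ACT_REDIRECT |
                           NETDEV_XDP_ACT_XSK_ZEROCOPY;

            xdp_set_features_flag(dev, feat);
    }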