-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en.h          |  11
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/params.c   |  36
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h     |   3
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c  |  33
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_main.c     | 306
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_rx.c       | 138
-rw-r--r--  include/linux/skbuff.h                                |  12
-rw-r--r--  include/net/netmem.h                                  |   2
-rw-r--r--  include/net/page_pool/helpers.h                       |   7
9 files changed, 381 insertions(+), 167 deletions(-)
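
The core conversion the en_rx.c hunks below apply is: the RX fragment tracker stores a netmem_ref instead of a struct page *, and every page_pool call goes through the netmem-aware helpers so the buffer may live in unreadable (device) memory. The following is a condensed sketch of that alloc/release pattern, not the driver code itself; the struct and function names (frag_page, frag_page_alloc/frag_page_release, PAGECNT_BIAS_MAX) are illustrative stand-ins for mlx5e_frag_page, mlx5e_page_alloc_fragmented(), mlx5e_page_release_fragmented() and MLX5E_PAGECNT_BIAS_MAX as they appear in the diff.

	/* Illustrative sketch of the netmem alloc/release pattern; names are
	 * hypothetical, the real code is in the en_rx.c hunks below.
	 */
	#include <linux/types.h>
	#include <net/netmem.h>
	#include <net/page_pool/helpers.h>

	struct frag_page {			/* mirrors struct mlx5e_frag_page */
		netmem_ref netmem;
		u16 frags;
	};

	#define PAGECNT_BIAS_MAX (PAGE_SIZE / 64)

	static int frag_page_alloc(struct page_pool *pp, struct frag_page *fp)
	{
		netmem_ref netmem = page_pool_dev_alloc_netmems(pp);

		if (unlikely(!netmem))
			return -ENOMEM;

		/* Pre-charge the fragment count so references can be dropped
		 * lazily as fragments are consumed.
		 */
		page_pool_fragment_netmem(netmem, PAGECNT_BIAS_MAX);

		fp->netmem = netmem;
		fp->frags = 0;
		return 0;
	}

	static void frag_page_release(struct page_pool *pp, struct frag_page *fp)
	{
		u16 drain = PAGECNT_BIAS_MAX - fp->frags;

		/* Return the buffer to the pool once all outstanding
		 * references are gone; dma_sync_size of -1 means "sync the
		 * whole buffer".
		 */
		if (page_pool_unref_netmem(fp->netmem, drain) == 0)
			page_pool_put_unrefed_netmem(pp, fp->netmem, -1, true);
	}

DMA addresses and pfmemalloc checks follow the same substitution: page_pool_get_dma_addr() becomes page_pool_get_dma_addr_netmem(), page_is_pfmemalloc() becomes netmem_is_pfmemalloc(), and skb frags are filled with skb_add_rx_frag_netmem()/skb_frag_fill_netmem_desc().
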
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 5b0d03b3efe8..65a73913b9a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -278,10 +278,6 @@ enum packet_merge { struct mlx5e_packet_merge_param { enum packet_merge type; u32 timeout; - struct { - u8 match_criteria_type; - u8 alignment_granularity; - } shampo; }; struct mlx5e_params { @@ -557,7 +553,7 @@ struct mlx5e_icosq { } ____cacheline_aligned_in_smp; struct mlx5e_frag_page { - struct page *page; + netmem_ref netmem; u16 frags; }; @@ -638,7 +634,6 @@ struct mlx5e_shampo_hd { struct mlx5e_frag_page *pages; u32 hd_per_wq; u16 hd_per_wqe; - u16 pages_per_wq; unsigned long *bitmap; u16 pi; u16 ci; @@ -721,7 +716,11 @@ struct mlx5e_rq { struct bpf_prog __rcu *xdp_prog; struct mlx5e_xdpsq *xdpsq; DECLARE_BITMAP(flags, 8); + + /* page pools */ struct page_pool *page_pool; + struct page_pool *hd_page_pool; + struct mlx5e_xdp_buff mxbuf; /* AF_XDP zero-copy */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 58ec5e44aa7a..fc945bce933a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -901,6 +901,7 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, { void *rqc = param->rqc; void *wq = MLX5_ADDR_OF(rqc, rqc, wq); + u32 lro_timeout; int ndsegs = 1; int err; @@ -926,22 +927,25 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, MLX5_SET(wq, wq, log_wqe_stride_size, log_wqe_stride_size - MLX5_MPWQE_LOG_STRIDE_SZ_BASE); MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk)); - if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) { - MLX5_SET(wq, wq, shampo_enable, true); - MLX5_SET(wq, wq, log_reservation_size, - mlx5e_shampo_get_log_rsrv_size(mdev, params)); - MLX5_SET(wq, wq, - log_max_num_of_packets_per_reservation, - mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params)); - MLX5_SET(wq, wq, log_headers_entry_size, - mlx5e_shampo_get_log_hd_entry_size(mdev, params)); - MLX5_SET(rqc, rqc, reservation_timeout, - mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_SHAMPO_TIMEOUT)); - MLX5_SET(rqc, rqc, shampo_match_criteria_type, - params->packet_merge.shampo.match_criteria_type); - MLX5_SET(rqc, rqc, shampo_no_match_alignment_granularity, - params->packet_merge.shampo.alignment_granularity); - } + if (params->packet_merge.type != MLX5E_PACKET_MERGE_SHAMPO) + break; + + MLX5_SET(wq, wq, shampo_enable, true); + MLX5_SET(wq, wq, log_reservation_size, + mlx5e_shampo_get_log_rsrv_size(mdev, params)); + MLX5_SET(wq, wq, + log_max_num_of_packets_per_reservation, + mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params)); + MLX5_SET(wq, wq, log_headers_entry_size, + mlx5e_shampo_get_log_hd_entry_size(mdev, params)); + lro_timeout = + mlx5e_choose_lro_timeout(mdev, + MLX5E_DEFAULT_SHAMPO_TIMEOUT); + MLX5_SET(rqc, rqc, reservation_timeout, lro_timeout); + MLX5_SET(rqc, rqc, shampo_match_criteria_type, + MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED); + MLX5_SET(rqc, rqc, shampo_no_match_alignment_granularity, + MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_STRIDE); break; } default: /* MLX5_WQ_TYPE_CYCLIC */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index e837c21d3d21..6501252359b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h 
@@ -362,7 +362,8 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE); break; case MLX5E_DMA_MAP_PAGE: - dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE); + netmem_dma_unmap_page_attrs(pdev, dma->addr, dma->size, + DMA_TO_DEVICE, 0); break; default: WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA type!\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 8b9ee8bac674..35479cbf98d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -32,6 +32,7 @@ #include <linux/dim.h> #include <linux/ethtool_netlink.h> +#include <net/netdev_queues.h> #include "en.h" #include "en/channels.h" @@ -365,11 +366,6 @@ void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv, param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE; param->rx_pending = 1 << priv->channels.params.log_rq_mtu_frames; param->tx_pending = 1 << priv->channels.params.log_sq_size; - - kernel_param->tcp_data_split = - (priv->channels.params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) ? - ETHTOOL_TCP_DATA_SPLIT_ENABLED : - ETHTOOL_TCP_DATA_SPLIT_DISABLED; } static void mlx5e_get_ringparam(struct net_device *dev, @@ -382,6 +378,27 @@ static void mlx5e_get_ringparam(struct net_device *dev, mlx5e_ethtool_get_ringparam(priv, param, kernel_param); } +static bool mlx5e_ethtool_set_tcp_data_split(struct mlx5e_priv *priv, + u8 tcp_data_split, + struct netlink_ext_ack *extack) +{ + struct net_device *dev = priv->netdev; + + if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED && + !(dev->features & NETIF_F_GRO_HW)) { + NL_SET_ERR_MSG_MOD(extack, + "TCP-data-split is not supported when GRO HW is disabled"); + return false; + } + + /* Might need to disable HW-GRO if it was kept on due to hds. 
*/ + if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_DISABLED && + dev->cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED) + netdev_update_features(priv->netdev); + + return true; +} + int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, struct ethtool_ringparam *param, struct netlink_ext_ack *extack) @@ -440,6 +457,11 @@ static int mlx5e_set_ringparam(struct net_device *dev, { struct mlx5e_priv *priv = netdev_priv(dev); + if (!mlx5e_ethtool_set_tcp_data_split(priv, + kernel_param->tcp_data_split, + extack)) + return -EINVAL; + return mlx5e_ethtool_set_ringparam(priv, param, extack); } @@ -2623,6 +2645,7 @@ const struct ethtool_ops mlx5e_ethtool_ops = { ETHTOOL_COALESCE_USE_ADAPTIVE | ETHTOOL_COALESCE_USE_CQE, .supported_input_xfrm = RXH_XFRM_SYM_OR_XOR, + .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT, .get_drvinfo = mlx5e_get_drvinfo, .get_link = ethtool_op_get_link, .get_link_ext_state = mlx5e_get_link_ext_state, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index ea822c69d137..24559cbcbfc2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -41,6 +41,7 @@ #include <linux/filter.h> #include <net/netdev_lock.h> #include <net/netdev_queues.h> +#include <net/netdev_rx_queue.h> #include <net/page_pool/types.h> #include <net/pkt_sched.h> #include <net/xdp_sock_drv.h> @@ -78,7 +79,8 @@ static bool mlx5e_hw_gro_supported(struct mlx5_core_dev *mdev) { - if (!MLX5_CAP_GEN(mdev, shampo)) + if (!MLX5_CAP_GEN(mdev, shampo) || + !MLX5_CAP_SHAMPO(mdev, shampo_header_split_data_merge)) return false; /* Our HW-GRO implementation relies on "KSM Mkey" for @@ -331,47 +333,6 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); } -static int mlx5e_rq_shampo_hd_alloc(struct mlx5e_rq *rq, int node) -{ - rq->mpwqe.shampo = kvzalloc_node(sizeof(*rq->mpwqe.shampo), - GFP_KERNEL, node); - if (!rq->mpwqe.shampo) - return -ENOMEM; - return 0; -} - -static void mlx5e_rq_shampo_hd_free(struct mlx5e_rq *rq) -{ - kvfree(rq->mpwqe.shampo); -} - -static int mlx5e_rq_shampo_hd_info_alloc(struct mlx5e_rq *rq, int node) -{ - struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; - - shampo->bitmap = bitmap_zalloc_node(shampo->hd_per_wq, GFP_KERNEL, - node); - shampo->pages = kvzalloc_node(array_size(shampo->hd_per_wq, - sizeof(*shampo->pages)), - GFP_KERNEL, node); - if (!shampo->bitmap || !shampo->pages) - goto err_nomem; - - return 0; - -err_nomem: - bitmap_free(shampo->bitmap); - kvfree(shampo->pages); - - return -ENOMEM; -} - -static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq) -{ - bitmap_free(rq->mpwqe.shampo->bitmap); - kvfree(rq->mpwqe.shampo->pages); -} - static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) { int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); @@ -584,19 +545,18 @@ static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq } static int mlx5e_create_rq_hd_umr_mkey(struct mlx5_core_dev *mdev, - struct mlx5e_rq *rq) + u16 hd_per_wq, u32 *umr_mkey) { u32 max_ksm_size = BIT(MLX5_CAP_GEN(mdev, log_max_klm_list_size)); - if (max_ksm_size < rq->mpwqe.shampo->hd_per_wq) { + if (max_ksm_size < hd_per_wq) { mlx5_core_err(mdev, "max ksm list size 0x%x is smaller than shampo header buffer list size 0x%x\n", - max_ksm_size, rq->mpwqe.shampo->hd_per_wq); + max_ksm_size, hd_per_wq); return -EINVAL; } - - return mlx5e_create_umr_ksm_mkey(mdev, 
rq->mpwqe.shampo->hd_per_wq, + return mlx5e_create_umr_ksm_mkey(mdev, hd_per_wq, MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE, - &rq->mpwqe.shampo->mkey); + umr_mkey); } static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) @@ -758,6 +718,42 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param xdp_frag_size); } +static int mlx5e_rq_shampo_hd_info_alloc(struct mlx5e_rq *rq, u16 hd_per_wq, + int node) +{ + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; + + shampo->hd_per_wq = hd_per_wq; + + shampo->bitmap = bitmap_zalloc_node(hd_per_wq, GFP_KERNEL, node); + shampo->pages = kvzalloc_node(array_size(hd_per_wq, + sizeof(*shampo->pages)), + GFP_KERNEL, node); + if (!shampo->bitmap || !shampo->pages) + goto err_nomem; + + return 0; + +err_nomem: + kvfree(shampo->pages); + bitmap_free(shampo->bitmap); + + return -ENOMEM; +} + +static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq) +{ + kvfree(rq->mpwqe.shampo->pages); + bitmap_free(rq->mpwqe.shampo->bitmap); +} + +static bool mlx5_rq_needs_separate_hd_pool(struct mlx5e_rq *rq) +{ + struct netdev_rx_queue *rxq = __netif_get_rx_queue(rq->netdev, rq->ix); + + return !!rxq->mp_params.mp_ops; +} + static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_rq_param *rqp, @@ -765,42 +761,81 @@ static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev, u32 *pool_size, int node) { + void *wqc = MLX5_ADDR_OF(rqc, rqp->rqc, wq); + u32 hd_pool_size; + u16 hd_per_wq; + int wq_size; int err; if (!test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) return 0; - err = mlx5e_rq_shampo_hd_alloc(rq, node); - if (err) - goto out; - rq->mpwqe.shampo->hd_per_wq = - mlx5e_shampo_hd_per_wq(mdev, params, rqp); - err = mlx5e_create_rq_hd_umr_mkey(mdev, rq); + + rq->mpwqe.shampo = kvzalloc_node(sizeof(*rq->mpwqe.shampo), + GFP_KERNEL, node); + if (!rq->mpwqe.shampo) + return -ENOMEM; + + /* split headers data structures */ + hd_per_wq = mlx5e_shampo_hd_per_wq(mdev, params, rqp); + err = mlx5e_rq_shampo_hd_info_alloc(rq, hd_per_wq, node); if (err) - goto err_shampo_hd; - err = mlx5e_rq_shampo_hd_info_alloc(rq, node); + goto err_shampo_hd_info_alloc; + + err = mlx5e_create_rq_hd_umr_mkey(mdev, hd_per_wq, + &rq->mpwqe.shampo->mkey); if (err) - goto err_shampo_info; + goto err_umr_mkey; + + rq->mpwqe.shampo->key = cpu_to_be32(rq->mpwqe.shampo->mkey); + rq->mpwqe.shampo->hd_per_wqe = + mlx5e_shampo_hd_per_wqe(mdev, params, rqp); + wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz)); + hd_pool_size = (rq->mpwqe.shampo->hd_per_wqe * wq_size) / + MLX5E_SHAMPO_WQ_HEADER_PER_PAGE; + + if (mlx5_rq_needs_separate_hd_pool(rq)) { + /* Separate page pool for shampo headers */ + struct page_pool_params pp_params = { }; + + pp_params.order = 0; + pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; + pp_params.pool_size = hd_pool_size; + pp_params.nid = node; + pp_params.dev = rq->pdev; + pp_params.napi = rq->cq.napi; + pp_params.netdev = rq->netdev; + pp_params.dma_dir = rq->buff.map_dir; + pp_params.max_len = PAGE_SIZE; + + rq->hd_page_pool = page_pool_create(&pp_params); + if (IS_ERR(rq->hd_page_pool)) { + err = PTR_ERR(rq->hd_page_pool); + rq->hd_page_pool = NULL; + goto err_hds_page_pool; + } + } else { + /* Common page pool, reserve space for headers. 
*/ + *pool_size += hd_pool_size; + rq->hd_page_pool = NULL; + } + + /* gro only data structures */ rq->hw_gro_data = kvzalloc_node(sizeof(*rq->hw_gro_data), GFP_KERNEL, node); if (!rq->hw_gro_data) { err = -ENOMEM; goto err_hw_gro_data; } - rq->mpwqe.shampo->key = - cpu_to_be32(rq->mpwqe.shampo->mkey); - rq->mpwqe.shampo->hd_per_wqe = - mlx5e_shampo_hd_per_wqe(mdev, params, rqp); - rq->mpwqe.shampo->pages_per_wq = - rq->mpwqe.shampo->hd_per_wq / MLX5E_SHAMPO_WQ_HEADER_PER_PAGE; - *pool_size += rq->mpwqe.shampo->pages_per_wq; + return 0; err_hw_gro_data: - mlx5e_rq_shampo_hd_info_free(rq); -err_shampo_info: + page_pool_destroy(rq->hd_page_pool); +err_hds_page_pool: mlx5_core_destroy_mkey(mdev, rq->mpwqe.shampo->mkey); -err_shampo_hd: - mlx5e_rq_shampo_hd_free(rq); -out: +err_umr_mkey: + mlx5e_rq_shampo_hd_info_free(rq); +err_shampo_hd_info_alloc: + kvfree(rq->mpwqe.shampo); return err; } @@ -810,9 +845,11 @@ static void mlx5e_rq_free_shampo(struct mlx5e_rq *rq) return; kvfree(rq->hw_gro_data); + if (rq->hd_page_pool != rq->page_pool) + page_pool_destroy(rq->hd_page_pool); mlx5e_rq_shampo_hd_info_free(rq); mlx5_core_destroy_mkey(rq->mdev, rq->mpwqe.shampo->mkey); - mlx5e_rq_shampo_hd_free(rq); + kvfree(rq->mpwqe.shampo); } static int mlx5e_alloc_rq(struct mlx5e_params *params, @@ -929,6 +966,11 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, pp_params.netdev = rq->netdev; pp_params.dma_dir = rq->buff.map_dir; pp_params.max_len = PAGE_SIZE; + pp_params.queue_idx = rq->ix; + + /* Shampo header data split allow for unreadable netmem */ + if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) + pp_params.flags |= PP_FLAG_ALLOW_UNREADABLE_NETMEM; /* page_pool can be used even when there is no rq->xdp_prog, * given page_pool does not handle DMA mapping there is no @@ -941,6 +983,8 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, rq->page_pool = NULL; goto err_free_by_rq_type; } + if (!rq->hd_page_pool) + rq->hd_page_pool = rq->page_pool; if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, MEM_TYPE_PAGE_POOL, rq->page_pool); @@ -4043,10 +4087,6 @@ static int set_feature_hw_gro(struct net_device *netdev, bool enable) if (enable) { new_params.packet_merge.type = MLX5E_PACKET_MERGE_SHAMPO; - new_params.packet_merge.shampo.match_criteria_type = - MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED; - new_params.packet_merge.shampo.alignment_granularity = - MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_STRIDE; } else if (new_params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) { new_params.packet_merge.type = MLX5E_PACKET_MERGE_NONE; } else { @@ -4373,6 +4413,7 @@ static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev static netdev_features_t mlx5e_fix_features(struct net_device *netdev, netdev_features_t features) { + struct netdev_config *cfg = netdev->cfg_pending; struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_vlan_table *vlan; struct mlx5e_params *params; @@ -4439,6 +4480,13 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, } } + /* The header-data split ring param requires HW GRO to stay enabled. 
*/ + if (cfg && cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && + !(features & NETIF_F_GRO_HW)) { + netdev_warn(netdev, "Keeping HW-GRO enabled, TCP header-data split depends on it\n"); + features |= NETIF_F_GRO_HW; + } + if (mlx5e_is_uplink_rep(priv)) { features = mlx5e_fix_uplink_rep_features(netdev, features); netdev->netns_immutable = true; @@ -5454,6 +5502,103 @@ static const struct netdev_stat_ops mlx5e_stat_ops = { .get_base_stats = mlx5e_get_base_stats, }; +struct mlx5_qmgmt_data { + struct mlx5e_channel *c; + struct mlx5e_channel_param cparam; +}; + +static int mlx5e_queue_mem_alloc(struct net_device *dev, void *newq, + int queue_index) +{ + struct mlx5_qmgmt_data *new = (struct mlx5_qmgmt_data *)newq; + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_channels *chs = &priv->channels; + struct mlx5e_params params = chs->params; + struct mlx5_core_dev *mdev; + int err; + + mutex_lock(&priv->state_lock); + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = -ENODEV; + goto unlock; + } + + if (queue_index >= chs->num) { + err = -ERANGE; + goto unlock; + } + + if (MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS) || + chs->params.ptp_rx || + chs->params.xdp_prog || + priv->htb) { + netdev_err(priv->netdev, + "Cloning channels with Port/rx PTP, XDP or HTB is not supported\n"); + err = -EOPNOTSUPP; + goto unlock; + } + + mdev = mlx5_sd_ch_ix_get_dev(priv->mdev, queue_index); + err = mlx5e_build_channel_param(mdev, ¶ms, &new->cparam); + if (err) + goto unlock; + + err = mlx5e_open_channel(priv, queue_index, ¶ms, NULL, &new->c); +unlock: + mutex_unlock(&priv->state_lock); + return err; +} + +static void mlx5e_queue_mem_free(struct net_device *dev, void *mem) +{ + struct mlx5_qmgmt_data *data = (struct mlx5_qmgmt_data *)mem; + + /* not supposed to happen since mlx5e_queue_start never fails + * but this is how this should be implemented just in case + */ + if (data->c) + mlx5e_close_channel(data->c); +} + +static int mlx5e_queue_stop(struct net_device *dev, void *oldq, int queue_index) +{ + /* In mlx5 a txq cannot be simply stopped in isolation, only restarted. + * mlx5e_queue_start does not fail, we stop the old queue there. + * TODO: Improve this. 
+ */ + return 0; +} + +static int mlx5e_queue_start(struct net_device *dev, void *newq, + int queue_index) +{ + struct mlx5_qmgmt_data *new = (struct mlx5_qmgmt_data *)newq; + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_channel *old; + + mutex_lock(&priv->state_lock); + + /* stop and close the old */ + old = priv->channels.c[queue_index]; + mlx5e_deactivate_priv_channels(priv); + /* close old before activating new, to avoid napi conflict */ + mlx5e_close_channel(old); + + /* start the new */ + priv->channels.c[queue_index] = new->c; + mlx5e_activate_priv_channels(priv); + mutex_unlock(&priv->state_lock); + return 0; +} + +static const struct netdev_queue_mgmt_ops mlx5e_queue_mgmt_ops = { + .ndo_queue_mem_size = sizeof(struct mlx5_qmgmt_data), + .ndo_queue_mem_alloc = mlx5e_queue_mem_alloc, + .ndo_queue_mem_free = mlx5e_queue_mem_free, + .ndo_queue_start = mlx5e_queue_start, + .ndo_queue_stop = mlx5e_queue_stop, +}; + static void mlx5e_build_nic_netdev(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -5464,6 +5609,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) SET_NETDEV_DEV(netdev, mdev->device); netdev->netdev_ops = &mlx5e_netdev_ops; + netdev->queue_mgmt_ops = &mlx5e_queue_mgmt_ops; netdev->xdp_metadata_ops = &mlx5e_xdp_metadata_ops; netdev->xsk_tx_metadata_ops = &mlx5e_xsk_tx_metadata_ops; netdev->request_ops_lock = true; @@ -5506,17 +5652,17 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) MLX5E_MPWRQ_UMR_MODE_ALIGNED)) netdev->vlan_features |= NETIF_F_LRO; + if (mlx5e_hw_gro_supported(mdev) && + mlx5e_check_fragmented_striding_rq_cap(mdev, PAGE_SHIFT, + MLX5E_MPWRQ_UMR_MODE_ALIGNED)) + netdev->vlan_features |= NETIF_F_GRO_HW; + netdev->hw_features = netdev->vlan_features; netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; - if (mlx5e_hw_gro_supported(mdev) && - mlx5e_check_fragmented_striding_rq_cap(mdev, PAGE_SHIFT, - MLX5E_MPWRQ_UMR_MODE_ALIGNED)) - netdev->hw_features |= NETIF_F_GRO_HW; - if (mlx5e_tunnel_any_tx_proto_supported(mdev)) { netdev->hw_enc_features |= NETIF_F_HW_CSUM; netdev->hw_enc_features |= NETIF_F_TSO; @@ -5595,6 +5741,8 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->priv_flags |= IFF_UNICAST_FLT; + netdev->netmem_tx = true; + netif_set_tso_max_size(netdev, GSO_MAX_SIZE); mlx5e_set_xdp_feature(netdev); mlx5e_set_netdev_dev_addr(netdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 84b1ab8233b8..2bb32082bfcc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -273,33 +273,32 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, #define MLX5E_PAGECNT_BIAS_MAX (PAGE_SIZE / 64) -static int mlx5e_page_alloc_fragmented(struct mlx5e_rq *rq, +static int mlx5e_page_alloc_fragmented(struct page_pool *pp, struct mlx5e_frag_page *frag_page) { - struct page *page; + netmem_ref netmem = page_pool_dev_alloc_netmems(pp); - page = page_pool_dev_alloc_pages(rq->page_pool); - if (unlikely(!page)) + if (unlikely(!netmem)) return -ENOMEM; - page_pool_fragment_page(page, MLX5E_PAGECNT_BIAS_MAX); + page_pool_fragment_netmem(netmem, MLX5E_PAGECNT_BIAS_MAX); *frag_page = (struct mlx5e_frag_page) { - .page = page, + .netmem = netmem, .frags = 0, }; return 0; } -static void 
mlx5e_page_release_fragmented(struct mlx5e_rq *rq, +static void mlx5e_page_release_fragmented(struct page_pool *pp, struct mlx5e_frag_page *frag_page) { u16 drain_count = MLX5E_PAGECNT_BIAS_MAX - frag_page->frags; - struct page *page = frag_page->page; + netmem_ref netmem = frag_page->netmem; - if (page_pool_unref_page(page, drain_count) == 0) - page_pool_put_unrefed_page(rq->page_pool, page, -1, true); + if (page_pool_unref_netmem(netmem, drain_count) == 0) + page_pool_put_unrefed_netmem(pp, netmem, -1, true); } static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, @@ -313,7 +312,8 @@ static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, * offset) should just use the new one without replenishing again * by themselves. */ - err = mlx5e_page_alloc_fragmented(rq, frag->frag_page); + err = mlx5e_page_alloc_fragmented(rq->page_pool, + frag->frag_page); return err; } @@ -332,7 +332,7 @@ static inline void mlx5e_put_rx_frag(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *frag) { if (mlx5e_frag_can_release(frag)) - mlx5e_page_release_fragmented(rq, frag->frag_page); + mlx5e_page_release_fragmented(rq->page_pool, frag->frag_page); } static inline struct mlx5e_wqe_frag_info *get_frag(struct mlx5e_rq *rq, u16 ix) @@ -358,7 +358,7 @@ static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe, frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); headroom = i == 0 ? rq->buff.headroom : 0; - addr = page_pool_get_dma_addr(frag->frag_page->page); + addr = page_pool_get_dma_addr_netmem(frag->frag_page->netmem); wqe->data[i].addr = cpu_to_be64(addr + frag->offset + headroom); } @@ -499,9 +499,10 @@ mlx5e_add_skb_shared_info_frag(struct mlx5e_rq *rq, struct skb_shared_info *sinf struct xdp_buff *xdp, struct mlx5e_frag_page *frag_page, u32 frag_offset, u32 len) { + netmem_ref netmem = frag_page->netmem; skb_frag_t *frag; - dma_addr_t addr = page_pool_get_dma_addr(frag_page->page); + dma_addr_t addr = page_pool_get_dma_addr_netmem(netmem); dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len, rq->buff.map_dir); if (!xdp_buff_has_frags(xdp)) { @@ -514,9 +515,9 @@ mlx5e_add_skb_shared_info_frag(struct mlx5e_rq *rq, struct skb_shared_info *sinf } frag = &sinfo->frags[sinfo->nr_frags++]; - skb_frag_fill_page_desc(frag, frag_page->page, frag_offset, len); + skb_frag_fill_netmem_desc(frag, netmem, frag_offset, len); - if (page_is_pfmemalloc(frag_page->page)) + if (netmem_is_pfmemalloc(netmem)) xdp_buff_set_frag_pfmemalloc(xdp); sinfo->xdp_frags_size += len; } @@ -527,27 +528,29 @@ mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb, u32 frag_offset, u32 len, unsigned int truesize) { - dma_addr_t addr = page_pool_get_dma_addr(frag_page->page); + dma_addr_t addr = page_pool_get_dma_addr_netmem(frag_page->netmem); u8 next_frag = skb_shinfo(skb)->nr_frags; + netmem_ref netmem = frag_page->netmem; dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len, rq->buff.map_dir); - if (skb_can_coalesce(skb, next_frag, frag_page->page, frag_offset)) { + if (skb_can_coalesce_netmem(skb, next_frag, netmem, frag_offset)) { skb_coalesce_rx_frag(skb, next_frag - 1, len, truesize); - } else { - frag_page->frags++; - skb_add_rx_frag(skb, next_frag, frag_page->page, - frag_offset, len, truesize); + return; } + + frag_page->frags++; + skb_add_rx_frag_netmem(skb, next_frag, netmem, + frag_offset, len, truesize); } static inline void mlx5e_copy_skb_header(struct mlx5e_rq *rq, struct sk_buff *skb, - struct page *page, dma_addr_t addr, + netmem_ref netmem, dma_addr_t addr, int offset_from, int 
dma_offset, u32 headlen) { - const void *from = page_address(page) + offset_from; + const void *from = netmem_address(netmem) + offset_from; /* Aligning len to sizeof(long) optimizes memcpy performance */ unsigned int len = ALIGN(headlen, sizeof(long)); @@ -584,7 +587,8 @@ mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi) struct mlx5e_frag_page *frag_page; frag_page = &wi->alloc_units.frag_pages[i]; - mlx5e_page_release_fragmented(rq, frag_page); + mlx5e_page_release_fragmented(rq->page_pool, + frag_page); } } } @@ -679,12 +683,11 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, index); u64 addr; - err = mlx5e_page_alloc_fragmented(rq, frag_page); + err = mlx5e_page_alloc_fragmented(rq->hd_page_pool, frag_page); if (unlikely(err)) goto err_unmap; - - addr = page_pool_get_dma_addr(frag_page->page); + addr = page_pool_get_dma_addr_netmem(frag_page->netmem); for (int j = 0; j < MLX5E_SHAMPO_WQ_HEADER_PER_PAGE; j++) { header_offset = mlx5e_shampo_hd_offset(index++); @@ -715,7 +718,8 @@ err_unmap: if (!header_offset) { struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, index); - mlx5e_page_release_fragmented(rq, frag_page); + mlx5e_page_release_fragmented(rq->hd_page_pool, + frag_page); } } @@ -791,10 +795,11 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, frag_page++) { dma_addr_t addr; - err = mlx5e_page_alloc_fragmented(rq, frag_page); + err = mlx5e_page_alloc_fragmented(rq->page_pool, frag_page); if (unlikely(err)) goto err_unmap; - addr = page_pool_get_dma_addr(frag_page->page); + + addr = page_pool_get_dma_addr_netmem(frag_page->netmem); umr_wqe->inline_mtts[i] = (struct mlx5_mtt) { .ptag = cpu_to_be64(addr | MLX5_EN_WR), }; @@ -836,7 +841,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) err_unmap: while (--i >= 0) { frag_page--; - mlx5e_page_release_fragmented(rq, frag_page); + mlx5e_page_release_fragmented(rq->page_pool, frag_page); } bitmap_fill(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); @@ -855,7 +860,7 @@ mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index) if (((header_index + 1) & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) == 0) { struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index); - mlx5e_page_release_fragmented(rq, frag_page); + mlx5e_page_release_fragmented(rq->hd_page_pool, frag_page); } clear_bit(header_index, shampo->bitmap); } @@ -1100,6 +1105,8 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) if (rq->page_pool) page_pool_nid_changed(rq->page_pool, numa_mem_id()); + if (rq->hd_page_pool) + page_pool_nid_changed(rq->hd_page_pool, numa_mem_id()); head = rq->mpwqe.actual_wq_head; i = missing; @@ -1212,7 +1219,7 @@ static void *mlx5e_shampo_get_packet_hd(struct mlx5e_rq *rq, u16 header_index) struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index); u16 head_offset = mlx5e_shampo_hd_offset(header_index) + rq->buff.headroom; - return page_address(frag_page->page) + head_offset; + return netmem_address(frag_page->netmem) + head_offset; } static void mlx5e_shampo_update_ipv4_udp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4) @@ -1673,11 +1680,11 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, dma_addr_t addr; u32 frag_size; - va = page_address(frag_page->page) + wi->offset; + va = netmem_address(frag_page->netmem) + wi->offset; data = va + rx_headroom; 
frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); - addr = page_pool_get_dma_addr(frag_page->page); + addr = page_pool_get_dma_addr_netmem(frag_page->netmem); dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset, frag_size, rq->buff.map_dir); net_prefetch(data); @@ -1727,10 +1734,10 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi frag_page = wi->frag_page; - va = page_address(frag_page->page) + wi->offset; + va = netmem_address(frag_page->netmem) + wi->offset; frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt); - addr = page_pool_get_dma_addr(frag_page->page); + addr = page_pool_get_dma_addr_netmem(frag_page->netmem); dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset, rq->buff.frame0_sz, rq->buff.map_dir); net_prefetchw(va); /* xdp_frame data area */ @@ -2003,12 +2010,14 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w if (prog) { /* area for bpf_xdp_[store|load]_bytes */ - net_prefetchw(page_address(frag_page->page) + frag_offset); - if (unlikely(mlx5e_page_alloc_fragmented(rq, &wi->linear_page))) { + net_prefetchw(netmem_address(frag_page->netmem) + frag_offset); + if (unlikely(mlx5e_page_alloc_fragmented(rq->page_pool, + &wi->linear_page))) { rq->stats->buff_alloc_err++; return NULL; } - va = page_address(wi->linear_page.page); + + va = netmem_address(wi->linear_page.netmem); net_prefetchw(va); /* xdp_frame data area */ linear_hr = XDP_PACKET_HEADROOM; linear_data_len = 0; @@ -2068,7 +2077,8 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w wi->linear_page.frags++; } - mlx5e_page_release_fragmented(rq, &wi->linear_page); + mlx5e_page_release_fragmented(rq->page_pool, + &wi->linear_page); return NULL; /* page/packet was consumed by XDP */ } @@ -2077,13 +2087,14 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w mxbuf->xdp.data - mxbuf->xdp.data_hard_start, 0, mxbuf->xdp.data - mxbuf->xdp.data_meta); if (unlikely(!skb)) { - mlx5e_page_release_fragmented(rq, &wi->linear_page); + mlx5e_page_release_fragmented(rq->page_pool, + &wi->linear_page); return NULL; } skb_mark_for_recycle(skb); wi->linear_page.frags++; - mlx5e_page_release_fragmented(rq, &wi->linear_page); + mlx5e_page_release_fragmented(rq->page_pool, &wi->linear_page); if (xdp_buff_has_frags(&mxbuf->xdp)) { struct mlx5e_frag_page *pagep; @@ -2117,8 +2128,8 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w while (++pagep < frag_page); } /* copy header */ - addr = page_pool_get_dma_addr(head_page->page); - mlx5e_copy_skb_header(rq, skb, head_page->page, addr, + addr = page_pool_get_dma_addr_netmem(head_page->netmem); + mlx5e_copy_skb_header(rq, skb, head_page->netmem, addr, head_offset, head_offset, headlen); /* skb linear part was allocated with headlen and aligned to long */ skb->tail += headlen; @@ -2148,11 +2159,11 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, return NULL; } - va = page_address(frag_page->page) + head_offset; + va = netmem_address(frag_page->netmem) + head_offset; data = va + rx_headroom; frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); - addr = page_pool_get_dma_addr(frag_page->page); + addr = page_pool_get_dma_addr_netmem(frag_page->netmem); dma_sync_single_range_for_cpu(rq->pdev, addr, head_offset, frag_size, rq->buff.map_dir); net_prefetch(data); @@ -2191,16 +2202,19 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, struct mlx5_cqe64 *cqe, u16 
header_index) { struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index); - dma_addr_t page_dma_addr = page_pool_get_dma_addr(frag_page->page); u16 head_offset = mlx5e_shampo_hd_offset(header_index); - dma_addr_t dma_addr = page_dma_addr + head_offset; u16 head_size = cqe->shampo.header_size; u16 rx_headroom = rq->buff.headroom; struct sk_buff *skb = NULL; + dma_addr_t page_dma_addr; + dma_addr_t dma_addr; void *hdr, *data; u32 frag_size; - hdr = page_address(frag_page->page) + head_offset; + page_dma_addr = page_pool_get_dma_addr_netmem(frag_page->netmem); + dma_addr = page_dma_addr + head_offset; + + hdr = netmem_address(frag_page->netmem) + head_offset; data = hdr + rx_headroom; frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + head_size); @@ -2225,7 +2239,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, } net_prefetchw(skb->data); - mlx5e_copy_skb_header(rq, skb, frag_page->page, dma_addr, + mlx5e_copy_skb_header(rq, skb, frag_page->netmem, dma_addr, head_offset + rx_headroom, rx_headroom, head_size); /* skb linear part was allocated with headlen and aligned to long */ @@ -2319,11 +2333,23 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq } if (!*skb) { - if (likely(head_size)) + if (likely(head_size)) { *skb = mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index); - else - *skb = mlx5e_skb_from_cqe_mpwrq_nonlinear(rq, wi, cqe, cqe_bcnt, - data_offset, page_idx); + } else { + struct mlx5e_frag_page *frag_page; + + frag_page = &wi->alloc_units.frag_pages[page_idx]; + /* Drop packets with header in unreadable data area to + * prevent the kernel from touching it. + */ + if (unlikely(netmem_is_net_iov(frag_page->netmem))) + goto free_hd_entry; + *skb = mlx5e_skb_from_cqe_mpwrq_nonlinear(rq, wi, cqe, + cqe_bcnt, + data_offset, + page_idx); + } + if (unlikely(!*skb)) goto free_hd_entry; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5520524c93bf..9508968cb300 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3873,20 +3873,26 @@ static inline int __must_check skb_put_padto(struct sk_buff *skb, unsigned int l bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i) __must_check; -static inline bool skb_can_coalesce(struct sk_buff *skb, int i, - const struct page *page, int off) +static inline bool skb_can_coalesce_netmem(struct sk_buff *skb, int i, + netmem_ref netmem, int off) { if (skb_zcopy(skb)) return false; if (i) { const skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1]; - return page == skb_frag_page(frag) && + return netmem == skb_frag_netmem(frag) && off == skb_frag_off(frag) + skb_frag_size(frag); } return false; } +static inline bool skb_can_coalesce(struct sk_buff *skb, int i, + const struct page *page, int off) +{ + return skb_can_coalesce_netmem(skb, i, page_to_netmem(page), off); +} + static inline int __skb_linearize(struct sk_buff *skb) { return __pskb_pull_tail(skb, skb->data_len) ? 
0 : -ENOMEM;
diff --git a/include/net/netmem.h b/include/net/netmem.h
index 850869b45b45..7a1dafa3f080 100644
--- a/include/net/netmem.h
+++ b/include/net/netmem.h
@@ -139,7 +139,7 @@ static inline netmem_ref net_iov_to_netmem(struct net_iov *niov)
 	return (__force netmem_ref)((unsigned long)niov | NET_IOV);
 }
 
-static inline netmem_ref page_to_netmem(struct page *page)
+static inline netmem_ref page_to_netmem(const struct page *page)
 {
 	return (__force netmem_ref)page;
 }
diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h
index 93f2c31baf9b..773fc65780b5 100644
--- a/include/net/page_pool/helpers.h
+++ b/include/net/page_pool/helpers.h
@@ -153,6 +153,13 @@ static inline netmem_ref page_pool_dev_alloc_netmem(struct page_pool *pool,
 	return page_pool_alloc_netmem(pool, offset, size, gfp);
 }
 
+static inline netmem_ref page_pool_dev_alloc_netmems(struct page_pool *pool)
+{
+	gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
+
+	return page_pool_alloc_netmems(pool, gfp);
+}
+
 static inline struct page *page_pool_alloc(struct page_pool *pool,
 					   unsigned int *offset,
 					   unsigned int *size, gfp_t gfp)
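
A hypothetical usage sketch tying the pieces together: a driver-private page pool for header buffers, filled through the page_pool_dev_alloc_netmems() helper introduced above. The function and parameter names here are illustrative; the in-tree setup is mlx5_rq_shampo_alloc() in the en_main.c hunk, and the main data pool additionally gets PP_FLAG_ALLOW_UNREADABLE_NETMEM in mlx5e_alloc_rq() when header-data split is active.

	/* Illustrative only: hypothetical names mirroring the header page pool
	 * created in mlx5_rq_shampo_alloc() above.
	 */
	#include <linux/dma-direction.h>
	#include <linux/netdevice.h>
	#include <net/netmem.h>
	#include <net/page_pool/helpers.h>
	#include <net/page_pool/types.h>

	static struct page_pool *hdr_pool_create(struct device *dev,
						 struct napi_struct *napi,
						 struct net_device *netdev,
						 unsigned int pool_size, int nid)
	{
		struct page_pool_params pp_params = {
			.order		= 0,
			.flags		= PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
			.pool_size	= pool_size,
			.nid		= nid,
			.dev		= dev,
			.napi		= napi,
			.netdev		= netdev,
			.dma_dir	= DMA_FROM_DEVICE,
			.max_len	= PAGE_SIZE,
		};

		/* Returns an ERR_PTR() on failure, as handled in the driver hunk. */
		return page_pool_create(&pp_params);
	}

	static netmem_ref hdr_buf_alloc(struct page_pool *pool)
	{
		/* Whole-page allocation; GFP_ATOMIC | __GFP_NOWARN internally. */
		return page_pool_dev_alloc_netmems(pool);
	}

With the ethtool hunks in place, header-data split can then be toggled from user space with a recent ethtool (ethtool -G <ifname> tcp-data-split on), which the new mlx5e_ethtool_set_tcp_data_split() hook validates against NETIF_F_GRO_HW before the ring parameters are applied.
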