summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Dragos Tatulea <dtatulea@nvidia.com> 2024-11-07 21:43:53 +0200
committer: Jakub Kicinski <kuba@kernel.org> 2024-11-11 19:28:18 -0800
commit: 8a0ee54027b1fbccda3f2683dafec9b7216993a4 (patch)
tree: 86d0f79251b5ddc0302287e00d53a3117260d66c
parent: be034baba83e2a80a0b2c0f24c08547b6eedc79a (diff)
net/mlx5e: SHAMPO, Simplify UMR allocation for headers
Allocating page fragments for header data split is currently more complicated than it should be. That's because the number of KSM entries allocated is not aligned to the number of headers per page. This leads to having leftovers in the next allocation which require additional accounting and needlessly complicated code.

This patch aligns (down) the number of KSM entries in the UMR WQE to the number of headers per page by:

1) Aligning the max number of entries allocated per UMR WQE (max_ksm_entries) to MLX5E_SHAMPO_WQ_HEADER_PER_PAGE.

2) Aligning the total number of free headers to MLX5E_SHAMPO_WQ_HEADER_PER_PAGE.

... and then it drops the extra accounting code from mlx5e_build_shampo_hd_umr().

Although the number of entries allocated per UMR WQE is slightly smaller due to aligning down, no performance impact was observed.

Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20241107194357.683732-9-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 29
2 files changed, 12 insertions, 18 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 58f3df784ded..4449a57ba5b2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -633,7 +633,6 @@ struct mlx5e_shampo_hd {
u16 pi;
u16 ci;
__be32 key;
- u64 last_addr;
};
struct mlx5e_hw_gro_data {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index d81083f4f316..e044e5d11f05 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -648,30 +648,26 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
u16 ksm_entries, u16 index)
{
struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
- u16 entries, pi, header_offset, err, wqe_bbs, new_entries;
+ u16 pi, header_offset, err, wqe_bbs;
u32 lkey = rq->mdev->mlx5e_res.hw_objs.mkey;
u16 page_index = shampo->curr_page_index;
struct mlx5e_frag_page *frag_page;
- u64 addr = shampo->last_addr;
struct mlx5e_dma_info *dma_info;
struct mlx5e_umr_wqe *umr_wqe;
int headroom, i;
+ u64 addr = 0;
headroom = rq->buff.headroom;
- new_entries = ksm_entries - (shampo->pi & (MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT - 1));
- entries = ALIGN(ksm_entries, MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT);
- wqe_bbs = MLX5E_KSM_UMR_WQEBBS(entries);
+ wqe_bbs = MLX5E_KSM_UMR_WQEBBS(ksm_entries);
pi = mlx5e_icosq_get_next_pi(sq, wqe_bbs);
umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
- build_ksm_umr(sq, umr_wqe, shampo->key, index, entries);
+ build_ksm_umr(sq, umr_wqe, shampo->key, index, ksm_entries);
frag_page = &shampo->pages[page_index];
- for (i = 0; i < entries; i++, index++) {
+ WARN_ON_ONCE(ksm_entries & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1));
+ for (i = 0; i < ksm_entries; i++, index++) {
dma_info = &shampo->info[index];
- if (i >= ksm_entries || (index < shampo->pi && shampo->pi - index <
- MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT))
- goto update_ksm;
header_offset = (index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) <<
MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE;
if (!(header_offset & (PAGE_SIZE - 1))) {
@@ -691,7 +687,6 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
dma_info->frag_page = frag_page;
}
-update_ksm:
umr_wqe->inline_ksms[i] = (struct mlx5_ksm) {
.key = cpu_to_be32(lkey),
.va = cpu_to_be64(dma_info->addr + headroom),
@@ -701,12 +696,11 @@ update_ksm:
sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
.wqe_type = MLX5E_ICOSQ_WQE_SHAMPO_HD_UMR,
.num_wqebbs = wqe_bbs,
- .shampo.len = new_entries,
+ .shampo.len = ksm_entries,
};
- shampo->pi = (shampo->pi + new_entries) & (shampo->hd_per_wq - 1);
+ shampo->pi = (shampo->pi + ksm_entries) & (shampo->hd_per_wq - 1);
shampo->curr_page_index = page_index;
- shampo->last_addr = addr;
sq->pc += wqe_bbs;
sq->doorbell_cseg = &umr_wqe->ctrl;
@@ -731,7 +725,8 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq)
struct mlx5e_icosq *sq = rq->icosq;
int i, err, max_ksm_entries, len;
- max_ksm_entries = MLX5E_MAX_KSM_PER_WQE(rq->mdev);
+ max_ksm_entries = ALIGN_DOWN(MLX5E_MAX_KSM_PER_WQE(rq->mdev),
+ MLX5E_SHAMPO_WQ_HEADER_PER_PAGE);
ksm_entries = bitmap_find_window(shampo->bitmap,
shampo->hd_per_wqe,
shampo->hd_per_wq, shampo->pi);
@@ -739,8 +734,8 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq)
if (!ksm_entries)
return 0;
- ksm_entries += (shampo->pi & (MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT - 1));
- index = ALIGN_DOWN(shampo->pi, MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT);
+ /* pi is aligned to MLX5E_SHAMPO_WQ_HEADER_PER_PAGE */
+ index = shampo->pi;
entries_before = shampo->hd_per_wq - index;
if (unlikely(entries_before < ksm_entries))