diff options
author | Or Har-Toov <ohartoov@nvidia.com> | 2025-08-13 15:39:56 +0300 |
---|---|---|
committer | Jason Gunthorpe <jgg@nvidia.com> | 2025-08-25 14:42:19 -0300 |
commit | 1a7c18c485bf17ef408d5ebb7f83e1f8ef329585 (patch) | |
tree | 40bce48b5dc258f09769e521e7e7f6ee02927691 | |
parent | d9e6e85b7beb2aeb8defac2f705b23532ddb25d4 (diff) |
RDMA/mlx5: Better estimate max_qp_wr to reflect WQE count
The mlx5 driver currently derives max_qp_wr directly from the
log_max_qp_sz HCA capability:
props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
However, this value represents the number of WQEs in units of Basic
Blocks (see MLX5_SEND_WQE_BB), not actual number of WQEs. Since the size
of a WQE can vary depending on transport type and features (e.g., atomic
operations, UMR, LSO), the actual number of WQEs can be significantly
smaller than the WQEBB count suggests.
This patch introduces a conservative estimation of the worst-case WQE size
— considering largest segments possible with 1 SGE and no inline data or
special features. It uses this to derive a more accurate max_qp_wr value.
Fixes: 938fe83c8dcb ("net/mlx5_core: New device capabilities handling")
Link: https://patch.msgid.link/r/7d992c9831c997ed5c33d30973406dc2dcaf5e89.1755088725.git.leon@kernel.org
Reported-by: Chuck Lever <cel@kernel.org>
Closes: https://lore.kernel.org/all/20250506142202.GJ2260621@ziepe.ca/
Signed-off-by: Or Har-Toov <ohartoov@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
-rw-r--r-- | drivers/infiniband/hw/mlx5/main.c | 48 |
1 files changed, 47 insertions, 1 deletions
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 505349c68e79..de5339170876 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -13,6 +13,7 @@ #include <linux/dma-mapping.h> #include <linux/slab.h> #include <linux/bitmap.h> +#include <linux/log2.h> #include <linux/sched.h> #include <linux/sched/mm.h> #include <linux/sched/task.h> @@ -883,6 +884,51 @@ static void fill_esw_mgr_reg_c0(struct mlx5_core_dev *mdev, resp->reg_c0.mask = mlx5_eswitch_get_vport_metadata_mask(); } +/* + * Calculate maximum SQ overhead across all QP types. + * Other QP types (REG_UMR, UC, RC, UD/SMI/GSI, XRC_TGT) + * have smaller overhead than the types calculated below, + * so they are implicitly included. + */ +static u32 mlx5_ib_calc_max_sq_overhead(void) +{ + u32 max_overhead_xrc, overhead_ud_lso, a, b; + + /* XRC_INI */ + max_overhead_xrc = sizeof(struct mlx5_wqe_xrc_seg); + max_overhead_xrc += sizeof(struct mlx5_wqe_ctrl_seg); + a = sizeof(struct mlx5_wqe_atomic_seg) + + sizeof(struct mlx5_wqe_raddr_seg); + b = sizeof(struct mlx5_wqe_umr_ctrl_seg) + + sizeof(struct mlx5_mkey_seg) + + MLX5_IB_SQ_UMR_INLINE_THRESHOLD / MLX5_IB_UMR_OCTOWORD; + max_overhead_xrc += max(a, b); + + /* UD with LSO */ + overhead_ud_lso = sizeof(struct mlx5_wqe_ctrl_seg); + overhead_ud_lso += sizeof(struct mlx5_wqe_eth_pad); + overhead_ud_lso += sizeof(struct mlx5_wqe_eth_seg); + overhead_ud_lso += sizeof(struct mlx5_wqe_datagram_seg); + + return max(max_overhead_xrc, overhead_ud_lso); +} + +static u32 mlx5_ib_calc_max_qp_wr(struct mlx5_ib_dev *dev) +{ + struct mlx5_core_dev *mdev = dev->mdev; + u32 max_wqe_bb_units = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); + u32 max_wqe_size; + /* max QP overhead + 1 SGE, no inline, no special features */ + max_wqe_size = mlx5_ib_calc_max_sq_overhead() + + sizeof(struct mlx5_wqe_data_seg); + + max_wqe_size = roundup_pow_of_two(max_wqe_size); + + max_wqe_size = ALIGN(max_wqe_size, MLX5_SEND_WQE_BB); + + return (max_wqe_bb_units * MLX5_SEND_WQE_BB) / max_wqe_size; +} + static int mlx5_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *uhw) @@ -1041,7 +1087,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->max_mr_size = ~0ull; props->page_size_cap = ~(min_page_size - 1); props->max_qp = 1 << MLX5_CAP_GEN(mdev, log_max_qp); - props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); + props->max_qp_wr = mlx5_ib_calc_max_qp_wr(dev); max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) / sizeof(struct mlx5_wqe_data_seg); max_sq_desc = min_t(int, MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512); |