diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-05-01 09:15:05 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-05-01 09:15:05 -0700 |
commit | f34b2cf17825d69ae1e227871059ab18c2f57817 (patch) | |
tree | 501d035f600f88e56cb1dcfa299af279a4f7147d /drivers/infiniband/hw/mlx5/devx.c | |
parent | 9f67672a817ec046f7554a885f0fe0d60e1bf99f (diff) | |
parent | 6da7bda36388ae00822f732c11febfe2ebbb5544 (diff) |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"This is significantly bug fixes and general cleanups. The noteworthy
new features are fairly small:
- XRC support for HNS and improves RQ operations
- Bug fixes and updates for hns, mlx5, bnxt_re, hfi1, i40iw, rxe, siw
and qib
- Quite a few general cleanups on spelling, error handling, static
checker detections, etc
- Increase the number of device ports supported beyond 255. High port
count software switches now exist
- Several bug fixes for rtrs
- mlx5 Device Memory support for host controlled atomics
- Report SRQ tables through to rdma-tool"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (145 commits)
IB/qib: Remove redundant assignment to ret
RDMA/nldev: Add copy-on-fork attribute to get sys command
RDMA/bnxt_re: Fix a double free in bnxt_qplib_alloc_res
RDMA/siw: Fix a use after free in siw_alloc_mr
IB/hfi1: Remove redundant variable rcd
RDMA/nldev: Add QP numbers to SRQ information
RDMA/nldev: Return SRQ information
RDMA/restrack: Add support to get resource tracking for SRQ
RDMA/nldev: Return context information
RDMA/core: Add CM to restrack after successful attachment to a device
RDMA/cma: Skip device which doesn't support CM
RDMA/rxe: Fix a bug in rxe_fill_ip_info()
RDMA/mlx5: Expose private query port
RDMA/mlx4: Remove an unused variable
RDMA/mlx5: Fix type assignment for ICM DM
IB/mlx5: Set right RoCE l3 type and roce version while deleting GID
RDMA/i40iw: Fix error unwinding when i40iw_hmc_sd_one fails
RDMA/cxgb4: add missing qpid increment
IB/ipoib: Remove unnecessary struct declaration
RDMA/bnxt_re: Get rid of custom module reference counting
...
Diffstat (limited to 'drivers/infiniband/hw/mlx5/devx.c')
-rw-r--r-- | drivers/infiniband/hw/mlx5/devx.c | 64 |
1 files changed, 54 insertions, 10 deletions
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 07b8350929cd..a0b677accd96 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -2185,27 +2185,69 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, return 0; } +static unsigned int devx_umem_find_best_pgsize(struct ib_umem *umem, + unsigned long pgsz_bitmap) +{ + unsigned long page_size; + + /* Don't bother checking larger page sizes as offset must be zero and + * total DEVX umem length must be equal to total umem length. + */ + pgsz_bitmap &= GENMASK_ULL(max_t(u64, order_base_2(umem->length), + PAGE_SHIFT), + MLX5_ADAPTER_PAGE_SHIFT); + if (!pgsz_bitmap) + return 0; + + page_size = ib_umem_find_best_pgoff(umem, pgsz_bitmap, U64_MAX); + if (!page_size) + return 0; + + /* If the page_size is less than the CPU page size then we can use the + * offset and create a umem which is a subset of the page list. + * For larger page sizes we can't be sure the DMA list reflects the + * VA so we must ensure that the umem extent is exactly equal to the + * page list. Reduce the page size until one of these cases is true. + */ + while ((ib_umem_dma_offset(umem, page_size) != 0 || + (umem->length % page_size) != 0) && + page_size > PAGE_SIZE) + page_size /= 2; + + return page_size; +} + static int devx_umem_reg_cmd_alloc(struct mlx5_ib_dev *dev, struct uverbs_attr_bundle *attrs, struct devx_umem *obj, struct devx_umem_reg_cmd *cmd) { + unsigned long pgsz_bitmap; unsigned int page_size; __be64 *mtt; void *umem; + int ret; /* - * We don't know what the user intends to use this umem for, but the HW - * restrictions must be met. MR, doorbell records, QP, WQ and CQ all - * have different requirements. Since we have no idea how to sort this - * out, only support PAGE_SIZE with the expectation that userspace will - * provide the necessary alignments inside the known PAGE_SIZE and that - * FW will check everything. + * If the user does not pass in pgsz_bitmap then the user promises not + * to use umem_offset!=0 in any commands that allocate on top of the + * umem. + * + * If the user wants to use a umem_offset then it must pass in + * pgsz_bitmap which guides the maximum page size and thus maximum + * object alignment inside the umem. See the PRM. + * + * Users are not allowed to use IOVA here, mkeys are not supported on + * umem. */ - page_size = ib_umem_find_best_pgoff( - obj->umem, PAGE_SIZE, - __mlx5_page_offset_to_bitmask(__mlx5_bit_sz(umem, page_offset), - 0)); + ret = uverbs_get_const_default(&pgsz_bitmap, attrs, + MLX5_IB_ATTR_DEVX_UMEM_REG_PGSZ_BITMAP, + GENMASK_ULL(63, + min(PAGE_SHIFT, MLX5_ADAPTER_PAGE_SHIFT))); + if (ret) + return ret; + + page_size = devx_umem_find_best_pgsize(obj->umem, pgsz_bitmap); if (!page_size) return -EINVAL; @@ -2791,6 +2833,8 @@ DECLARE_UVERBS_NAMED_METHOD( UA_MANDATORY), UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, enum ib_access_flags), + UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_PGSZ_BITMAP, + u64), UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, UVERBS_ATTR_TYPE(u32), UA_MANDATORY)); |