From 26e990badde40b2fb824bfa3cb9d4288a79584bc Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sat, 3 Oct 2020 20:20:06 -0300 Subject: RDMA: Check attr_mask during modify_qp Each driver should check that it can support the provided attr_mask during modify_qp. IB_USER_VERBS_EX_CMD_MODIFY_QP was being used to block modify_qp_ex because the driver didn't check RATE_LIMIT. Link: https://lore.kernel.org/r/6-v1-caa70ba3d1ab+1436e-ucmd_mask_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9bf6c319a670..0f9ce27bedcb 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1234,6 +1234,8 @@ enum ib_qp_attr_mask { IB_QP_RESERVED3 = (1<<23), IB_QP_RESERVED4 = (1<<24), IB_QP_RATE_LIMIT = (1<<25), + + IB_QP_ATTR_STANDARD_BITS = GENMASK(20, 0), }; enum ib_qp_state { -- cgit From bd2a40cc2463766ed1a55d94a4ccbdcd621da323 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sat, 3 Oct 2020 20:20:09 -0300 Subject: RDMA/core Remove uverbs_ex_cmd_mask No driver sets it, and the core code sets a maximum mask, simply remove it. Disabled operations are now handled either by having a NULL ops pointer, or by having the common driver callbacks check for unsupported extended attributes. Link: https://lore.kernel.org/r/9-v1-caa70ba3d1ab+1436e-ucmd_mask_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/rdma') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0f9ce27bedcb..c7c6b06171e9 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2667,7 +2667,6 @@ struct ib_device { const struct attribute_group *groups[3]; u64 uverbs_cmd_mask; - u64 uverbs_ex_cmd_mask; char node_desc[IB_DEVICE_NODE_DESC_MAX]; __be64 node_guid; -- cgit From 676a80adba0131e1603ef3de5f73a19a0d3d0e65 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sat, 3 Oct 2020 20:20:11 -0300 Subject: RDMA: Remove AH from uverbs_cmd_mask Drivers that need a uverbs AH should instead set the create_user_ah() op similar to reg_user_mr(). MODIFY_AH and QUERY_AH cmds were never implemented so are just deleted. Link: https://lore.kernel.org/r/11-v1-caa70ba3d1ab+1436e-ucmd_mask_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index c7c6b06171e9..9420827d2421 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2403,6 +2403,8 @@ struct ib_device_ops { int (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata); int (*create_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr, struct ib_udata *udata); + int (*create_user_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr, + struct ib_udata *udata); int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int (*destroy_ah)(struct ib_ah *ah, u32 flags); -- cgit From bfb972c5e1cba88c93912f271ed5ecc114e31431 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Oct 2020 17:15:39 +0100 Subject: IB/verbs: avoid nested container_of() Nested container_of() calls work correctly but cause a warning when building with W=2. 
Invoking it from an inline function like in drivers/infiniband/hw/mlx5/mlx5_ib.h means we get hundreds of warnings like: include/linux/kernel.h:852:8: warning: declaration of '__mptr' shadows a previous local [-Wshadow] 852 | void *__mptr = (void *)(ptr); \ | ^~~~~~ include/rdma/uverbs_ioctl.h:651:11: note: in expansion of macro 'container_of' 651 | (udata ? container_of(container_of(udata, struct uverbs_attr_bundle, \ | ^~~~~~~~~~~~ include/rdma/uverbs_ioctl.h:651:24: note: in expansion of macro 'container_of' 651 | (udata ? container_of(container_of(udata, struct uverbs_attr_bundle, \ | ^~~~~~~~~~~~ drivers/infiniband/hw/mthca/mthca_qp.c:564:35: note: in expansion of macro 'rdma_udata_to_drv_context' 564 | struct mthca_ucontext *context = rdma_udata_to_drv_context( | ^~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/kernel.h:852:8: note: shadowed declaration is here 852 | void *__mptr = (void *)(ptr); \ | ^~~~~~ include/rdma/uverbs_ioctl.h:651:11: note: in expansion of macro 'container_of' 651 | (udata ? container_of(container_of(udata, struct uverbs_attr_bundle, \ | ^~~~~~~~~~~~ drivers/infiniband/hw/mthca/mthca_qp.c:564:35: note: in expansion of macro 'rdma_udata_to_drv_context' 564 | struct mthca_ucontext *context = rdma_udata_to_drv_context( | ^~~~~~~~~~~~~~~~~~~~~~~~~ In file included from : include/linux/kernel.h:852:8: warning: declaration of '__mptr' shadows a previous local [-Wshadow] 852 | void *__mptr = (void *)(ptr); \ | ^~~~~~ Rewrite the macro to use an inline function internally, which makes it more readable and reduces the amount of useless output from make W=2. Fixes: 730623f4a56f ("IB/verbs: Add helper function rdma_udata_to_drv_context") Link: https://lore.kernel.org/r/20201026161549.3709175-1-arnd@kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Jason Gunthorpe --- include/rdma/uverbs_ioctl.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index b00270c72740..bf167ef6c688 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -647,12 +647,15 @@ static inline bool uverbs_attr_is_valid(const struct uverbs_attr_bundle *attrs_b * 'ucontext'. * */ -#define rdma_udata_to_drv_context(udata, drv_dev_struct, member) \ - (udata ? container_of(container_of(udata, struct uverbs_attr_bundle, \ - driver_udata) \ - ->context, \ - drv_dev_struct, member) : \ - (drv_dev_struct *)NULL) +static inline struct uverbs_attr_bundle * +rdma_udata_to_uverbs_attr_bundle(struct ib_udata *udata) +{ + return container_of(udata, struct uverbs_attr_bundle, driver_udata); +} + +#define rdma_udata_to_drv_context(udata, drv_dev_struct, member) \ + (udata ? container_of(rdma_udata_to_uverbs_attr_bundle(udata)->context, \ + drv_dev_struct, member) : (drv_dev_struct *)NULL) #define IS_UVERBS_COPY_ERR(_ret) ((_ret) && (_ret) != -ENOENT) -- cgit From efa968ee20248ebf8da8542f21d5d2811e86392f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 4 Nov 2020 16:45:55 +0200 Subject: RDMA/core: Postpone uobject cleanup on failure till FD close Remove the ib_is_destroyable_retryable() concept. The idea here was to allow the drivers to forcibly clean the HW object even if they otherwise didn't want to (eg because of usecnt). This was an attempt to clean up in a world where drivers were not allowed to fail HW object destruction. Now that we are going back to allowing HW objects to fail destroy this doesn't make sense. 
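As a rough sketch of the resulting pattern (simplified, names close to but not exactly the core's), a destroy callback that used to call ib_destroy_usecnt() now just reports the problem and lets uverbs decide whether to retry, defer to FD close, or WARN and leak:

static int hypo_free_pd(struct ib_uobject *uobject,
			enum rdma_remove_reason why,
			struct uverbs_attr_bundle *attrs)
{
	struct ib_pd *pd = uobject->object;

	/* was: ib_destroy_usecnt(&pd->usecnt, why, uobject) */
	if (atomic_read(&pd->usecnt))
		return -EBUSY;	/* uverbs retries later, or WARNs and leaks */

	return ib_dealloc_pd_user(pd, &attrs->driver_udata);
}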
Instead if a uobject's HW object can't be destroyed it is left on the uobject list and it is up to uverbs_destroy_ufile_hw() to clean it. Multiple passes over the uobject list allow hidden dependencies to be resolved. If that fails the HW driver is broken, throw a WARN_ON and leak the HW object memory. All the other tricky failure paths (eg on creation error unwind) have already been updated to this new model. Link: https://lore.kernel.org/r/20201104144556.3809085-2-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 44 ++------------------------------------------ 1 file changed, 2 insertions(+), 42 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9420827d2421..7e330f4a6d33 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1471,6 +1471,8 @@ enum rdma_remove_reason { RDMA_REMOVE_DRIVER_REMOVE, /* uobj is being cleaned-up before being committed */ RDMA_REMOVE_ABORT, + /* The driver failed to destroy the uobject and is being disconnected */ + RDMA_REMOVE_DRIVER_FAILURE, }; struct ib_rdmacg_object { @@ -1483,8 +1485,6 @@ struct ib_ucontext { struct ib_device *device; struct ib_uverbs_file *ufile; - bool cleanup_retryable; - struct ib_rdmacg_object cg_obj; /* * Implementation details of the RDMA core, don't use in drivers: @@ -2903,46 +2903,6 @@ static inline bool ib_is_udata_cleared(struct ib_udata *udata, return ib_is_buffer_cleared(udata->inbuf + offset, len); } -/** - * ib_is_destroy_retryable - Check whether the uobject destruction - * is retryable. - * @ret: The initial destruction return code - * @why: remove reason - * @uobj: The uobject that is destroyed - * - * This function is a helper function that IB layer and low-level drivers - * can use to consider whether the destruction of the given uobject is - * retry-able. - * It checks the original return code, if it wasn't success the destruction - * is retryable according to the ucontext state (i.e. cleanup_retryable) and - * the remove reason. (i.e. why). - * Must be called with the object locked for destroy. - */ -static inline bool ib_is_destroy_retryable(int ret, enum rdma_remove_reason why, - struct ib_uobject *uobj) -{ - return ret && (why == RDMA_REMOVE_DESTROY || - uobj->context->cleanup_retryable); -} - -/** - * ib_destroy_usecnt - Called during destruction to check the usecnt - * @usecnt: The usecnt atomic - * @why: remove reason - * @uobj: The uobject that is destroyed - * - * Non-zero usecnts will block destruction unless destruction was triggered by - * a ucontext cleanup. - */ -static inline int ib_destroy_usecnt(atomic_t *usecnt, - enum rdma_remove_reason why, - struct ib_uobject *uobj) -{ - if (atomic_read(usecnt) && ib_is_destroy_retryable(-EBUSY, why, uobj)) - return -EBUSY; - return 0; -} - /** * ib_modify_qp_is_ok - Check that the supplied attribute mask * contains all required attributes and no attributes not allowed for -- cgit From c5633a72a1b8a2740bdb1495eab010f1124fd5ee Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 4 Nov 2020 16:45:56 +0200 Subject: RDMA/core: Make FD destroy callback void All FD object destroy implementations return 0, so declare this callback void. 
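A minimal sketch of an FD destroy_object implementation under the void signature (object and helper names are hypothetical):

static void hypo_event_file_destroy_uobj(struct ib_uobject *uobj,
					 enum rdma_remove_reason why)
{
	struct hypo_event_file *file =
		container_of(uobj, struct hypo_event_file, uobj);

	/* tear down whatever the FD was tracking; nothing is allowed to fail */
	hypo_flush_pending_events(file);
}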
Link: https://lore.kernel.org/r/20201104144556.3809085-3-leon@kernel.org Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/uverbs_types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index 06db27e35f40..a27e9fb4903f 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -138,8 +138,8 @@ struct uverbs_obj_fd_type { * because the driver is removed or the FD is closed. */ struct uverbs_obj_type type; - int (*destroy_object)(struct ib_uobject *uobj, - enum rdma_remove_reason why); + void (*destroy_object)(struct ib_uobject *uobj, + enum rdma_remove_reason why); const struct file_operations *fops; const char *name; int flags; -- cgit From 2af29468e3b3793b49f6c4385d2cabcea43fe076 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 6 Nov 2020 19:19:37 +0100 Subject: RDMA/core: Remove ib_dma_{alloc,free}_coherent These two functions are entirely unused. Link: https://lore.kernel.org/r/20201106181941.1878556-7-hch@lst.de Signed-off-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 29 ----------------------------- 1 file changed, 29 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 7e330f4a6d33..a0f9b1c133e4 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4061,35 +4061,6 @@ static inline void ib_dma_sync_single_for_device(struct ib_device *dev, dma_sync_single_for_device(dev->dma_device, addr, size, dir); } -/** - * ib_dma_alloc_coherent - Allocate memory and map it for DMA - * @dev: The device for which the DMA address is requested - * @size: The size of the region to allocate in bytes - * @dma_handle: A pointer for returning the DMA address of the region - * @flag: memory allocator flags - */ -static inline void *ib_dma_alloc_coherent(struct ib_device *dev, - size_t size, - dma_addr_t *dma_handle, - gfp_t flag) -{ - return dma_alloc_coherent(dev->dma_device, size, dma_handle, flag); -} - -/** - * ib_dma_free_coherent - Free memory allocated by ib_dma_alloc_coherent() - * @dev: The device for which the DMA addresses were allocated - * @size: The size of the region - * @cpu_addr: the address returned by ib_dma_alloc_coherent() - * @dma_handle: the DMA address returned by ib_dma_alloc_coherent() - */ -static inline void ib_dma_free_coherent(struct ib_device *dev, - size_t size, void *cpu_addr, - dma_addr_t dma_handle) -{ - dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle); -} - /* ib_reg_user_mr - register a memory region for virtual addresses from kernel * space. This function should be called when 'current' is the owning MM. */ -- cgit From 8ecfca68dc4cbee1272a0161e3f2fb9387dc6930 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 6 Nov 2020 19:19:34 +0100 Subject: RDMA: Lift ibdev_to_node from rds to common code Lift the ibdev_to_node from rds to common code and document it. 
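For illustration only (hypothetical caller), the typical use is NUMA-aware allocation of per-device state close to the HCA:

	struct hypo_dev_state *st;

	st = kzalloc_node(sizeof(*st), GFP_KERNEL, ibdev_to_node(ibdev));
	if (!st)
		return -ENOMEM;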
Link: https://lore.kernel.org/r/20201106181941.1878556-4-hch@lst.de Signed-off-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index a0f9b1c133e4..3feb42ef82dc 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4549,6 +4549,19 @@ static inline struct ib_device *rdma_device_to_ibdev(struct device *device) return coredev->owner; } +/** + * ibdev_to_node - return the NUMA node for a given ib_device + * @dev: device to get the NUMA node for. + */ +static inline int ibdev_to_node(struct ib_device *ibdev) +{ + struct device *parent = ibdev->dev.parent; + + if (!parent) + return NUMA_NO_NODE; + return dev_to_node(parent); +} + /** * rdma_device_to_drv_device - Helper macro to reach back to driver's * ib_device holder structure from device pointer. -- cgit From b045db62f6f61c2f0f993696abe620379db34163 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 15 Nov 2020 13:43:05 +0200 Subject: RDMA/mlx5: Use ib_umem_find_best_pgoff() for SRQ SRQ uses a quantized and scaled page_offset, which is another variation of ib_umem_find_best_pgsz(). Add mlx5_umem_find_best_quantized_pgoff() to perform this calculation for each mailbox. A macro shows how the calculation is directly connected to the mailbox format. This new routine replaces the limited mlx5_ib_cont_pages() and mlx5_ib_get_buf_offset() pairing which would reject valid configurations rather than adjust the page_size to make it work. In turn this is much more aggressive about choosing large page sizes for these objects and when THP is enabled it will now often find a single page solution. Link: https://lore.kernel.org/r/20201115114311.136250-2-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_umem.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 70597508c765..7752211c9638 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -34,6 +34,13 @@ static inline int ib_umem_offset(struct ib_umem *umem) return umem->address & ~PAGE_MASK; } +static inline unsigned long ib_umem_dma_offset(struct ib_umem *umem, + unsigned long pgsz) +{ + return (sg_dma_address(umem->sg_head.sgl) + ib_umem_offset(umem)) & + (pgsz - 1); +} + static inline size_t ib_umem_num_dma_blocks(struct ib_umem *umem, unsigned long pgsz) { @@ -79,6 +86,35 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, unsigned long pgsz_bitmap, unsigned long virt); +/** + * ib_umem_find_best_pgoff - Find best HW page size + * + * @umem: umem struct + * @pgsz_bitmap bitmap of HW supported page sizes + * @pgoff_bitmask: Mask of bits that can be represented with an offset + * + * This is very similar to ib_umem_find_best_pgsz() except instead of accepting + * an IOVA it accepts a bitmask specifying what address bits can be represented + * with a page offset. + * + * For instance if the HW has multiple page sizes, requires 64 byte alignemnt, + * and can support aligned offsets up to 4032 then pgoff_bitmask would be + * "111111000000". + * + * If the pgoff_bitmask requires either alignment in the low bit or an + * unavailable page size for the high bits, this function returns 0. 
+ */ +static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem, + unsigned long pgsz_bitmap, + u64 pgoff_bitmask) +{ + struct scatterlist *sg = umem->sg_head.sgl; + dma_addr_t dma_addr; + + dma_addr = sg_dma_address(sg) + (umem->address & ~PAGE_MASK); + return ib_umem_find_best_pgsz(umem, pgsz_bitmap, + dma_addr & pgoff_bitmask); +} #else /* CONFIG_INFINIBAND_USER_MEM */ @@ -101,6 +137,12 @@ static inline unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, { return 0; } +static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem, + unsigned long pgsz_bitmap, + u64 pgoff_bitmask) +{ + return 0; +} #endif /* CONFIG_INFINIBAND_USER_MEM */ -- cgit From 5a7a9e038b032137ae9c45d5429f18a2ffdf7d42 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 6 Nov 2020 19:19:38 +0100 Subject: RDMA/core: remove use of dma_virt_ops Use the ib_dma_* helpers to skip the DMA translation instead. This removes the last user if dma_virt_ops and keeps the weird layering violation inside the RDMA core instead of burderning the DMA mapping subsystems with it. This also means the software RDMA drivers now don't have to mess with DMA parameters that are not relevant to them at all, and that in the future we can use PCI P2P transfers even for software RDMA, as there is no first fake layer of DMA mapping that the P2P DMA support. Link: https://lore.kernel.org/r/20201106181941.1878556-8-hch@lst.de Signed-off-by: Christoph Hellwig Tested-by: Mike Marciniszyn Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 73 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 50 insertions(+), 23 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 3feb42ef82dc..174c1bffa00c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3906,6 +3906,16 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) -ENOSYS; } +/* + * Drivers that don't need a DMA mapping at the RDMA layer, set dma_device to + * NULL. This causes the ib_dma* helpers to just stash the kernel virtual + * address into the dma address. 
+ */ +static inline bool ib_uses_virt_dma(struct ib_device *dev) +{ + return IS_ENABLED(CONFIG_INFINIBAND_VIRT_DMA) && !dev->dma_device; +} + /** * ib_dma_mapping_error - check a DMA addr for error * @dev: The device for which the dma_addr was created @@ -3913,6 +3923,8 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) */ static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr) { + if (ib_uses_virt_dma(dev)) + return 0; return dma_mapping_error(dev->dma_device, dma_addr); } @@ -3927,6 +3939,8 @@ static inline u64 ib_dma_map_single(struct ib_device *dev, void *cpu_addr, size_t size, enum dma_data_direction direction) { + if (ib_uses_virt_dma(dev)) + return (uintptr_t)cpu_addr; return dma_map_single(dev->dma_device, cpu_addr, size, direction); } @@ -3941,7 +3955,8 @@ static inline void ib_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction) { - dma_unmap_single(dev->dma_device, addr, size, direction); + if (!ib_uses_virt_dma(dev)) + dma_unmap_single(dev->dma_device, addr, size, direction); } /** @@ -3958,6 +3973,8 @@ static inline u64 ib_dma_map_page(struct ib_device *dev, size_t size, enum dma_data_direction direction) { + if (ib_uses_virt_dma(dev)) + return (uintptr_t)(page_address(page) + offset); return dma_map_page(dev->dma_device, page, offset, size, direction); } @@ -3972,7 +3989,30 @@ static inline void ib_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction) { - dma_unmap_page(dev->dma_device, addr, size, direction); + if (!ib_uses_virt_dma(dev)) + dma_unmap_page(dev->dma_device, addr, size, direction); +} + +int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents); +static inline int ib_dma_map_sg_attrs(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction, + unsigned long dma_attrs) +{ + if (ib_uses_virt_dma(dev)) + return ib_dma_virt_map_sg(dev, sg, nents); + return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, + dma_attrs); +} + +static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction, + unsigned long dma_attrs) +{ + if (!ib_uses_virt_dma(dev)) + dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, + dma_attrs); } /** @@ -3986,7 +4026,7 @@ static inline int ib_dma_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { - return dma_map_sg(dev->dma_device, sg, nents, direction); + return ib_dma_map_sg_attrs(dev, sg, nents, direction, 0); } /** @@ -4000,24 +4040,7 @@ static inline void ib_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { - dma_unmap_sg(dev->dma_device, sg, nents, direction); -} - -static inline int ib_dma_map_sg_attrs(struct ib_device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction direction, - unsigned long dma_attrs) -{ - return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, - dma_attrs); -} - -static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction direction, - unsigned long dma_attrs) -{ - dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs); + ib_dma_unmap_sg_attrs(dev, sg, nents, direction, 0); } /** @@ -4028,6 +4051,8 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, */ static inline unsigned int ib_dma_max_seg_size(struct ib_device *dev) { 
+ if (ib_uses_virt_dma(dev)) + return UINT_MAX; return dma_get_max_seg_size(dev->dma_device); } @@ -4043,7 +4068,8 @@ static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev, size_t size, enum dma_data_direction dir) { - dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); + if (!ib_uses_virt_dma(dev)) + dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); } /** @@ -4058,7 +4084,8 @@ static inline void ib_dma_sync_single_for_device(struct ib_device *dev, size_t size, enum dma_data_direction dir) { - dma_sync_single_for_device(dev->dma_device, addr, size, dir); + if (!ib_uses_virt_dma(dev)) + dma_sync_single_for_device(dev->dma_device, addr, size, dir); } /* ib_reg_user_mr - register a memory region for virtual addresses from kernel -- cgit From 2b1f747071c5ce5ad571d80c1541b732cf07f9c1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 17 Nov 2020 09:01:47 +0200 Subject: RDMA/core: Allow drivers to disable restrack DB Driver QP types are special case with no IBTA restrictions. For example, EFA implemented creation of this QP type as regular one, while mlx5 separated create to two step: create and modify. That separation causes to the situation where DC QP (mlx5) is always added to the same xarray index zero. This change allows to drivers like mlx5 simply disable restrack DB tracking, but it doesn't disable kref on the memory. Fixes: 52e0a118a203 ("RDMA/restrack: Track driver QP types in resource tracker") Link: https://lore.kernel.org/r/20201117070148.1974114-3-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/restrack.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index d3a1cc5be7bc..05e18839eaff 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -68,6 +68,14 @@ struct rdma_restrack_entry { * As an example for that, see mlx5 QPs with type MLX5_IB_QPT_HW_GSI */ bool valid; + /** + * @no_track: don't add this entry to restrack DB + * + * This field is used to mark an entry that doesn't need to be added to + * internal restrack DB and presented later to the users at the nldev + * query stage. + */ + u8 no_track : 1; /* * @kref: Protect destroy of the resource */ @@ -145,4 +153,20 @@ int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name, struct rdma_restrack_entry *rdma_restrack_get_byid(struct ib_device *dev, enum rdma_restrack_type type, u32 id); + +/** + * rdma_restrack_no_track() - don't add resource to the DB + * @res: resource entry + * + * Every user of thie API should be cross examined. + * Probaby you don't need to use this function. + */ +static inline void rdma_restrack_no_track(struct rdma_restrack_entry *res) +{ + res->no_track = true; +} +static inline bool rdma_restrack_is_tracked(struct rdma_restrack_entry *res) +{ + return !res->no_track; +} #endif /* _RDMA_RESTRACK_H_ */ -- cgit From 66f57b871efc576dfe8117b65af4e805e03ea689 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 17 Nov 2020 09:01:48 +0200 Subject: RDMA/restrack: Support all QP types The latest changes in restrack name handling allowed to simplify the QP creation code to support all types of QPs. For example XRC QP are presented with rdmatool. 
$ ibv_xsrq_pingpong & $ rdma res show qp link ibp0s9/1 lqpn 0 type SMI state RTS sq-psn 0 comm [ib_core] link ibp0s9/1 lqpn 1 type GSI state RTS sq-psn 0 comm [ib_core] link ibp0s9/1 lqpn 7 type UD state RTS sq-psn 0 comm [mlx5_ib] link ibp0s9/1 lqpn 42 type XRC_TGT state INIT sq-psn 0 path-mig-state MIGRATED comm [ib_uverbs] link ibp0s9/1 lqpn 43 type XRC_INI state INIT sq-psn 0 path-mig-state MIGRATED pdn 197 pid 419 comm ibv_xsrq_pingpong Link: https://lore.kernel.org/r/20201117070148.1974114-4-leon@kernel.org Reviewed-by: Mark Zhang Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 174c1bffa00c..7bee8abae35c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3618,8 +3618,14 @@ static inline int ib_post_srq_recv(struct ib_srq *srq, bad_recv_wr ? : &dummy); } -struct ib_qp *ib_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr); +struct ib_qp *ib_create_named_qp(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + const char *caller); +static inline struct ib_qp *ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr) +{ + return ib_create_named_qp(pd, init_attr, KBUILD_MODNAME); +} /** * ib_modify_qp_with_udata - Modifies the attributes for the specified QP. -- cgit From adac4cb3c1ff5c47c9f47be5d017a0e054176e3c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 30 Nov 2020 09:58:36 +0200 Subject: RDMA/uverbs: Check ODP in ib_check_mr_access() as well No reason only one caller checks this. This properly blocks ODP from the rereg flow if the device does not support ODP. Link: https://lore.kernel.org/r/20201130075839.278575-3-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/rdma') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 7bee8abae35c..4fcbc6d3d0e0 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4183,7 +4183,8 @@ struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device, struct inode *inode, struct ib_udata *udata); int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata); -static inline int ib_check_mr_access(int flags) +static inline int ib_check_mr_access(struct ib_device *ib_dev, + unsigned int flags) { /* * Local write permission is required if remote write or @@ -4196,6 +4197,9 @@ static inline int ib_check_mr_access(int flags) if (flags & ~IB_ACCESS_SUPPORTED) return -EINVAL; + if (flags & IB_ACCESS_ON_DEMAND && + !(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)) + return -EINVAL; return 0; } -- cgit From 6e0954b11c056570cb29676a84e2f8dc4d1dd05e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 30 Nov 2020 09:58:37 +0200 Subject: RDMA/uverbs: Allow drivers to create a new HW object during rereg_mr mlx5 has an ugly flow where it tries to allocate a new MR and replace the existing MR in the same memory during rereg. This is very complicated and buggy. Instead of trying to replace in-place inside the driver, provide support from uverbs to change the entire HW object assigned to a handle during rereg_mr. 
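In rough terms the new driver-facing contract is: return a new ib_mr to replace the HW object behind the user's handle, return NULL when the existing MR was updated in place, or return ERR_PTR() on failure. A hedged sketch (hypothetical driver, PD changes and error unwind omitted):

static struct ib_mr *hypo_rereg_user_mr(struct ib_mr *mr, int flags, u64 start,
					u64 length, u64 virt_addr,
					int mr_access_flags, struct ib_pd *pd,
					struct ib_udata *udata)
{
	if (flags & IB_MR_REREG_TRANS)
		/* cannot remap in place: hand back a brand-new HW object and
		 * let uverbs swap it into the existing user handle
		 */
		return hypo_create_user_mr(mr->pd, start, length, virt_addr,
					   mr_access_flags, udata);

	hypo_update_access_flags(mr, mr_access_flags);	/* in-place update */
	return NULL;
}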
Since destroying a MR is allowed to fail (ie if a MW is pointing at it) and can't be detected in advance, the algorithm creates a completely new uobject to hold the new MR and swaps the IDR entries of the two objects. The old MR in the temporary IDR entry is destroyed, and if it fails rereg_mr succeeds and destruction is deferred to FD release. This complexity is why this cannot live in a driver safely. Link: https://lore.kernel.org/r/20201130075839.278575-4-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 7 ++++--- include/rdma/uverbs_types.h | 5 +++++ 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 4fcbc6d3d0e0..3be1d1194a17 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2433,9 +2433,10 @@ struct ib_device_ops { struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_udata *udata); - int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length, - u64 virt_addr, int mr_access_flags, - struct ib_pd *pd, struct ib_udata *udata); + struct ib_mr *(*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, + u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata); int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata); struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index a27e9fb4903f..ccd11631c167 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -71,6 +71,8 @@ struct uverbs_obj_type_class { enum rdma_remove_reason why, struct uverbs_attr_bundle *attrs); void (*remove_handle)(struct ib_uobject *uobj); + void (*swap_uobjects)(struct ib_uobject *obj_old, + struct ib_uobject *obj_new); }; struct uverbs_obj_type { @@ -116,6 +118,9 @@ void rdma_alloc_abort_uobject(struct ib_uobject *uobj, bool hw_obj_valid); void rdma_alloc_commit_uobject(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs); +void rdma_assign_uobject(struct ib_uobject *to_uobj, + struct ib_uobject *new_uobj, + struct uverbs_attr_bundle *attrs); /* * uverbs_uobject_get is called in order to increase the reference count on -- cgit From 2988ca08ba65848f2705023b054fd8bfc0109c38 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 1 Dec 2020 13:08:55 +0100 Subject: IB: Fix kernel-doc markups Some functions have different names between their prototypes and the kernel-doc markup. Others need to be fixed, as kernel-doc markups should use this format: identifier - description Link: https://lore.kernel.org/r/78b98c41a5a0f4c0106433d305b143028a4168b0.1606823973.git.mchehab+huawei@kernel.org Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 3be1d1194a17..06a565203cfc 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3394,6 +3394,17 @@ enum ib_pd_flags { struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, const char *caller); +/** + * ib_alloc_pd - Allocates an unused protection domain. + * @device: The device on which to allocate the protection domain. 
+ * @flags: protection domain flags + * + * A protection domain object provides an association between QPs, shared + * receive queues, address handles, memory regions, and memory windows. + * + * Every PD has a local_dma_lkey which can be used as the lkey value for local + * memory operations. + */ #define ib_alloc_pd(device, flags) \ __ib_alloc_pd((device), (flags), KBUILD_MODNAME) -- cgit From e0da68994d16b46384cce7b86eb645f1ef7c51ef Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Tue, 8 Dec 2020 09:35:45 +0200 Subject: RDMA/uverbs: Fix incorrect variable type Fix incorrect type of max_entries in UVERBS_METHOD_QUERY_GID_TABLE - max_entries is of type size_t although it can take negative values. The following static check revealed it: drivers/infiniband/core/uverbs_std_types_device.c:338 ib_uverbs_handler_UVERBS_METHOD_QUERY_GID_TABLE() warn: 'max_entries' unsigned <= 0 Fixes: 9f85cbe50aa0 ("RDMA/uverbs: Expose the new GID query API to user space") Link: https://lore.kernel.org/r/20201208073545.9723-4-leon@kernel.org Reported-by: Dan Carpenter Signed-off-by: Avihai Horon Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/uverbs_ioctl.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index bf167ef6c688..39ef204753ec 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -865,6 +865,16 @@ static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle, { return _uverbs_alloc(bundle, size, GFP_KERNEL | __GFP_ZERO); } + +static inline __malloc void *uverbs_kcalloc(struct uverbs_attr_bundle *bundle, + size_t n, size_t size) +{ + size_t bytes; + + if (unlikely(check_mul_overflow(n, size, &bytes))) + return ERR_PTR(-EOVERFLOW); + return uverbs_zalloc(bundle, bytes); +} int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, s64 lower_bound, u64 upper_bound, s64 *def_val); -- cgit
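A hedged usage sketch (attribute and array names are illustrative): uverbs_kcalloc() gives method handlers an overflow-checked array allocation whose lifetime is tied to the attr bundle, so no explicit free is needed:

	struct ib_uverbs_gid_entry *entries;

	entries = uverbs_kcalloc(attrs, max_entries, sizeof(*entries));
	if (IS_ERR(entries))
		return PTR_ERR(entries);
	/* fill entries[]; the memory is released when the bundle completes */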