From f25a546e65292b36f15cca0912450c4944fae031 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 12 Nov 2019 16:22:22 -0400 Subject: RDMA/odp: Use mmu_interval_notifier_insert() Replace the internal interval tree based mmu notifier with the new common mmu_interval_notifier_insert() API. This removes a lot of code and fixes a deadlock that can be triggered in ODP: zap_page_range() mmu_notifier_invalidate_range_start() [..] ib_umem_notifier_invalidate_range_start() down_read(&per_mm->umem_rwsem) unmap_single_vma() [..] __split_huge_page_pmd() mmu_notifier_invalidate_range_start() [..] ib_umem_notifier_invalidate_range_start() down_read(&per_mm->umem_rwsem) // DEADLOCK mmu_notifier_invalidate_range_end() up_read(&per_mm->umem_rwsem) mmu_notifier_invalidate_range_end() up_read(&per_mm->umem_rwsem) The umem_rwsem is held across the range_start/end as the ODP algorithm for invalidate_range_end cannot tolerate changes to the interval tree. However, due to the nested invalidation regions the second down_read() can deadlock if there are competing writers. The new core code provides an alternative scheme to solve this problem. Fixes: ca748c39ea3f ("RDMA/umem: Get rid of per_mm->notifier_count") Link: https://lore.kernel.org/r/20191112202231.3856-6-jgg@ziepe.ca Tested-by: Artemy Kovalyov Signed-off-by: Jason Gunthorpe --- include/rdma/ib_umem_odp.h | 68 ++++++++++------------------------------------ include/rdma/ib_verbs.h | 2 -- 2 files changed, 14 insertions(+), 56 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index 09b0e4494986..81429acc8257 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -35,11 +35,11 @@ #include #include -#include struct ib_umem_odp { struct ib_umem umem; - struct ib_ucontext_per_mm *per_mm; + struct mmu_interval_notifier notifier; + struct pid *tgid; /* * An array of the pages included in the on-demand paging umem. @@ -62,13 +62,8 @@ struct ib_umem_odp { struct mutex umem_mutex; void *private; /* for the HW driver to use. */ - int notifiers_seq; - int notifiers_count; int npages; - /* Tree tracking */ - struct interval_tree_node interval_tree; - /* * An implicit odp umem cannot be DMA mapped, has 0 length, and serves * only as an anchor for the driver to hold onto the per_mm. FIXME: @@ -77,7 +72,6 @@ struct ib_umem_odp { */ bool is_implicit_odp; - struct completion notifier_completion; unsigned int page_shift; }; @@ -89,13 +83,13 @@ static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem) /* Returns the first page of an ODP umem. */ static inline unsigned long ib_umem_start(struct ib_umem_odp *umem_odp) { - return umem_odp->interval_tree.start; + return umem_odp->notifier.interval_tree.start; } /* Returns the address of the page after the last one of an ODP umem. */ static inline unsigned long ib_umem_end(struct ib_umem_odp *umem_odp) { - return umem_odp->interval_tree.last + 1; + return umem_odp->notifier.interval_tree.last + 1; } static inline size_t ib_umem_odp_num_pages(struct ib_umem_odp *umem_odp) @@ -119,21 +113,15 @@ static inline size_t ib_umem_odp_num_pages(struct ib_umem_odp *umem_odp) #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING -struct ib_ucontext_per_mm { - struct mmu_notifier mn; - struct pid *tgid; - - struct rb_root_cached umem_tree; - /* Protects umem_tree */ - struct rw_semaphore umem_rwsem; -}; - -struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr, - size_t size, int access); +struct ib_umem_odp * +ib_umem_odp_get(struct ib_udata *udata, unsigned long addr, size_t size, + int access, const struct mmu_interval_notifier_ops *ops); struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata, int access); -struct ib_umem_odp *ib_umem_odp_alloc_child(struct ib_umem_odp *root_umem, - unsigned long addr, size_t size); +struct ib_umem_odp * +ib_umem_odp_alloc_child(struct ib_umem_odp *root_umem, unsigned long addr, + size_t size, + const struct mmu_interval_notifier_ops *ops); void ib_umem_odp_release(struct ib_umem_odp *umem_odp); int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset, @@ -143,39 +131,11 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset, void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset, u64 bound); -typedef int (*umem_call_back)(struct ib_umem_odp *item, u64 start, u64 end, - void *cookie); -/* - * Call the callback on each ib_umem in the range. Returns the logical or of - * the return values of the functions called. - */ -int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root, - u64 start, u64 end, - umem_call_back cb, - bool blockable, void *cookie); - -static inline int ib_umem_mmu_notifier_retry(struct ib_umem_odp *umem_odp, - unsigned long mmu_seq) -{ - /* - * This code is strongly based on the KVM code from - * mmu_notifier_retry. Should be called with - * the relevant locks taken (umem_odp->umem_mutex - * and the ucontext umem_mutex semaphore locked for read). - */ - - if (unlikely(umem_odp->notifiers_count)) - return 1; - if (umem_odp->notifiers_seq != mmu_seq) - return 1; - return 0; -} - #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ -static inline struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, - unsigned long addr, - size_t size, int access) +static inline struct ib_umem_odp * +ib_umem_odp_get(struct ib_udata *udata, unsigned long addr, size_t size, + int access, const struct mmu_interval_notifier_ops *ops) { return ERR_PTR(-EINVAL); } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 6a47ba85c54c..2c30c859ae0d 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2422,8 +2422,6 @@ struct ib_device_ops { u64 iova); int (*unmap_fmr)(struct list_head *fmr_list); int (*dealloc_fmr)(struct ib_fmr *fmr); - void (*invalidate_range)(struct ib_umem_odp *umem_odp, - unsigned long start, unsigned long end); int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); struct ib_xrcd *(*alloc_xrcd)(struct ib_device *device, -- cgit