diff options
Diffstat (limited to 'drivers/gpu/drm/xe/xe_hmm.c')
-rw-r--r-- | drivers/gpu/drm/xe/xe_hmm.c | 325 |
1 files changed, 325 insertions, 0 deletions
diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c new file mode 100644 index 000000000000..57b71956ddf4 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hmm.c @@ -0,0 +1,325 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include <linux/scatterlist.h> +#include <linux/mmu_notifier.h> +#include <linux/dma-mapping.h> +#include <linux/memremap.h> +#include <linux/swap.h> +#include <linux/hmm.h> +#include <linux/mm.h> +#include "xe_hmm.h" +#include "xe_vm.h" +#include "xe_bo.h" + +static u64 xe_npages_in_range(unsigned long start, unsigned long end) +{ + return (end - start) >> PAGE_SHIFT; +} + +static int xe_alloc_sg(struct xe_device *xe, struct sg_table *st, + struct hmm_range *range, struct rw_semaphore *notifier_sem) +{ + unsigned long i, npages, hmm_pfn; + unsigned long num_chunks = 0; + int ret; + + /* HMM docs says this is needed. */ + ret = down_read_interruptible(notifier_sem); + if (ret) + return ret; + + if (mmu_interval_read_retry(range->notifier, range->notifier_seq)) { + up_read(notifier_sem); + return -EAGAIN; + } + + npages = xe_npages_in_range(range->start, range->end); + for (i = 0; i < npages;) { + unsigned long len; + + hmm_pfn = range->hmm_pfns[i]; + xe_assert(xe, hmm_pfn & HMM_PFN_VALID); + + len = 1UL << hmm_pfn_to_map_order(hmm_pfn); + + /* If order > 0 the page may extend beyond range->start */ + len -= (hmm_pfn & ~HMM_PFN_FLAGS) & (len - 1); + i += len; + num_chunks++; + } + up_read(notifier_sem); + + return sg_alloc_table(st, num_chunks, GFP_KERNEL); +} + +/** + * xe_build_sg() - build a scatter gather table for all the physical pages/pfn + * in a hmm_range. dma-map pages if necessary. dma-address is save in sg table + * and will be used to program GPU page table later. + * @xe: the xe device who will access the dma-address in sg table + * @range: the hmm range that we build the sg table from. range->hmm_pfns[] + * has the pfn numbers of pages that back up this hmm address range. + * @st: pointer to the sg table. + * @notifier_sem: The xe notifier lock. + * @write: whether we write to this range. This decides dma map direction + * for system pages. If write we map it bi-diretional; otherwise + * DMA_TO_DEVICE + * + * All the contiguous pfns will be collapsed into one entry in + * the scatter gather table. This is for the purpose of efficiently + * programming GPU page table. + * + * The dma_address in the sg table will later be used by GPU to + * access memory. So if the memory is system memory, we need to + * do a dma-mapping so it can be accessed by GPU/DMA. + * + * FIXME: This function currently only support pages in system + * memory. If the memory is GPU local memory (of the GPU who + * is going to access memory), we need gpu dpa (device physical + * address), and there is no need of dma-mapping. This is TBD. + * + * FIXME: dma-mapping for peer gpu device to access remote gpu's + * memory. Add this when you support p2p + * + * This function allocates the storage of the sg table. It is + * caller's responsibility to free it calling sg_free_table. + * + * Returns 0 if successful; -ENOMEM if fails to allocate memory + */ +static int xe_build_sg(struct xe_device *xe, struct hmm_range *range, + struct sg_table *st, + struct rw_semaphore *notifier_sem, + bool write) +{ + unsigned long npages = xe_npages_in_range(range->start, range->end); + struct device *dev = xe->drm.dev; + struct scatterlist *sgl; + struct page *page; + unsigned long i, j; + + lockdep_assert_held(notifier_sem); + + i = 0; + for_each_sg(st->sgl, sgl, st->nents, j) { + unsigned long hmm_pfn, size; + + hmm_pfn = range->hmm_pfns[i]; + page = hmm_pfn_to_page(hmm_pfn); + xe_assert(xe, !is_device_private_page(page)); + + size = 1UL << hmm_pfn_to_map_order(hmm_pfn); + size -= page_to_pfn(page) & (size - 1); + i += size; + + if (unlikely(j == st->nents - 1)) { + xe_assert(xe, i >= npages); + if (i > npages) + size -= (i - npages); + + sg_mark_end(sgl); + } else { + xe_assert(xe, i < npages); + } + + sg_set_page(sgl, page, size << PAGE_SHIFT, 0); + } + + return dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING); +} + +static void xe_hmm_userptr_set_mapped(struct xe_userptr_vma *uvma) +{ + struct xe_userptr *userptr = &uvma->userptr; + struct xe_vm *vm = xe_vma_vm(&uvma->vma); + + lockdep_assert_held_write(&vm->lock); + lockdep_assert_held(&vm->userptr.notifier_lock); + + mutex_lock(&userptr->unmap_mutex); + xe_assert(vm->xe, !userptr->mapped); + userptr->mapped = true; + mutex_unlock(&userptr->unmap_mutex); +} + +void xe_hmm_userptr_unmap(struct xe_userptr_vma *uvma) +{ + struct xe_userptr *userptr = &uvma->userptr; + struct xe_vma *vma = &uvma->vma; + bool write = !xe_vma_read_only(vma); + struct xe_vm *vm = xe_vma_vm(vma); + struct xe_device *xe = vm->xe; + + if (!lockdep_is_held_type(&vm->userptr.notifier_lock, 0) && + !lockdep_is_held_type(&vm->lock, 0) && + !(vma->gpuva.flags & XE_VMA_DESTROYED)) { + /* Don't unmap in exec critical section. */ + xe_vm_assert_held(vm); + /* Don't unmap while mapping the sg. */ + lockdep_assert_held(&vm->lock); + } + + mutex_lock(&userptr->unmap_mutex); + if (userptr->sg && userptr->mapped) + dma_unmap_sgtable(xe->drm.dev, userptr->sg, + write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, 0); + userptr->mapped = false; + mutex_unlock(&userptr->unmap_mutex); +} + +/** + * xe_hmm_userptr_free_sg() - Free the scatter gather table of userptr + * @uvma: the userptr vma which hold the scatter gather table + * + * With function xe_userptr_populate_range, we allocate storage of + * the userptr sg table. This is a helper function to free this + * sg table, and dma unmap the address in the table. + */ +void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma) +{ + struct xe_userptr *userptr = &uvma->userptr; + + xe_assert(xe_vma_vm(&uvma->vma)->xe, userptr->sg); + xe_hmm_userptr_unmap(uvma); + sg_free_table(userptr->sg); + userptr->sg = NULL; +} + +/** + * xe_hmm_userptr_populate_range() - Populate physical pages of a virtual + * address range + * + * @uvma: userptr vma which has information of the range to populate. + * @is_mm_mmap_locked: True if mmap_read_lock is already acquired by caller. + * + * This function populate the physical pages of a virtual + * address range. The populated physical pages is saved in + * userptr's sg table. It is similar to get_user_pages but call + * hmm_range_fault. + * + * This function also read mmu notifier sequence # ( + * mmu_interval_read_begin), for the purpose of later + * comparison (through mmu_interval_read_retry). + * + * This must be called with mmap read or write lock held. + * + * This function allocates the storage of the userptr sg table. + * It is caller's responsibility to free it calling sg_free_table. + * + * returns: 0 for success; negative error no on failure + */ +int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, + bool is_mm_mmap_locked) +{ + unsigned long timeout = + jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); + unsigned long *pfns; + struct xe_userptr *userptr; + struct xe_vma *vma = &uvma->vma; + u64 userptr_start = xe_vma_userptr(vma); + u64 userptr_end = userptr_start + xe_vma_size(vma); + struct xe_vm *vm = xe_vma_vm(vma); + struct hmm_range hmm_range = { + .pfn_flags_mask = 0, /* ignore pfns */ + .default_flags = HMM_PFN_REQ_FAULT, + .start = userptr_start, + .end = userptr_end, + .notifier = &uvma->userptr.notifier, + .dev_private_owner = vm->xe, + }; + bool write = !xe_vma_read_only(vma); + unsigned long notifier_seq; + u64 npages; + int ret; + + userptr = &uvma->userptr; + + if (is_mm_mmap_locked) + mmap_assert_locked(userptr->notifier.mm); + + if (vma->gpuva.flags & XE_VMA_DESTROYED) + return 0; + + notifier_seq = mmu_interval_read_begin(&userptr->notifier); + if (notifier_seq == userptr->notifier_seq) + return 0; + + if (userptr->sg) + xe_hmm_userptr_free_sg(uvma); + + npages = xe_npages_in_range(userptr_start, userptr_end); + pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); + if (unlikely(!pfns)) + return -ENOMEM; + + if (write) + hmm_range.default_flags |= HMM_PFN_REQ_WRITE; + + if (!mmget_not_zero(userptr->notifier.mm)) { + ret = -EFAULT; + goto free_pfns; + } + + hmm_range.hmm_pfns = pfns; + + while (true) { + hmm_range.notifier_seq = mmu_interval_read_begin(&userptr->notifier); + + if (!is_mm_mmap_locked) + mmap_read_lock(userptr->notifier.mm); + + ret = hmm_range_fault(&hmm_range); + + if (!is_mm_mmap_locked) + mmap_read_unlock(userptr->notifier.mm); + + if (ret == -EBUSY) { + if (time_after(jiffies, timeout)) + break; + + continue; + } + break; + } + + mmput(userptr->notifier.mm); + + if (ret) + goto free_pfns; + + ret = xe_alloc_sg(vm->xe, &userptr->sgt, &hmm_range, &vm->userptr.notifier_lock); + if (ret) + goto free_pfns; + + ret = down_read_interruptible(&vm->userptr.notifier_lock); + if (ret) + goto free_st; + + if (mmu_interval_read_retry(hmm_range.notifier, hmm_range.notifier_seq)) { + ret = -EAGAIN; + goto out_unlock; + } + + ret = xe_build_sg(vm->xe, &hmm_range, &userptr->sgt, + &vm->userptr.notifier_lock, write); + if (ret) + goto out_unlock; + + userptr->sg = &userptr->sgt; + xe_hmm_userptr_set_mapped(uvma); + userptr->notifier_seq = hmm_range.notifier_seq; + up_read(&vm->userptr.notifier_lock); + kvfree(pfns); + return 0; + +out_unlock: + up_read(&vm->userptr.notifier_lock); +free_st: + sg_free_table(&userptr->sgt); +free_pfns: + kvfree(pfns); + return ret; +} |