Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 227
1 file changed, 162 insertions(+), 65 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index c50a0dc9c9c0..9477a4adcd36 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -309,12 +309,13 @@ static void svm_range_free(struct svm_range *prange, bool do_unmap)
 }
 
 static void
-svm_range_set_default_attributes(int32_t *location, int32_t *prefetch_loc,
-                                 uint8_t *granularity, uint32_t *flags)
+svm_range_set_default_attributes(struct svm_range_list *svms, int32_t *location,
+                                 int32_t *prefetch_loc, uint8_t *granularity,
+                                 uint32_t *flags)
 {
         *location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
         *prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
-        *granularity = 9;
+        *granularity = svms->default_granularity;
         *flags =
                 KFD_IOCTL_SVM_FLAG_HOST_ACCESS | KFD_IOCTL_SVM_FLAG_COHERENT;
 }
@@ -358,7 +359,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
         bitmap_copy(prange->bitmap_access, svms->bitmap_supported,
                     MAX_GPU_INSTANCE);
 
-        svm_range_set_default_attributes(&prange->preferred_loc,
+        svm_range_set_default_attributes(svms, &prange->preferred_loc,
                                          &prange->prefetch_loc,
                                          &prange->granularity, &prange->flags);
 
@@ -404,6 +405,27 @@ static void svm_range_bo_release(struct kref *kref)
                 spin_lock(&svm_bo->list_lock);
         }
         spin_unlock(&svm_bo->list_lock);
+
+        if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
+                struct kfd_process_device *pdd;
+                struct kfd_process *p;
+                struct mm_struct *mm;
+
+                mm = svm_bo->eviction_fence->mm;
+                /*
+                 * The forked child process takes svm_bo device pages ref, svm_bo could be
+                 * released after parent process is gone.
+                 */
+                p = kfd_lookup_process_by_mm(mm);
+                if (p) {
+                        pdd = kfd_get_process_device_data(svm_bo->node, p);
+                        if (pdd)
+                                atomic64_sub(amdgpu_bo_size(svm_bo->bo), &pdd->vram_usage);
+                        kfd_unref_process(p);
+                }
+                mmput(mm);
+        }
+
         if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base))
                 /* We're not in the eviction worker. Signal the fence. */
                 dma_fence_signal(&svm_bo->eviction_fence->base);
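The release side of the new per-process VRAM accounting runs from a kref release, potentially long after the owning process has exited: a forked child can hold the last reference to the BO's device pages. Hence the guarded shape above: take a temporary mm reference with mmget_not_zero(), look the process up again, and skip the decrement if either step fails. A userspace model of the invariant (illustrative only; process_t, bo_t and lookup_owner() are invented for this sketch):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

typedef struct { atomic_uint_fast64_t vram_usage; } process_t;
typedef struct { process_t *owner; uint64_t size; } bo_t;

/* stand-in for kfd_lookup_process_by_mm(): NULL once the process exited */
static process_t *lookup_owner(const bo_t *bo) { return bo->owner; }

static void bo_create(process_t *p, bo_t *bo, uint64_t size)
{
        bo->owner = p;
        bo->size = size;
        atomic_fetch_add(&p->vram_usage, size);     /* charge the owner */
}

static void bo_release(bo_t *bo)
{
        process_t *p = lookup_owner(bo);

        if (p)  /* owner already gone: nothing left to account against */
                atomic_fetch_sub(&p->vram_usage, bo->size);
}

int main(void)
{
        process_t p = { 0 };
        bo_t bo;

        bo_create(&p, &bo, 2ULL << 20);             /* 2 MiB BO */
        bo_release(&bo);
        printf("vram_usage after release: %llu\n",  /* 0: balanced */
               (unsigned long long)atomic_load(&p.vram_usage));
        return 0;
}

The matching atomic64_add() lands in svm_range_vram_node_new() in the next hunk, so vram_usage tracks live SVM VRAM per process.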
@@ -531,6 +553,7 @@ int
 svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
                         bool clear)
 {
+        struct kfd_process_device *pdd;
         struct amdgpu_bo_param bp;
         struct svm_range_bo *svm_bo;
         struct amdgpu_bo_user *ubo;
@@ -622,6 +645,10 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
         list_add(&prange->svm_bo_list, &svm_bo->range_list);
         spin_unlock(&svm_bo->list_lock);
 
+        pdd = svm_range_get_pdd_by_node(prange, node);
+        if (pdd)
+                atomic64_add(amdgpu_bo_size(bo), &pdd->vram_usage);
+
         return 0;
 
 reserve_bo_failed:
@@ -1051,6 +1078,7 @@ svm_range_split_adjust(struct svm_range *new, struct svm_range *old,
         new->mapped_to_gpu = old->mapped_to_gpu;
         bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
         bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
+        atomic_set(&new->queue_refcount, atomic_read(&old->queue_refcount));
 
         return 0;
 }
@@ -1167,17 +1195,17 @@ svm_range_get_pte_flags(struct kfd_node *node,
         struct kfd_node *bo_node;
         uint32_t flags = prange->flags;
         uint32_t mapping_flags = 0;
+        uint32_t gc_ip_version = KFD_GC_VERSION(node);
         uint64_t pte_flags;
         bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN);
         bool coherent = flags & (KFD_IOCTL_SVM_FLAG_COHERENT | KFD_IOCTL_SVM_FLAG_EXT_COHERENT);
         bool ext_coherent = flags & KFD_IOCTL_SVM_FLAG_EXT_COHERENT;
-        bool uncached = false; /*flags & KFD_IOCTL_SVM_FLAG_UNCACHED;*/
         unsigned int mtype_local;
 
         if (domain == SVM_RANGE_VRAM_DOMAIN)
                 bo_node = prange->svm_bo->node;
 
-        switch (amdgpu_ip_version(node->adev, GC_HWIP, 0)) {
+        switch (gc_ip_version) {
         case IP_VERSION(9, 4, 1):
                 if (domain == SVM_RANGE_VRAM_DOMAIN) {
                         if (bo_node == node) {
@@ -1213,15 +1241,16 @@ svm_range_get_pte_flags(struct kfd_node *node,
                 }
                 break;
         case IP_VERSION(9, 4, 3):
+        case IP_VERSION(9, 4, 4):
+        case IP_VERSION(9, 5, 0):
                 if (ext_coherent)
-                        mtype_local = node->adev->rev_id ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_UC;
+                        mtype_local = (gc_ip_version < IP_VERSION(9, 5, 0) && !node->adev->rev_id) ?
+                                        AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_CC;
                 else
                         mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC :
                                       amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
                 snoop = true;
-                if (uncached) {
-                        mapping_flags |= AMDGPU_VM_MTYPE_UC;
-                } else if (domain == SVM_RANGE_VRAM_DOMAIN) {
+                if (domain == SVM_RANGE_VRAM_DOMAIN) {
                         /* local HBM region close to partition */
                         if (bo_node->adev == node->adev &&
                             (!bo_node->xcp || !node->xcp || bo_node->xcp->mem_id == node->xcp->mem_id))
@@ -1231,9 +1260,13 @@ svm_range_get_pte_flags(struct kfd_node *node,
                          */
                         else if (svm_nodes_in_same_hive(bo_node, node) && !ext_coherent)
                                 mapping_flags |= AMDGPU_VM_MTYPE_NC;
-                        /* PCIe P2P or extended system scope coherence */
-                        else
+                        /* PCIe P2P on GPUs pre-9.5.0 */
+                        else if (gc_ip_version < IP_VERSION(9, 5, 0) &&
+                                 !svm_nodes_in_same_hive(bo_node, node))
                                 mapping_flags |= AMDGPU_VM_MTYPE_UC;
+                        /* Other remote memory */
+                        else
+                                mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
                 /* system memory accessed by the APU */
                 } else if (node->adev->flags & AMD_IS_APU) {
                         /* On NUMA systems, locality is determined per-page
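The GC 9.4.3+ VRAM branch above is a locality ladder: the closer the backing HBM is to the faulting partition, the more cacheable the MTYPE it receives. Condensed to the branches visible in these hunks (a sketch with invented types, not driver code; MTYPE_LOCAL stands for the value chosen via the amdgpu_mtype_local module parameter):

enum mtype { MTYPE_LOCAL, MTYPE_NC, MTYPE_UC };

struct locality {
        int same_adev;          /* BO and faulting node on one device */
        int same_partition;     /* same (or no) XCP memory partition */
        int same_hive;          /* XGMI-connected nodes */
        int pre_9_5_0;          /* GC IP version < 9.5.0 */
        int ext_coherent;       /* KFD_IOCTL_SVM_FLAG_EXT_COHERENT set */
};

static enum mtype vram_mtype(const struct locality *l)
{
        if (l->same_adev && l->same_partition)
                return MTYPE_LOCAL;     /* local HBM close to the partition */
        if (l->same_hive && !l->ext_coherent)
                return MTYPE_NC;        /* HBM reachable inside the hive */
        if (l->pre_9_5_0 && !l->same_hive)
                return MTYPE_UC;        /* PCIe P2P on pre-9.5.0 parts */
        /* other remote memory */
        return l->ext_coherent ? MTYPE_UC : MTYPE_NC;
}

The change in shape is the last two branches: pre-9.5.0 parts keep UC for PCIe peer-to-peer, while newer parts fall through to NC unless extended-scope coherence was requested.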
@@ -1245,9 +1278,16 @@ svm_range_get_pte_flags(struct kfd_node *node,
                                 mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
                 /* system memory accessed by the dGPU */
                 } else {
-                        mapping_flags |= AMDGPU_VM_MTYPE_UC;
+                        if (gc_ip_version < IP_VERSION(9, 5, 0))
+                                mapping_flags |= AMDGPU_VM_MTYPE_UC;
+                        else
+                                mapping_flags |= AMDGPU_VM_MTYPE_NC;
                 }
                 break;
+        case IP_VERSION(12, 0, 0):
+        case IP_VERSION(12, 0, 1):
+                mapping_flags |= AMDGPU_VM_MTYPE_NC;
+                break;
         default:
                 mapping_flags |= coherent ?
                         AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
@@ -1263,6 +1303,8 @@ svm_range_get_pte_flags(struct kfd_node *node,
         pte_flags = AMDGPU_PTE_VALID;
         pte_flags |= (domain == SVM_RANGE_VRAM_DOMAIN) ? 0 : AMDGPU_PTE_SYSTEM;
         pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
+        if (gc_ip_version >= IP_VERSION(12, 0, 0))
+                pte_flags |= AMDGPU_PTE_IS_PTE;
 
         pte_flags |= amdgpu_gem_va_map_flags(node->adev, mapping_flags);
         return pte_flags;
@@ -1515,9 +1557,9 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx, bool intr)
                         goto unreserve_out;
                 }
 
-                r = amdgpu_vm_validate_pt_bos(pdd->dev->adev,
-                                              drm_priv_to_vm(pdd->drm_priv),
-                                              svm_range_bo_validate, NULL);
+                r = amdgpu_vm_validate(pdd->dev->adev,
+                                       drm_priv_to_vm(pdd->drm_priv), NULL,
+                                       svm_range_bo_validate, NULL);
                 if (r) {
                         pr_debug("failed %d validate pt bos\n", r);
                         goto unreserve_out;
@@ -1641,7 +1683,9 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
                 goto free_ctx;
         }
 
-        svm_range_reserve_bos(ctx, intr);
+        r = svm_range_reserve_bos(ctx, intr);
+        if (r)
+                goto free_ctx;
 
         p = container_of(prange->svms, struct kfd_process, svms);
         owner = kfd_svm_page_owner(p, find_first_bit(ctx->bitmap,
@@ -1656,7 +1700,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
         start = map_start << PAGE_SHIFT;
         end = (map_last + 1) << PAGE_SHIFT;
         for (addr = start; !r && addr < end; ) {
-                struct hmm_range *hmm_range;
+                struct hmm_range *hmm_range = NULL;
                 unsigned long map_start_vma;
                 unsigned long map_last_vma;
                 struct vm_area_struct *vma;
@@ -1676,11 +1720,8 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
                                                        readonly, owner, NULL,
                                                        &hmm_range);
                         WRITE_ONCE(p->svms.faulting_task, NULL);
-                        if (r) {
+                        if (r)
                                 pr_debug("failed %d to get svm range pages\n", r);
-                                if (r == -EBUSY)
-                                        r = -EAGAIN;
-                        }
                 } else {
                         r = -EFAULT;
                 }
@@ -1694,7 +1735,12 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
                 }
 
                 svm_range_lock(prange);
-                if (!r && amdgpu_hmm_range_get_pages_done(hmm_range)) {
+
+                /* Free backing memory of hmm_range if it was initialized
+                 * Overrride return value to TRY AGAIN only if prior returns
+                 * were successful
+                 */
+                if (hmm_range && amdgpu_hmm_range_get_pages_done(hmm_range) && !r) {
                         pr_debug("hmm update the range, need validate again\n");
                         r = -EAGAIN;
                 }
@@ -1978,6 +2024,7 @@ static struct svm_range *svm_range_clone(struct svm_range *old)
         new->vram_pages = old->vram_pages;
         bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
         bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
+        atomic_set(&new->queue_refcount, atomic_read(&old->queue_refcount));
 
         return new;
 }
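Two quiet fixes ride along in the validate-and-map hunks: svm_range_reserve_bos() failures now abort instead of being ignored, and because hmm_range starts out NULL, the post-loop check releases the HMM range only when it was actually set up, turning the result into -EAGAIN only when nothing failed earlier. A minimal sketch of that "clean up, but never clobber an earlier error" shape (stand-in names, not the driver's; range_dirty() plays the role of amdgpu_hmm_range_get_pages_done()):

#include <errno.h>
#include <stddef.h>

struct hmm_range { int dirty; };

/* releases the range's backing memory, nonzero if it was invalidated */
static int range_dirty(struct hmm_range *r) { return r->dirty; }

static int finish_validate(struct hmm_range *hmm_range, int r)
{
        /*
         * hmm_range is NULL unless the fault path succeeded, so the
         * cleanup is guarded by it -- and the check may only turn a
         * success into -EAGAIN, never overwrite a real error in r.
         */
        if (hmm_range && range_dirty(hmm_range) && !r)
                r = -EAGAIN;

        return r;
}

Dropping the old -EBUSY to -EAGAIN conversion is part of the same tidy-up: the caller now sees the real error instead of an unconditional retry.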
@@ -2246,16 +2293,10 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms)
 {
         struct kfd_process_device *pdd;
         struct kfd_process *p;
-        int drain;
         uint32_t i;
 
         p = container_of(svms, struct kfd_process, svms);
 
-restart:
-        drain = atomic_read(&svms->drain_pagefaults);
-        if (!drain)
-                return;
-
         for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) {
                 pdd = p->pdds[i];
                 if (!pdd)
@@ -2275,8 +2316,6 @@ restart:
 
                 pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
         }
-        if (atomic_cmpxchg(&svms->drain_pagefaults, drain, 0) != drain)
-                goto restart;
 }
 
 static void svm_range_deferred_list_work(struct work_struct *work)
@@ -2298,17 +2337,8 @@ static void svm_range_deferred_list_work(struct work_struct *work)
                          prange->start, prange->last, prange->work_item.op);
 
                 mm = prange->work_item.mm;
-retry:
-                mmap_write_lock(mm);
 
-                /* Checking for the need to drain retry faults must be inside
-                 * mmap write lock to serialize with munmap notifiers.
-                 */
-                if (unlikely(atomic_read(&svms->drain_pagefaults))) {
-                        mmap_write_unlock(mm);
-                        svm_range_drain_retry_fault(svms);
-                        goto retry;
-                }
+                mmap_write_lock(mm);
 
                 /* Remove from deferred_list must be inside mmap write lock, for
                  * two race cases:
@@ -2429,6 +2459,17 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
         struct kfd_process *p;
         unsigned long s, l;
         bool unmap_parent;
+        uint32_t i;
+
+        if (atomic_read(&prange->queue_refcount)) {
+                int r;
+
+                pr_warn("Freeing queue vital buffer 0x%lx, queue evicted\n",
+                        prange->start << PAGE_SHIFT);
+                r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_SVM);
+                if (r)
+                        pr_debug("failed %d to quiesce KFD queues\n", r);
+        }
 
         p = kfd_lookup_process_by_mm(mm);
         if (!p)
@@ -2438,11 +2479,38 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
         pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms,
                  prange, prange->start, prange->last, start, last);
 
-        /* Make sure pending page faults are drained in the deferred worker
-         * before the range is freed to avoid straggler interrupts on
-         * unmapped memory causing "phantom faults".
+        /* calculate time stamps that are used to decide which page faults need be
+         * dropped or handled before unmap pages from gpu vm
          */
-        atomic_inc(&svms->drain_pagefaults);
+        for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) {
+                struct kfd_process_device *pdd;
+                struct amdgpu_device *adev;
+                struct amdgpu_ih_ring *ih;
+                uint32_t checkpoint_wptr;
+
+                pdd = p->pdds[i];
+                if (!pdd)
+                        continue;
+
+                adev = pdd->dev->adev;
+
+                /* Check and drain ih1 ring if cam not available */
+                if (adev->irq.ih1.ring_size) {
+                        ih = &adev->irq.ih1;
+                        checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
+                        if (ih->rptr != checkpoint_wptr) {
+                                svms->checkpoint_ts[i] =
+                                        amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1);
+                                continue;
+                        }
+                }
+
+                /* check if dev->irq.ih_soft is not empty */
+                ih = &adev->irq.ih_soft;
+                checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
+                if (ih->rptr != checkpoint_wptr)
+                        svms->checkpoint_ts[i] =
+                                amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1);
+        }
 
         unmap_parent = start <= prange->start && last >= prange->last;
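The drain counter and its restart loop are gone. Instead, svm_range_unmap_from_cpu() records a per-GPU checkpoint: the timestamp of the newest entry currently sitting in the IH ring (IH1 when it is in use, otherwise the soft ring). Any retry fault whose timestamp predates the checkpoint was raised against the region now being unmapped and can be dropped without a blocking drain. A toy model of the snapshot-and-filter idea (pure userspace; the ring layout is invented):

#include <stdint.h>
#include <stdio.h>

/* Toy IH ring: one timestamp per pending fault entry. */
struct ring {
        uint64_t ts[8];
        unsigned rptr, wptr;
};

/* Timestamp of the newest entry currently in the ring, 0 if empty. */
static uint64_t checkpoint(const struct ring *ih)
{
        if (ih->rptr == ih->wptr)
                return 0;
        return ih->ts[(ih->wptr - 1) % 8];
}

int main(void)
{
        struct ring ih = { .ts = { 100, 101, 102 }, .rptr = 0, .wptr = 3 };
        uint64_t cp = checkpoint(&ih);  /* snapshot taken at unmap time */

        /* A fault decoded later: stale if it predates the checkpoint. */
        uint64_t fault_ts = 101;
        printf("fault %llu: %s\n", (unsigned long long)fault_ts,
               fault_ts <= cp ? "drop (pre-unmap)" : "handle");
        return 0;
}

The real comparison must additionally survive timestamp wraparound; see the note after the svm_range_restore_pages() hunks below.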
@@ -2617,7 +2685,7 @@ svm_range_best_restore_location(struct svm_range *prange,
                 return -1;
         }
 
-        if (node->adev->gmc.is_app_apu)
+        if (node->adev->flags & AMD_IS_APU)
                 return 0;
 
         if (prange->preferred_loc == gpuid ||
@@ -2666,9 +2734,10 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
         *is_heap_stack = vma_is_initial_heap(vma) || vma_is_initial_stack(vma);
 
         start_limit = max(vma->vm_start >> PAGE_SHIFT,
-                          (unsigned long)ALIGN_DOWN(addr, 2UL << 8));
+                          (unsigned long)ALIGN_DOWN(addr, 1UL << p->svms.default_granularity));
         end_limit = min(vma->vm_end >> PAGE_SHIFT,
-                        (unsigned long)ALIGN(addr + 1, 2UL << 8));
+                        (unsigned long)ALIGN(addr + 1, 1UL << p->svms.default_granularity));
+
         /* First range that starts after the fault address */
         node = interval_tree_iter_first(&p->svms.objects, addr + 1, ULONG_MAX);
         if (node) {
@@ -2883,7 +2952,7 @@ svm_fault_allowed(struct vm_area_struct *vma, bool write_fault)
 int
 svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
                         uint32_t vmid, uint32_t node_id,
-                        uint64_t addr, bool write_fault)
+                        uint64_t addr, uint64_t ts, bool write_fault)
 {
         unsigned long start, last, size;
         struct mm_struct *mm = NULL;
@@ -2893,7 +2962,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
         ktime_t timestamp = ktime_get_boottime();
         struct kfd_node *node;
         int32_t best_loc;
-        int32_t gpuidx = MAX_GPU_INSTANCE;
+        int32_t gpuid, gpuidx = MAX_GPU_INSTANCE;
         bool write_locked = false;
         struct vm_area_struct *vma;
         bool migration = false;
@@ -2914,11 +2983,38 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
         pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr);
 
         if (atomic_read(&svms->drain_pagefaults)) {
-                pr_debug("draining retry fault, drop fault 0x%llx\n", addr);
+                pr_debug("page fault handling disabled, drop fault 0x%llx\n", addr);
                 r = 0;
                 goto out;
         }
 
+        node = kfd_node_by_irq_ids(adev, node_id, vmid);
+        if (!node) {
+                pr_debug("kfd node does not exist node_id: %d, vmid: %d\n", node_id,
+                         vmid);
+                r = -EFAULT;
+                goto out;
+        }
+
+        if (kfd_process_gpuid_from_node(p, node, &gpuid, &gpuidx)) {
+                pr_debug("failed to get gpuid/gpuidex for node_id: %d\n", node_id);
+                r = -EFAULT;
+                goto out;
+        }
+
+        /* check if this page fault time stamp is before svms->checkpoint_ts */
+        if (svms->checkpoint_ts[gpuidx] != 0) {
+                if (amdgpu_ih_ts_after(ts, svms->checkpoint_ts[gpuidx])) {
+                        pr_debug("draining retry fault, drop fault 0x%llx\n", addr);
+                        r = 0;
+                        goto out;
+                } else
+                        /* ts is after svms->checkpoint_ts now, reset svms->checkpoint_ts
+                         * to zero to avoid following ts wrap around give wrong comparing
+                         */
+                        svms->checkpoint_ts[gpuidx] = 0;
+        }
+
         if (!p->xnack_enabled) {
                 pr_debug("XNACK not enabled for pasid 0x%x\n", pasid);
                 r = -EFAULT;
@@ -2935,13 +3031,6 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
                 goto out;
         }
 
-        node = kfd_node_by_irq_ids(adev, node_id, vmid);
-        if (!node) {
-                pr_debug("kfd node does not exist node_id: %d, vmid: %d\n", node_id,
-                         vmid);
-                r = -EFAULT;
-                goto out;
-        }
         mmap_read_lock(mm);
 retry_write_locked:
         mutex_lock(&svms->lock);
@@ -3026,8 +3115,6 @@ retry_write_locked:
         start = max_t(unsigned long, ALIGN_DOWN(addr, size), prange->start);
         last = min_t(unsigned long, ALIGN(addr + 1, size) - 1, prange->last);
         if (prange->actual_loc != 0 || best_loc != 0) {
-                migration = true;
-
                 if (best_loc) {
                         r = svm_migrate_to_vram(prange, best_loc, start, last,
                                         mm, KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
@@ -3050,7 +3137,9 @@ retry_write_locked:
                 if (r) {
                         pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n",
                                  r, svms, start, last);
-                        goto out_unlock_range;
+                        goto out_migrate_fail;
+                } else {
+                        migration = true;
                 }
         }
 
@@ -3060,6 +3149,7 @@ retry_write_locked:
                 pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
                          r, svms, start, last);
 
+out_migrate_fail:
         kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr,
                                      migration);
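The checkpoint comparison relies on a wraparound-safe ordering test: IH timestamps are 48 bits wide, and amdgpu_ih_ts_after() follows the usual idiom of shifting both values into the top of a 64-bit word so that a signed subtraction gives the right answer across the wrap (the macro body below follows that idiom and is an assumption for illustration, not a quote of the header). A self-contained check:

#include <stdint.h>
#include <stdio.h>

/* Wraparound-safe "t2 is after t1" for 48-bit timestamps: shift both
 * into the top 48 bits of a 64-bit word, compare the signed difference.
 */
#define ts_after(t1, t2) \
        (((int64_t)((t2) << 16) - (int64_t)((t1) << 16)) > 0)

int main(void)
{
        uint64_t near_wrap = 0xFFFFFFFFFFFFULL; /* last 48-bit tick */
        uint64_t wrapped   = 0x000000000002ULL; /* shortly after the wrap */

        /* naive compare gets this wrong; the shifted signed diff does not */
        printf("naive:    %d\n", wrapped > near_wrap);          /* 0 */
        printf("ts_after: %d\n", ts_after(near_wrap, wrapped)); /* 1 */
        return 0;
}

This is also why the hunk zeroes svms->checkpoint_ts[gpuidx] once a newer fault arrives: a stale checkpoint left in place could compare the wrong way after the counter wraps.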
@@ -3156,8 +3246,9 @@ void svm_range_list_fini(struct kfd_process *p)
         /*
          * Ensure no retry fault comes in afterwards, as page fault handler will
          * not find kfd process and take mm lock to recover fault.
+         * stop kfd page fault handing, then wait pending page faults got drained
          */
-        atomic_inc(&p->svms.drain_pagefaults);
+        atomic_set(&p->svms.drain_pagefaults, 1);
         svm_range_drain_retry_fault(&p->svms);
 
         list_for_each_entry_safe(prange, next, &p->svms.list, list) {
@@ -3191,6 +3282,12 @@ int svm_range_list_init(struct kfd_process *p)
                 if (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev->adev))
                         bitmap_set(svms->bitmap_supported, i, 1);
 
+        /* Value of default granularity cannot exceed 0x1B, the
+         * number of pages supported by a 4-level paging table
+         */
+        svms->default_granularity = min_t(u8, amdgpu_svm_default_granularity, 0x1B);
+        pr_debug("Default SVM Granularity to use: %d\n", svms->default_granularity);
+
         return 0;
 }
@@ -3335,7 +3432,7 @@ svm_range_best_prefetch_location(struct svm_range *prange)
                 goto out;
         }
 
-        if (bo_node->adev->gmc.is_app_apu) {
+        if (bo_node->adev->flags & AMD_IS_APU) {
                 best_loc = 0;
                 goto out;
         }
@@ -3424,7 +3521,7 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
                                 mm, KFD_MIGRATE_TRIGGER_PREFETCH);
         *migrated = !r;
 
-        return r;
+        return 0;
 }
 
 int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence)
@@ -3718,7 +3815,7 @@ svm_range_get_attr(struct kfd_process *p, struct mm_struct *mm,
         node = interval_tree_iter_first(&svms->objects, start, last);
         if (!node) {
                 pr_debug("range attrs not found return default values\n");
-                svm_range_set_default_attributes(svms, &location, &prefetch_loc,
+                svm_range_set_default_attributes(svms, &location, &prefetch_loc,
                                                  &granularity, &flags_and);
                 flags_or = flags_and;
                 if (p->xnack_enabled)
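For scale: a granularity g covers 1UL << g pages, so the old hard-coded 9 (the 2UL << 8 constant removed from svm_range_get_range_boundaries() above is the same quantity) is 512 pages, i.e. 2 MiB with 4 KiB pages, and the new module-parameter value is clamped to 0x1B (27). A quick standalone check of the arithmetic (assumes a 4 KiB page size):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        const uint64_t page_size = 4096;   /* assumed 4 KiB pages */
        const uint8_t cap = 0x1B;          /* clamp in svm_range_list_init() */
        uint8_t g;

        /* 2UL << 8 == 1UL << 9: the old boundary constant was granularity 9 */
        printf("2UL<<8 == 1UL<<9: %d\n", (2UL << 8) == (1UL << 9));

        for (g = 9; g <= cap; g += 9)
                printf("granularity %2u -> %llu pages (%llu MiB)\n", g,
                       (unsigned long long)(1ULL << g),
                       (unsigned long long)((1ULL << g) * page_size >> 20));
        return 0;
}

At the cap, one range covers 2^27 pages (512 GiB), which is why the init path refuses anything larger.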