diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 1379 |
1 files changed, 981 insertions, 398 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b9441ab457ea..a67285118c37 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -34,7 +34,9 @@ #include <drm/amdgpu_drm.h> #include <drm/drm_drv.h> #include <drm/ttm/ttm_tt.h> +#include <drm/drm_exec.h> #include "amdgpu.h" +#include "amdgpu_vm.h" #include "amdgpu_trace.h" #include "amdgpu_amdkfd.h" #include "amdgpu_gmc.h" @@ -111,9 +113,9 @@ struct amdgpu_prt_cb { }; /** - * struct amdgpu_vm_tlb_seq_cb - Helper to increment the TLB flush sequence + * struct amdgpu_vm_tlb_seq_struct - Helper to increment the TLB flush sequence */ -struct amdgpu_vm_tlb_seq_cb { +struct amdgpu_vm_tlb_seq_struct { /** * @vm: pointer to the amdgpu_vm structure to set the fence sequence on */ @@ -126,43 +128,14 @@ struct amdgpu_vm_tlb_seq_cb { }; /** - * amdgpu_vm_set_pasid - manage pasid and vm ptr mapping - * - * @adev: amdgpu_device pointer - * @vm: amdgpu_vm pointer - * @pasid: the pasid the VM is using on this GPU - * - * Set the pasid this VM is using on this GPU, can also be used to remove the - * pasid by passing in zero. + * amdgpu_vm_assert_locked - check if VM is correctly locked + * @vm: the VM which schould be tested * + * Asserts that the VM root PD is locked. */ -int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm, - u32 pasid) +static void amdgpu_vm_assert_locked(struct amdgpu_vm *vm) { - int r; - - if (vm->pasid == pasid) - return 0; - - if (vm->pasid) { - r = xa_err(xa_erase_irq(&adev->vm_manager.pasids, vm->pasid)); - if (r < 0) - return r; - - vm->pasid = 0; - } - - if (pasid) { - r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm, - GFP_KERNEL)); - if (r < 0) - return r; - - vm->pasid = pasid; - } - - - return 0; + dma_resv_assert_held(vm->root.bo->tbo.base.resv); } /** @@ -179,6 +152,7 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo) struct amdgpu_bo *bo = vm_bo->bo; vm_bo->moved = true; + amdgpu_vm_assert_locked(vm); spin_lock(&vm_bo->vm->status_lock); if (bo->tbo.type == ttm_bo_type_kernel) list_move(&vm_bo->vm_status, &vm->evicted); @@ -196,6 +170,7 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo) */ static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo) { + amdgpu_vm_assert_locked(vm_bo->vm); spin_lock(&vm_bo->vm->status_lock); list_move(&vm_bo->vm_status, &vm_bo->vm->moved); spin_unlock(&vm_bo->vm->status_lock); @@ -211,6 +186,7 @@ static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo) */ static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo) { + amdgpu_vm_assert_locked(vm_bo->vm); spin_lock(&vm_bo->vm->status_lock); list_move(&vm_bo->vm_status, &vm_bo->vm->idle); spin_unlock(&vm_bo->vm->status_lock); @@ -233,6 +209,22 @@ static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo) } /** + * amdgpu_vm_bo_evicted_user - vm_bo is evicted + * + * @vm_bo: vm_bo which is evicted + * + * State for BOs used by user mode queues which are not at the location they + * should be. + */ +static void amdgpu_vm_bo_evicted_user(struct amdgpu_vm_bo_base *vm_bo) +{ + vm_bo->moved = true; + spin_lock(&vm_bo->vm->status_lock); + list_move(&vm_bo->vm_status, &vm_bo->vm->evicted_user); + spin_unlock(&vm_bo->vm->status_lock); +} + +/** * amdgpu_vm_bo_relocated - vm_bo is reloacted * * @vm_bo: vm_bo which is relocated @@ -242,6 +234,7 @@ static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo) */ static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo) { + amdgpu_vm_assert_locked(vm_bo->vm); if (vm_bo->bo->parent) { spin_lock(&vm_bo->vm->status_lock); list_move(&vm_bo->vm_status, &vm_bo->vm->relocated); @@ -261,12 +254,149 @@ static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo) */ static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo) { + amdgpu_vm_assert_locked(vm_bo->vm); spin_lock(&vm_bo->vm->status_lock); list_move(&vm_bo->vm_status, &vm_bo->vm->done); spin_unlock(&vm_bo->vm->status_lock); } /** + * amdgpu_vm_bo_reset_state_machine - reset the vm_bo state machine + * @vm: the VM which state machine to reset + * + * Move all vm_bo object in the VM into a state where they will be updated + * again during validation. + */ +static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm) +{ + struct amdgpu_vm_bo_base *vm_bo, *tmp; + + amdgpu_vm_assert_locked(vm); + + spin_lock(&vm->status_lock); + list_splice_init(&vm->done, &vm->invalidated); + list_for_each_entry(vm_bo, &vm->invalidated, vm_status) + vm_bo->moved = true; + + list_for_each_entry_safe(vm_bo, tmp, &vm->idle, vm_status) { + struct amdgpu_bo *bo = vm_bo->bo; + + vm_bo->moved = true; + if (!bo || bo->tbo.type != ttm_bo_type_kernel) + list_move(&vm_bo->vm_status, &vm_bo->vm->moved); + else if (bo->parent) + list_move(&vm_bo->vm_status, &vm_bo->vm->relocated); + } + spin_unlock(&vm->status_lock); +} + +/** + * amdgpu_vm_update_shared - helper to update shared memory stat + * @base: base structure for tracking BO usage in a VM + * + * Takes the vm status_lock and updates the shared memory stat. If the basic + * stat changed (e.g. buffer was moved) amdgpu_vm_update_stats need to be called + * as well. + */ +static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base) +{ + struct amdgpu_vm *vm = base->vm; + struct amdgpu_bo *bo = base->bo; + uint64_t size = amdgpu_bo_size(bo); + uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo); + bool shared; + + dma_resv_assert_held(bo->tbo.base.resv); + spin_lock(&vm->status_lock); + shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base); + if (base->shared != shared) { + base->shared = shared; + if (shared) { + vm->stats[bo_memtype].drm.shared += size; + vm->stats[bo_memtype].drm.private -= size; + } else { + vm->stats[bo_memtype].drm.shared -= size; + vm->stats[bo_memtype].drm.private += size; + } + } + spin_unlock(&vm->status_lock); +} + +/** + * amdgpu_vm_bo_update_shared - callback when bo gets shared/unshared + * @bo: amdgpu buffer object + * + * Update the per VM stats for all the vm if needed from private to shared or + * vice versa. + */ +void amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo) +{ + struct amdgpu_vm_bo_base *base; + + for (base = bo->vm_bo; base; base = base->next) + amdgpu_vm_update_shared(base); +} + +/** + * amdgpu_vm_update_stats_locked - helper to update normal memory stat + * @base: base structure for tracking BO usage in a VM + * @res: the ttm_resource to use for the purpose of accounting, may or may not + * be bo->tbo.resource + * @sign: if we should add (+1) or subtract (-1) from the stat + * + * Caller need to have the vm status_lock held. Useful for when multiple update + * need to happen at the same time. + */ +static void amdgpu_vm_update_stats_locked(struct amdgpu_vm_bo_base *base, + struct ttm_resource *res, int sign) +{ + struct amdgpu_vm *vm = base->vm; + struct amdgpu_bo *bo = base->bo; + int64_t size = sign * amdgpu_bo_size(bo); + uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo); + + /* For drm-total- and drm-shared-, BO are accounted by their preferred + * placement, see also amdgpu_bo_mem_stats_placement. + */ + if (base->shared) + vm->stats[bo_memtype].drm.shared += size; + else + vm->stats[bo_memtype].drm.private += size; + + if (res && res->mem_type < __AMDGPU_PL_NUM) { + uint32_t res_memtype = res->mem_type; + + vm->stats[res_memtype].drm.resident += size; + /* BO only count as purgeable if it is resident, + * since otherwise there's nothing to purge. + */ + if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) + vm->stats[res_memtype].drm.purgeable += size; + if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(res_memtype))) + vm->stats[bo_memtype].evicted += size; + } +} + +/** + * amdgpu_vm_update_stats - helper to update normal memory stat + * @base: base structure for tracking BO usage in a VM + * @res: the ttm_resource to use for the purpose of accounting, may or may not + * be bo->tbo.resource + * @sign: if we should add (+1) or subtract (-1) from the stat + * + * Updates the basic memory stat when bo is added/deleted/moved. + */ +void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base, + struct ttm_resource *res, int sign) +{ + struct amdgpu_vm *vm = base->vm; + + spin_lock(&vm->status_lock); + amdgpu_vm_update_stats_locked(base, res, sign); + spin_unlock(&vm->status_lock); +} + +/** * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm * * @base: base structure for tracking BO usage in a VM @@ -289,7 +419,12 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, base->next = bo->vm_bo; bo->vm_bo = base; - if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv) + spin_lock(&vm->status_lock); + base->shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base); + amdgpu_vm_update_stats_locked(base, bo->tbo.resource, +1); + spin_unlock(&vm->status_lock); + + if (!amdgpu_vm_is_bo_always_valid(vm, bo)) return; dma_resv_assert_held(vm->root.bo->tbo.base.resv); @@ -313,25 +448,60 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, } /** - * amdgpu_vm_get_pd_bo - add the VM PD to a validation list + * amdgpu_vm_lock_pd - lock PD in drm_exec * * @vm: vm providing the BOs - * @validated: head of validation list - * @entry: entry to add + * @exec: drm execution context + * @num_fences: number of extra fences to reserve * - * Add the page directory to the list of BOs to - * validate for command submission. + * Lock the VM root PD in the DRM execution context. */ -void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, - struct list_head *validated, - struct amdgpu_bo_list_entry *entry) +int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec, + unsigned int num_fences) { - entry->priority = 0; - entry->tv.bo = &vm->root.bo->tbo; - /* Two for VM updates, one for TTM and one for the CS job */ - entry->tv.num_shared = 4; - entry->user_pages = NULL; - list_add(&entry->tv.head, validated); + /* We need at least two fences for the VM PD/PT updates */ + return drm_exec_prepare_obj(exec, &vm->root.bo->tbo.base, + 2 + num_fences); +} + +/** + * amdgpu_vm_lock_done_list - lock all BOs on the done list + * @vm: vm providing the BOs + * @exec: drm execution context + * @num_fences: number of extra fences to reserve + * + * Lock the BOs on the done list in the DRM execution context. + */ +int amdgpu_vm_lock_done_list(struct amdgpu_vm *vm, struct drm_exec *exec, + unsigned int num_fences) +{ + struct list_head *prev = &vm->done; + struct amdgpu_bo_va *bo_va; + struct amdgpu_bo *bo; + int ret; + + /* We can only trust prev->next while holding the lock */ + spin_lock(&vm->status_lock); + while (!list_is_head(prev->next, &vm->done)) { + bo_va = list_entry(prev->next, typeof(*bo_va), base.vm_status); + + bo = bo_va->base.bo; + if (bo) { + amdgpu_bo_ref(bo); + spin_unlock(&vm->status_lock); + + ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 1); + amdgpu_bo_unref(&bo); + if (unlikely(ret)) + return ret; + + spin_lock(&vm->status_lock); + } + prev = prev->next; + } + spin_unlock(&vm->status_lock); + + return 0; } /** @@ -351,28 +521,93 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, spin_unlock(&adev->mman.bdev.lru_lock); } +/* Create scheduler entities for page table updates */ +static int amdgpu_vm_init_entities(struct amdgpu_device *adev, + struct amdgpu_vm *vm) +{ + int r; + + r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL, + adev->vm_manager.vm_pte_scheds, + adev->vm_manager.vm_pte_num_scheds, NULL); + if (r) + goto error; + + return drm_sched_entity_init(&vm->delayed, DRM_SCHED_PRIORITY_NORMAL, + adev->vm_manager.vm_pte_scheds, + adev->vm_manager.vm_pte_num_scheds, NULL); + +error: + drm_sched_entity_destroy(&vm->immediate); + return r; +} + +/* Destroy the entities for page table updates again */ +static void amdgpu_vm_fini_entities(struct amdgpu_vm *vm) +{ + drm_sched_entity_destroy(&vm->immediate); + drm_sched_entity_destroy(&vm->delayed); +} + +/** + * amdgpu_vm_generation - return the page table re-generation counter + * @adev: the amdgpu_device + * @vm: optional VM to check, might be NULL + * + * Returns a page table re-generation token to allow checking if submissions + * are still valid to use this VM. The VM parameter might be NULL in which case + * just the VRAM lost counter will be used. + */ +uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm) +{ + uint64_t result = (u64)atomic_read(&adev->vram_lost_counter) << 32; + + if (!vm) + return result; + + result += lower_32_bits(vm->generation); + /* Add one if the page tables will be re-generated on next CS */ + if (drm_sched_entity_error(&vm->delayed)) + ++result; + + return result; +} + /** - * amdgpu_vm_validate_pt_bos - validate the page table BOs + * amdgpu_vm_validate - validate evicted BOs tracked in the VM * * @adev: amdgpu device pointer * @vm: vm providing the BOs + * @ticket: optional reservation ticket used to reserve the VM * @validate: callback to do the validation * @param: parameter for the validation callback * - * Validate the page table BOs on command submission if neccessary. + * Validate the page table BOs and per-VM BOs on command submission if + * necessary. If a ticket is given, also try to validate evicted user queue + * BOs. They must already be reserved with the given ticket. * * Returns: * Validation result. */ -int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, - int (*validate)(void *p, struct amdgpu_bo *bo), - void *param) +int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct ww_acquire_ctx *ticket, + int (*validate)(void *p, struct amdgpu_bo *bo), + void *param) { + uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm); struct amdgpu_vm_bo_base *bo_base; - struct amdgpu_bo *shadow; struct amdgpu_bo *bo; int r; + if (vm->generation != new_vm_generation) { + vm->generation = new_vm_generation; + amdgpu_vm_bo_reset_state_machine(vm); + amdgpu_vm_fini_entities(vm); + r = amdgpu_vm_init_entities(adev, vm); + if (r) + return r; + } + spin_lock(&vm->status_lock); while (!list_empty(&vm->evicted)) { bo_base = list_first_entry(&vm->evicted, @@ -381,16 +616,10 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, spin_unlock(&vm->status_lock); bo = bo_base->bo; - shadow = amdgpu_bo_shadowed(bo); r = validate(param, bo); if (r) return r; - if (shadow) { - r = validate(param, shadow); - if (r) - return r; - } if (bo->tbo.type != ttm_bo_type_kernel) { amdgpu_vm_bo_moved(bo_base); @@ -400,6 +629,23 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, } spin_lock(&vm->status_lock); } + while (ticket && !list_empty(&vm->evicted_user)) { + bo_base = list_first_entry(&vm->evicted_user, + struct amdgpu_vm_bo_base, + vm_status); + spin_unlock(&vm->status_lock); + + bo = bo_base->bo; + dma_resv_assert_held(bo->tbo.base.resv); + + r = validate(param, bo); + if (r) + return r; + + amdgpu_vm_bo_invalidated(bo_base); + + spin_lock(&vm->status_lock); + } spin_unlock(&vm->status_lock); amdgpu_vm_eviction_lock(vm); @@ -417,22 +663,31 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, * Check if all VM PDs/PTs are ready for updates * * Returns: - * True if VM is not evicting. + * True if VM is not evicting and all VM entities are not stopped */ bool amdgpu_vm_ready(struct amdgpu_vm *vm) { - bool empty; bool ret; + amdgpu_vm_assert_locked(vm); + amdgpu_vm_eviction_lock(vm); ret = !vm->evicting; amdgpu_vm_eviction_unlock(vm); spin_lock(&vm->status_lock); - empty = list_empty(&vm->evicted); + ret &= list_empty(&vm->evicted); spin_unlock(&vm->status_lock); - return ret && empty; + spin_lock(&vm->immediate.lock); + ret &= !vm->immediate.stopped; + spin_unlock(&vm->immediate.lock); + + spin_lock(&vm->delayed.lock); + ret &= !vm->delayed.stopped; + spin_unlock(&vm->delayed.lock); + + return ret; } /** @@ -483,7 +738,7 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, struct amdgpu_job *job) { struct amdgpu_device *adev = ring->adev; - unsigned vmhub = ring->funcs->vmhub; + unsigned vmhub = ring->vm_hub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; if (job->vmid == 0) @@ -517,16 +772,18 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync) { struct amdgpu_device *adev = ring->adev; - unsigned vmhub = ring->funcs->vmhub; + struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id]; + unsigned vmhub = ring->vm_hub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; struct amdgpu_vmid *id = &id_mgr->ids[job->vmid]; bool spm_update_needed = job->spm_update_needed; bool gds_switch_needed = ring->funcs->emit_gds_switch && job->gds_switch_needed; bool vm_flush_needed = job->vm_needs_flush; - struct dma_fence *fence = NULL; + bool cleaner_shader_needed = false; bool pasid_mapping_needed = false; - unsigned patch_offset = 0; + struct dma_fence *fence = NULL; + unsigned int patch; int r; if (amdgpu_vmid_had_gpu_reset(adev, id)) { @@ -548,16 +805,26 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && ring->funcs->emit_wreg; - if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) + cleaner_shader_needed = job->run_cleaner_shader && + adev->gfx.enable_cleaner_shader && + ring->funcs->emit_cleaner_shader && job->base.s_fence && + &job->base.s_fence->scheduled == isolation->spearhead; + + if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync && + !cleaner_shader_needed) return 0; amdgpu_ring_ib_begin(ring); if (ring->funcs->init_cond_exec) - patch_offset = amdgpu_ring_init_cond_exec(ring); + patch = amdgpu_ring_init_cond_exec(ring, + ring->cond_exe_gpu_addr); if (need_pipe_sync) amdgpu_ring_emit_pipeline_sync(ring); + if (cleaner_shader_needed) + ring->funcs->emit_cleaner_shader(ring); + if (vm_flush_needed) { trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr); amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr); @@ -567,9 +834,9 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid); if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid) - adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid); + adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid); - if (!ring->is_mes_queue && ring->funcs->emit_gds_switch && + if (ring->funcs->emit_gds_switch && gds_switch_needed) { amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base, job->gds_size, job->gws_base, @@ -577,10 +844,13 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, job->oa_size); } - if (vm_flush_needed || pasid_mapping_needed) { - r = amdgpu_fence_emit(ring, &fence, NULL, 0); + if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) { + r = amdgpu_fence_emit(ring, job->hw_vm_fence, 0); if (r) return r; + fence = &job->hw_vm_fence->base; + /* get a ref for the job */ + dma_fence_get(fence); } if (vm_flush_needed) { @@ -599,16 +869,28 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, id->pasid_mapping = dma_fence_get(fence); mutex_unlock(&id_mgr->lock); } + + /* + * Make sure that all other submissions wait for the cleaner shader to + * finish before we push them to the HW. + */ + if (cleaner_shader_needed) { + trace_amdgpu_cleaner_shader(ring, fence); + mutex_lock(&adev->enforce_isolation_mutex); + dma_fence_put(isolation->spearhead); + isolation->spearhead = dma_fence_get(fence); + mutex_unlock(&adev->enforce_isolation_mutex); + } dma_fence_put(fence); - if (ring->funcs->patch_cond_exec) - amdgpu_ring_patch_cond_exec(ring, patch_offset); + amdgpu_ring_patch_cond_exec(ring, patch); /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */ if (ring->funcs->emit_switch_buffer) { amdgpu_ring_emit_switch_buffer(ring); amdgpu_ring_emit_switch_buffer(ring); } + amdgpu_ring_ib_end(ring); return 0; } @@ -690,6 +972,8 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, LIST_HEAD(relocated); int r, idx; + amdgpu_vm_assert_locked(vm); + spin_lock(&vm->status_lock); list_splice_init(&vm->relocated, &relocated); spin_unlock(&vm->status_lock); @@ -705,7 +989,8 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, params.vm = vm; params.immediate = immediate; - r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT); + r = vm->update_funcs->prepare(¶ms, NULL, + AMDGPU_KERNEL_JOB_ID_VM_UPDATE_PDES); if (r) goto error; @@ -746,7 +1031,7 @@ error: static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence, struct dma_fence_cb *cb) { - struct amdgpu_vm_tlb_seq_cb *tlb_cb; + struct amdgpu_vm_tlb_seq_struct *tlb_cb; tlb_cb = container_of(cb, typeof(*tlb_cb), cb); atomic64_inc(&tlb_cb->vm->tlb_seq); @@ -754,6 +1039,46 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence, } /** + * amdgpu_vm_tlb_flush - prepare TLB flush + * + * @params: parameters for update + * @fence: input fence to sync TLB flush with + * @tlb_cb: the callback structure + * + * Increments the tlb sequence to make sure that future CS execute a VM flush. + */ +static void +amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params, + struct dma_fence **fence, + struct amdgpu_vm_tlb_seq_struct *tlb_cb) +{ + struct amdgpu_vm *vm = params->vm; + + tlb_cb->vm = vm; + if (!fence || !*fence) { + amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb); + return; + } + + if (!dma_fence_add_callback(*fence, &tlb_cb->cb, + amdgpu_vm_tlb_seq_cb)) { + dma_fence_put(vm->last_tlb_flush); + vm->last_tlb_flush = dma_fence_get(*fence); + } else { + amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb); + } + + /* Prepare a TLB flush fence to be attached to PTs */ + if (!params->unlocked) { + amdgpu_vm_tlb_fence_create(params->adev, vm, fence); + + /* Makes sure no PD/PT is freed before the flush */ + dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence, + DMA_RESV_USAGE_BOOKKEEP); + } +} + +/** * amdgpu_vm_update_range - update a range in the vm page table * * @adev: amdgpu_device pointer to use for commands @@ -761,7 +1086,8 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence, * @immediate: immediate submission in a page fault * @unlocked: unlocked invalidation during MM callback * @flush_tlb: trigger tlb invalidation after update completed - * @resv: fences we need to sync to + * @allow_override: change MTYPE for local NUMA nodes + * @sync: fences we need to sync to * @start: start of mapped range * @last: last mapped entry * @flags: flags for the entries @@ -778,15 +1104,15 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence, */ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, bool immediate, bool unlocked, bool flush_tlb, - struct dma_resv *resv, uint64_t start, uint64_t last, - uint64_t flags, uint64_t offset, uint64_t vram_base, + bool allow_override, struct amdgpu_sync *sync, + uint64_t start, uint64_t last, uint64_t flags, + uint64_t offset, uint64_t vram_base, struct ttm_resource *res, dma_addr_t *pages_addr, struct dma_fence **fence) { + struct amdgpu_vm_tlb_seq_struct *tlb_cb; struct amdgpu_vm_update_params params; - struct amdgpu_vm_tlb_seq_cb *tlb_cb; struct amdgpu_res_cursor cursor; - enum amdgpu_sync_mode sync_mode; int r, idx; if (!drm_dev_enter(adev_to_drm(adev), &idx)) @@ -794,20 +1120,20 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL); if (!tlb_cb) { - r = -ENOMEM; - goto error_unlock; + drm_dev_exit(idx); + return -ENOMEM; } /* Vega20+XGMI where PTEs get inadvertently cached in L2 texture cache, * heavy-weight flush TLB unconditionally. */ flush_tlb |= adev->gmc.xgmi.num_physical_nodes && - adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0); + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0); /* * On GFX8 and older any 8 PTE block with a valid bit set enters the TLB */ - flush_tlb |= adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 0, 0); + flush_tlb |= amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 0, 0); memset(¶ms, 0, sizeof(params)); params.adev = adev; @@ -815,14 +1141,9 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, params.immediate = immediate; params.pages_addr = pages_addr; params.unlocked = unlocked; - - /* Implicitly sync to command submissions in the same VM before - * unmapping. Sync to moving fences before mapping. - */ - if (!(flags & AMDGPU_PTE_VALID)) - sync_mode = AMDGPU_SYNC_EQ_OWNER; - else - sync_mode = AMDGPU_SYNC_EXPLICIT; + params.needs_flush = flush_tlb; + params.allow_override = allow_override; + INIT_LIST_HEAD(¶ms.tlb_flush_waitlist); amdgpu_vm_eviction_lock(vm); if (vm->evicting) { @@ -838,7 +1159,8 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, dma_fence_put(tmp); } - r = vm->update_funcs->prepare(¶ms, resv, sync_mode); + r = vm->update_funcs->prepare(¶ms, sync, + AMDGPU_KERNEL_JOB_ID_VM_UPDATE_RANGE); if (r) goto error_free; @@ -867,6 +1189,8 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, pages_addr[idx - 1] + PAGE_SIZE)) break; } + if (!contiguous) + count--; num_entries = count * AMDGPU_GPU_PAGES_IN_CPU_PAGE; } @@ -879,7 +1203,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, params.pages_addr = NULL; } - } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) { + } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT_FLAG(adev))) { addr = vram_base + cursor.start; } else { addr = 0; @@ -895,73 +1219,31 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, } r = vm->update_funcs->commit(¶ms, fence); + if (r) + goto error_free; - if (flush_tlb || params.table_freed) { - tlb_cb->vm = vm; - if (fence && *fence && - !dma_fence_add_callback(*fence, &tlb_cb->cb, - amdgpu_vm_tlb_seq_cb)) { - dma_fence_put(vm->last_tlb_flush); - vm->last_tlb_flush = dma_fence_get(*fence); - } else { - amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb); - } + if (params.needs_flush) { + amdgpu_vm_tlb_flush(¶ms, fence, tlb_cb); tlb_cb = NULL; } + amdgpu_vm_pt_free_list(adev, ¶ms); + error_free: kfree(tlb_cb); - -error_unlock: amdgpu_vm_eviction_unlock(vm); drm_dev_exit(idx); return r; } -void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem, - uint64_t *gtt_mem, uint64_t *cpu_mem) +void amdgpu_vm_get_memory(struct amdgpu_vm *vm, + struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM]) { - struct amdgpu_bo_va *bo_va, *tmp; - spin_lock(&vm->status_lock); - list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) { - if (!bo_va->base.bo) - continue; - amdgpu_bo_get_memory(bo_va->base.bo, vram_mem, - gtt_mem, cpu_mem); - } - list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status) { - if (!bo_va->base.bo) - continue; - amdgpu_bo_get_memory(bo_va->base.bo, vram_mem, - gtt_mem, cpu_mem); - } - list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status) { - if (!bo_va->base.bo) - continue; - amdgpu_bo_get_memory(bo_va->base.bo, vram_mem, - gtt_mem, cpu_mem); - } - list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) { - if (!bo_va->base.bo) - continue; - amdgpu_bo_get_memory(bo_va->base.bo, vram_mem, - gtt_mem, cpu_mem); - } - list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) { - if (!bo_va->base.bo) - continue; - amdgpu_bo_get_memory(bo_va->base.bo, vram_mem, - gtt_mem, cpu_mem); - } - list_for_each_entry_safe(bo_va, tmp, &vm->done, base.vm_status) { - if (!bo_va->base.bo) - continue; - amdgpu_bo_get_memory(bo_va->base.bo, vram_mem, - gtt_mem, cpu_mem); - } + memcpy(stats, vm->stats, sizeof(*stats) * __AMDGPU_PL_NUM); spin_unlock(&vm->status_lock); } + /** * amdgpu_vm_bo_update - update all BO mappings in the vm page table * @@ -980,34 +1262,59 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, struct amdgpu_bo *bo = bo_va->base.bo; struct amdgpu_vm *vm = bo_va->base.vm; struct amdgpu_bo_va_mapping *mapping; + struct dma_fence **last_update; dma_addr_t *pages_addr = NULL; struct ttm_resource *mem; - struct dma_fence **last_update; + struct amdgpu_sync sync; bool flush_tlb = clear; - struct dma_resv *resv; uint64_t vram_base; uint64_t flags; + bool uncached; int r; - if (clear || !bo) { + amdgpu_sync_create(&sync); + if (clear) { + mem = NULL; + + /* Implicitly sync to command submissions in the same VM before + * unmapping. + */ + r = amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.base.resv, + AMDGPU_SYNC_EQ_OWNER, vm); + if (r) + goto error_free; + if (bo) { + r = amdgpu_sync_kfd(&sync, bo->tbo.base.resv); + if (r) + goto error_free; + } + } else if (!bo) { mem = NULL; - resv = vm->root.bo->tbo.base.resv; + + /* PRT map operations don't need to sync to anything. */ + } else { struct drm_gem_object *obj = &bo->tbo.base; - resv = bo->tbo.base.resv; - if (obj->import_attach && bo_va->is_xgmi) { + if (drm_gem_is_imported(obj) && bo_va->is_xgmi) { struct dma_buf *dma_buf = obj->import_attach->dmabuf; struct drm_gem_object *gobj = dma_buf->priv; struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj); - if (abo->tbo.resource->mem_type == TTM_PL_VRAM) + if (abo->tbo.resource && + abo->tbo.resource->mem_type == TTM_PL_VRAM) bo = gem_to_amdgpu_bo(gobj); } mem = bo->tbo.resource; - if (mem->mem_type == TTM_PL_TT || - mem->mem_type == AMDGPU_PL_PREEMPT) + if (mem && (mem->mem_type == TTM_PL_TT || + mem->mem_type == AMDGPU_PL_PREEMPT)) pages_addr = bo->tbo.ttm->dma_address; + + /* Implicitly sync to moving fences before mapping anything */ + r = amdgpu_sync_resv(adev, &sync, bo->tbo.base.resv, + AMDGPU_SYNC_EXPLICIT, vm); + if (r) + goto error_free; } if (bo) { @@ -1020,13 +1327,14 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); vram_base = bo_adev->vm_manager.vram_base_offset; + uncached = (bo->flags & AMDGPU_GEM_CREATE_UNCACHED) != 0; } else { flags = 0x0; vram_base = 0; + uncached = false; } - if (clear || (bo && bo->tbo.base.resv == - vm->root.bo->tbo.base.resv)) + if (clear || amdgpu_vm_is_bo_always_valid(vm, bo)) last_update = &vm->last_update; else last_update = &bo_va->last_pt_update; @@ -1045,34 +1353,34 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here * but in case of something, we filter the flags in first place */ - if (!(mapping->flags & AMDGPU_PTE_READABLE)) + if (!(mapping->flags & AMDGPU_VM_PAGE_READABLE)) update_flags &= ~AMDGPU_PTE_READABLE; - if (!(mapping->flags & AMDGPU_PTE_WRITEABLE)) + if (!(mapping->flags & AMDGPU_VM_PAGE_WRITEABLE)) update_flags &= ~AMDGPU_PTE_WRITEABLE; /* Apply ASIC specific mapping flags */ - amdgpu_gmc_get_vm_pte(adev, mapping, &update_flags); + amdgpu_gmc_get_vm_pte(adev, vm, bo, mapping->flags, + &update_flags); trace_amdgpu_vm_bo_update(mapping); r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, - resv, mapping->start, mapping->last, - update_flags, mapping->offset, - vram_base, mem, pages_addr, - last_update); + !uncached, &sync, mapping->start, + mapping->last, update_flags, + mapping->offset, vram_base, mem, + pages_addr, last_update); if (r) - return r; + goto error_free; } /* If the BO is not in its preferred location add it back to * the evicted list so that it gets validated again on the * next command submission. */ - if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv) { - uint32_t mem_type = bo->tbo.resource->mem_type; - - if (!(bo->preferred_domains & - amdgpu_mem_type_to_domain(mem_type))) + if (amdgpu_vm_is_bo_always_valid(vm, bo)) { + if (bo->tbo.resource && + !(bo->preferred_domains & + amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type))) amdgpu_vm_bo_evicted(&bo_va->base); else amdgpu_vm_bo_idle(&bo_va->base); @@ -1089,7 +1397,9 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, trace_amdgpu_vm_bo_mapping(mapping); } - return 0; +error_free: + amdgpu_sync_free(&sync); + return r; } /** @@ -1191,7 +1501,7 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *mapping, struct dma_fence *fence) { - if (mapping->flags & AMDGPU_PTE_PRT) + if (mapping->flags & AMDGPU_VM_PAGE_PRT) amdgpu_vm_add_prt_cb(adev, fence); kfree(mapping); } @@ -1236,29 +1546,34 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct dma_fence **fence) { - struct dma_resv *resv = vm->root.bo->tbo.base.resv; struct amdgpu_bo_va_mapping *mapping; - uint64_t init_pte_value = 0; struct dma_fence *f = NULL; + struct amdgpu_sync sync; int r; + + /* + * Implicitly sync to command submissions in the same VM before + * unmapping. + */ + amdgpu_sync_create(&sync); + r = amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.base.resv, + AMDGPU_SYNC_EQ_OWNER, vm); + if (r) + goto error_free; + while (!list_empty(&vm->freed)) { mapping = list_first_entry(&vm->freed, struct amdgpu_bo_va_mapping, list); list_del(&mapping->list); - if (vm->pte_support_ats && - mapping->start < AMDGPU_GMC_HOLE_START) - init_pte_value = AMDGPU_PTE_DEFAULT_ATC; - - r = amdgpu_vm_update_range(adev, vm, false, false, true, resv, - mapping->start, mapping->last, - init_pte_value, 0, 0, NULL, NULL, - &f); + r = amdgpu_vm_update_range(adev, vm, false, false, true, false, + &sync, mapping->start, mapping->last, + 0, 0, 0, NULL, NULL, &f); amdgpu_vm_free_mapping(adev, vm, mapping, f); if (r) { dma_fence_put(f); - return r; + goto error_free; } } @@ -1269,7 +1584,9 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, dma_fence_put(f); } - return 0; +error_free: + amdgpu_sync_free(&sync); + return r; } @@ -1278,6 +1595,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * @vm: requested vm + * @ticket: optional reservation ticket used to reserve the VM * * Make sure all BOs which are moved are updated in the PTs. * @@ -1287,11 +1605,12 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, * PTs have to be reserved! */ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, - struct amdgpu_vm *vm) + struct amdgpu_vm *vm, + struct ww_acquire_ctx *ticket) { struct amdgpu_bo_va *bo_va; struct dma_resv *resv; - bool clear; + bool clear, unlock; int r; spin_lock(&vm->status_lock); @@ -1314,18 +1633,35 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, spin_unlock(&vm->status_lock); /* Try to reserve the BO to avoid clearing its ptes */ - if (!amdgpu_vm_debug && dma_resv_trylock(resv)) + if (!adev->debug_vm && dma_resv_trylock(resv)) { + clear = false; + unlock = true; + /* The caller is already holding the reservation lock */ + } else if (ticket && dma_resv_locking_ctx(resv) == ticket) { clear = false; + unlock = false; /* Somebody else is using the BO right now */ - else + } else { clear = true; + unlock = false; + } r = amdgpu_vm_bo_update(adev, bo_va, clear); + + if (unlock) + dma_resv_unlock(resv); if (r) return r; - if (!clear) - dma_resv_unlock(resv); + /* Remember evicted DMABuf imports in compute VMs for later + * validation + */ + if (vm->is_compute_context && + drm_gem_is_imported(&bo_va->base.bo->tbo.base) && + (!bo_va->base.bo->tbo.resource || + bo_va->base.bo->tbo.resource->mem_type == TTM_PL_SYSTEM)) + amdgpu_vm_bo_evicted_user(&bo_va->base); + spin_lock(&vm->status_lock); } spin_unlock(&vm->status_lock); @@ -1334,6 +1670,51 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, } /** + * amdgpu_vm_flush_compute_tlb - Flush TLB on compute VM + * + * @adev: amdgpu_device pointer + * @vm: requested vm + * @flush_type: flush type + * @xcc_mask: mask of XCCs that belong to the compute partition in need of a TLB flush. + * + * Flush TLB if needed for a compute VM. + * + * Returns: + * 0 for success. + */ +int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + uint32_t flush_type, + uint32_t xcc_mask) +{ + uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm); + bool all_hub = false; + int xcc = 0, r = 0; + + WARN_ON_ONCE(!vm->is_compute_context); + + /* + * It can be that we race and lose here, but that is extremely unlikely + * and the worst thing which could happen is that we flush the changes + * into the TLB once more which is harmless. + */ + if (atomic64_xchg(&vm->kfd_last_flushed_seq, tlb_seq) == tlb_seq) + return 0; + + if (adev->family == AMDGPU_FAMILY_AI || + adev->family == AMDGPU_FAMILY_RV) + all_hub = true; + + for_each_inst(xcc, xcc_mask) { + r = amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, flush_type, + all_hub, xcc); + if (r) + break; + } + return r; +} + +/** * amdgpu_vm_bo_add - add a bo to a specific vm * * @adev: amdgpu_device pointer @@ -1361,6 +1742,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, amdgpu_vm_bo_base_init(&bo_va->base, vm, bo); bo_va->ref_count = 1; + bo_va->last_pt_update = dma_fence_get_stub(); INIT_LIST_HEAD(&bo_va->valids); INIT_LIST_HEAD(&bo_va->invalids); @@ -1398,16 +1780,46 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, list_add(&mapping->list, &bo_va->invalids); amdgpu_vm_it_insert(mapping, &vm->va); - if (mapping->flags & AMDGPU_PTE_PRT) + if (mapping->flags & AMDGPU_VM_PAGE_PRT) amdgpu_vm_prt_get(adev); - if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv && - !bo_va->base.moved) { + if (amdgpu_vm_is_bo_always_valid(vm, bo) && !bo_va->base.moved) amdgpu_vm_bo_moved(&bo_va->base); - } + trace_amdgpu_vm_bo_map(bo_va, mapping); } +/* Validate operation parameters to prevent potential abuse */ +static int amdgpu_vm_verify_parameters(struct amdgpu_device *adev, + struct amdgpu_bo *bo, + uint64_t saddr, + uint64_t offset, + uint64_t size) +{ + uint64_t tmp, lpfn; + + if (saddr & AMDGPU_GPU_PAGE_MASK + || offset & AMDGPU_GPU_PAGE_MASK + || size & AMDGPU_GPU_PAGE_MASK) + return -EINVAL; + + if (check_add_overflow(saddr, size, &tmp) + || check_add_overflow(offset, size, &tmp) + || size == 0 /* which also leads to end < begin */) + return -EINVAL; + + /* make sure object fit at this offset */ + if (bo && offset + size > amdgpu_bo_size(bo)) + return -EINVAL; + + /* Ensure last pfn not exceed max_pfn */ + lpfn = (saddr + size - 1) >> AMDGPU_GPU_PAGE_SHIFT; + if (lpfn >= adev->vm_manager.max_pfn) + return -EINVAL; + + return 0; +} + /** * amdgpu_vm_bo_map - map bo inside a vm * @@ -1428,27 +1840,20 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, int amdgpu_vm_bo_map(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, uint64_t saddr, uint64_t offset, - uint64_t size, uint64_t flags) + uint64_t size, uint32_t flags) { struct amdgpu_bo_va_mapping *mapping, *tmp; struct amdgpu_bo *bo = bo_va->base.bo; struct amdgpu_vm *vm = bo_va->base.vm; uint64_t eaddr; + int r; - /* validate the parameters */ - if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || - size == 0 || size & ~PAGE_MASK) - return -EINVAL; - - /* make sure object fit at this offset */ - eaddr = saddr + size - 1; - if (saddr >= eaddr || - (bo && offset + size > amdgpu_bo_size(bo)) || - (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT)) - return -EINVAL; + r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size); + if (r) + return r; saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr); if (tmp) { @@ -1494,24 +1899,16 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, uint64_t saddr, uint64_t offset, - uint64_t size, uint64_t flags) + uint64_t size, uint32_t flags) { struct amdgpu_bo_va_mapping *mapping; struct amdgpu_bo *bo = bo_va->base.bo; uint64_t eaddr; int r; - /* validate the parameters */ - if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || - size == 0 || size & ~PAGE_MASK) - return -EINVAL; - - /* make sure object fit at this offset */ - eaddr = saddr + size - 1; - if (saddr >= eaddr || - (bo && offset + size > amdgpu_bo_size(bo)) || - (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT)) - return -EINVAL; + r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size); + if (r) + return r; /* Allocate all the needed memory */ mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); @@ -1525,7 +1922,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, } saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; mapping->start = saddr; mapping->last = eaddr; @@ -1558,6 +1955,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *mapping; struct amdgpu_vm *vm = bo_va->base.vm; bool valid = true; + int r; saddr /= AMDGPU_GPU_PAGE_SIZE; @@ -1578,6 +1976,17 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, return -ENOENT; } + /* It's unlikely to happen that the mapping userq hasn't been idled + * during user requests GEM unmap IOCTL except for forcing the unmap + * from user space. + */ + if (unlikely(atomic_read(&bo_va->userq_va_mapped) > 0)) { + r = amdgpu_userq_gem_va_unmap_validate(adev, mapping, saddr); + if (unlikely(r == -EBUSY)) + dev_warn_once(adev->dev, + "Attempt to unmap an active userq buffer\n"); + } + list_del(&mapping->list); amdgpu_vm_it_remove(mapping, &vm->va); mapping->bo_va = NULL; @@ -1612,10 +2021,14 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *before, *after, *tmp, *next; LIST_HEAD(removed); uint64_t eaddr; + int r; + + r = amdgpu_vm_verify_parameters(adev, NULL, saddr, 0, size); + if (r) + return r; - eaddr = saddr + size - 1; saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; /* Allocate all the needed memory */ before = kzalloc(sizeof(*before), GFP_KERNEL); @@ -1677,18 +2090,30 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, /* Insert partial mapping before the range */ if (!list_empty(&before->list)) { + struct amdgpu_bo *bo = before->bo_va->base.bo; + amdgpu_vm_it_insert(before, &vm->va); - if (before->flags & AMDGPU_PTE_PRT) + if (before->flags & AMDGPU_VM_PAGE_PRT) amdgpu_vm_prt_get(adev); + + if (amdgpu_vm_is_bo_always_valid(vm, bo) && + !before->bo_va->base.moved) + amdgpu_vm_bo_moved(&before->bo_va->base); } else { kfree(before); } /* Insert partial mapping after the range */ if (!list_empty(&after->list)) { + struct amdgpu_bo *bo = after->bo_va->base.bo; + amdgpu_vm_it_insert(after, &vm->va); - if (after->flags & AMDGPU_PTE_PRT) + if (after->flags & AMDGPU_VM_PAGE_PRT) amdgpu_vm_prt_get(adev); + + if (amdgpu_vm_is_bo_always_valid(vm, bo) && + !after->bo_va->base.moved) + amdgpu_vm_bo_moved(&after->bo_va->base); } else { kfree(after); } @@ -1766,7 +2191,7 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev, if (bo) { dma_resv_assert_held(bo->tbo.base.resv); - if (bo->tbo.base.resv == vm->root.bo->tbo.base.resv) + if (amdgpu_vm_is_bo_always_valid(vm, bo)) ttm_bo_set_bulk_move(&bo->tbo, NULL); for (base = &bo_va->base.bo->vm_bo; *base; @@ -1774,6 +2199,7 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev, if (*base != &bo_va->base) continue; + amdgpu_vm_update_stats(*base, bo->tbo.resource, -1); *base = bo_va->base.next; break; } @@ -1842,25 +2268,19 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo) /** * amdgpu_vm_bo_invalidate - mark the bo as invalid * - * @adev: amdgpu_device pointer * @bo: amdgpu buffer object * @evicted: is the BO evicted * * Mark @bo as invalid. */ -void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, - struct amdgpu_bo *bo, bool evicted) +void amdgpu_vm_bo_invalidate(struct amdgpu_bo *bo, bool evicted) { struct amdgpu_vm_bo_base *bo_base; - /* shadow bo doesn't have bo base, its validation needs its parent */ - if (bo->parent && (amdgpu_bo_shadowed(bo->parent) == bo)) - bo = bo->parent; - for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) { struct amdgpu_vm *vm = bo_base->vm; - if (evicted && bo->tbo.base.resv == vm->root.bo->tbo.base.resv) { + if (evicted && amdgpu_vm_is_bo_always_valid(vm, bo)) { amdgpu_vm_bo_evicted(bo_base); continue; } @@ -1871,7 +2291,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, if (bo->tbo.type == ttm_bo_type_kernel) amdgpu_vm_bo_relocated(bo_base); - else if (bo->tbo.base.resv == vm->root.bo->tbo.base.resv) + else if (amdgpu_vm_is_bo_always_valid(vm, bo)) amdgpu_vm_bo_moved(bo_base); else amdgpu_vm_bo_invalidated(bo_base); @@ -1879,6 +2299,32 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, } /** + * amdgpu_vm_bo_move - handle BO move + * + * @bo: amdgpu buffer object + * @new_mem: the new placement of the BO move + * @evicted: is the BO evicted + * + * Update the memory stats for the new placement and mark @bo as invalid. + */ +void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem, + bool evicted) +{ + struct amdgpu_vm_bo_base *bo_base; + + for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) { + struct amdgpu_vm *vm = bo_base->vm; + + spin_lock(&vm->status_lock); + amdgpu_vm_update_stats_locked(bo_base, bo->tbo.resource, -1); + amdgpu_vm_update_stats_locked(bo_base, new_mem, +1); + spin_unlock(&vm->status_lock); + } + + amdgpu_vm_bo_invalidate(bo, evicted); +} + +/** * amdgpu_vm_get_block_size - calculate VM page table size as power of two * * @vm_size: VM size @@ -1948,7 +2394,7 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, phys_ram_gb = ((uint64_t)si.totalram * si.mem_unit + (1 << 30) - 1) >> 30; vm_size = roundup_pow_of_two( - min(max(phys_ram_gb * 3, min_vm_size), max_size)); + clamp(phys_ram_gb * 3, min_vm_size, max_size)); } adev->vm_manager.max_pfn = (uint64_t)vm_size << 18; @@ -1957,7 +2403,7 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, if (amdgpu_vm_block_size != -1) tmp >>= amdgpu_vm_block_size - 9; tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1; - adev->vm_manager.num_level = min(max_level, (unsigned)tmp); + adev->vm_manager.num_level = min_t(unsigned int, max_level, tmp); switch (adev->vm_manager.num_level) { case 3: adev->vm_manager.root_level = AMDGPU_VM_PDB2; @@ -1987,10 +2433,11 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, else adev->vm_manager.fragment_size = amdgpu_vm_fragment_size; - DRM_INFO("vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n", - vm_size, adev->vm_manager.num_level + 1, - adev->vm_manager.block_size, - adev->vm_manager.fragment_size); + dev_info( + adev->dev, + "vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n", + vm_size, adev->vm_manager.num_level + 1, + adev->vm_manager.block_size, adev->vm_manager.fragment_size); } /** @@ -2001,13 +2448,114 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, */ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) { - timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv, - DMA_RESV_USAGE_BOOKKEEP, - true, timeout); + timeout = drm_sched_entity_flush(&vm->immediate, timeout); if (timeout <= 0) return timeout; - return dma_fence_wait_timeout(vm->last_unlocked, true, timeout); + return drm_sched_entity_flush(&vm->delayed, timeout); +} + +static void amdgpu_vm_destroy_task_info(struct kref *kref) +{ + struct amdgpu_task_info *ti = container_of(kref, struct amdgpu_task_info, refcount); + + kfree(ti); +} + +static inline struct amdgpu_vm * +amdgpu_vm_get_vm_from_pasid(struct amdgpu_device *adev, u32 pasid) +{ + struct amdgpu_vm *vm; + unsigned long flags; + + xa_lock_irqsave(&adev->vm_manager.pasids, flags); + vm = xa_load(&adev->vm_manager.pasids, pasid); + xa_unlock_irqrestore(&adev->vm_manager.pasids, flags); + + return vm; +} + +/** + * amdgpu_vm_put_task_info - reference down the vm task_info ptr + * + * @task_info: task_info struct under discussion. + * + * frees the vm task_info ptr at the last put + */ +void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info) +{ + if (task_info) + kref_put(&task_info->refcount, amdgpu_vm_destroy_task_info); +} + +/** + * amdgpu_vm_get_task_info_vm - Extracts task info for a vm. + * + * @vm: VM to get info from + * + * Returns the reference counted task_info structure, which must be + * referenced down with amdgpu_vm_put_task_info. + */ +struct amdgpu_task_info * +amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm) +{ + struct amdgpu_task_info *ti = NULL; + + if (vm) { + ti = vm->task_info; + kref_get(&vm->task_info->refcount); + } + + return ti; +} + +/** + * amdgpu_vm_get_task_info_pasid - Extracts task info for a PASID. + * + * @adev: drm device pointer + * @pasid: PASID identifier for VM + * + * Returns the reference counted task_info structure, which must be + * referenced down with amdgpu_vm_put_task_info. + */ +struct amdgpu_task_info * +amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid) +{ + return amdgpu_vm_get_task_info_vm( + amdgpu_vm_get_vm_from_pasid(adev, pasid)); +} + +static int amdgpu_vm_create_task_info(struct amdgpu_vm *vm) +{ + vm->task_info = kzalloc(sizeof(struct amdgpu_task_info), GFP_KERNEL); + if (!vm->task_info) + return -ENOMEM; + + kref_init(&vm->task_info->refcount); + return 0; +} + +/** + * amdgpu_vm_set_task_info - Sets VMs task info. + * + * @vm: vm for which to set the info + */ +void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) +{ + if (!vm->task_info) + return; + + if (vm->task_info->task.pid == current->pid) + return; + + vm->task_info->task.pid = current->pid; + get_task_comm(vm->task_info->task.comm, current); + + if (current->group_leader->mm != current->mm) + return; + + vm->task_info->tgid = current->group_leader->pid; + get_task_comm(vm->task_info->process_name, current->group_leader); } /** @@ -2015,13 +2563,16 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) * * @adev: amdgpu_device pointer * @vm: requested vm + * @xcp_id: GPU partition selection id + * @pasid: the pasid the VM is using on this GPU * * Init @vm fields. * * Returns: * 0 for success, error for failure. */ -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, + int32_t xcp_id, uint32_t pasid) { struct amdgpu_bo *root_bo; struct amdgpu_bo_vm *root; @@ -2031,6 +2582,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) vm->reserved_vmid[i] = NULL; INIT_LIST_HEAD(&vm->evicted); + INIT_LIST_HEAD(&vm->evicted_user); INIT_LIST_HEAD(&vm->relocated); INIT_LIST_HEAD(&vm->moved); INIT_LIST_HEAD(&vm->idle); @@ -2038,30 +2590,21 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) spin_lock_init(&vm->status_lock); INIT_LIST_HEAD(&vm->freed); INIT_LIST_HEAD(&vm->done); - INIT_LIST_HEAD(&vm->pt_freed); - INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work); + INIT_KFIFO(vm->faults); - /* create scheduler entities for page table updates */ - r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL, - adev->vm_manager.vm_pte_scheds, - adev->vm_manager.vm_pte_num_scheds, NULL); + r = amdgpu_vm_init_entities(adev, vm); if (r) return r; - r = drm_sched_entity_init(&vm->delayed, DRM_SCHED_PRIORITY_NORMAL, - adev->vm_manager.vm_pte_scheds, - adev->vm_manager.vm_pte_num_scheds, NULL); - if (r) - goto error_free_immediate; + ttm_lru_bulk_move_init(&vm->lru_bulk_move); - vm->pte_support_ats = false; vm->is_compute_context = false; vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_GFX); - DRM_DEBUG_DRIVER("VM update mode is %s\n", - vm->use_cpu_for_update ? "CPU" : "SDMA"); + dev_dbg(adev->dev, "VM update mode is %s\n", + vm->use_cpu_for_update ? "CPU" : "SDMA"); WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)), "CPU update of VM recommended only for large BAR system\n"); @@ -2070,53 +2613,70 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) vm->update_funcs = &amdgpu_vm_cpu_funcs; else vm->update_funcs = &amdgpu_vm_sdma_funcs; - vm->last_update = NULL; + + vm->last_update = dma_fence_get_stub(); vm->last_unlocked = dma_fence_get_stub(); vm->last_tlb_flush = dma_fence_get_stub(); + vm->generation = amdgpu_vm_generation(adev, NULL); mutex_init(&vm->eviction_lock); vm->evicting = false; + vm->tlb_fence_context = dma_fence_context_alloc(1); r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level, - false, &root); + false, &root, xcp_id); if (r) goto error_free_delayed; - root_bo = &root->bo; + + root_bo = amdgpu_bo_ref(&root->bo); r = amdgpu_bo_reserve(root_bo, true); - if (r) - goto error_free_root; + if (r) { + amdgpu_bo_unref(&root_bo); + goto error_free_delayed; + } + amdgpu_vm_bo_base_init(&vm->root, vm, root_bo); r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1); if (r) - goto error_unreserve; - - amdgpu_vm_bo_base_init(&vm->root, vm, root_bo); + goto error_free_root; r = amdgpu_vm_pt_clear(adev, vm, root, false); if (r) - goto error_unreserve; + goto error_free_root; - amdgpu_bo_unreserve(vm->root.bo); + r = amdgpu_vm_create_task_info(vm); + if (r) + dev_dbg(adev->dev, "Failed to create task info for VM\n"); - INIT_KFIFO(vm->faults); + /* Store new PASID in XArray (if non-zero) */ + if (pasid != 0) { + r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm, GFP_KERNEL)); + if (r < 0) + goto error_free_root; - return 0; + vm->pasid = pasid; + } -error_unreserve: amdgpu_bo_unreserve(vm->root.bo); + amdgpu_bo_unref(&root_bo); + + return 0; error_free_root: - amdgpu_bo_unref(&root->shadow); + /* If PASID was partially set, erase it from XArray before failing */ + if (vm->pasid != 0) { + xa_erase_irq(&adev->vm_manager.pasids, vm->pasid); + vm->pasid = 0; + } + amdgpu_vm_pt_free_root(adev, vm); + amdgpu_bo_unreserve(vm->root.bo); amdgpu_bo_unref(&root_bo); - vm->root.bo = NULL; error_free_delayed: dma_fence_put(vm->last_tlb_flush); dma_fence_put(vm->last_unlocked); - drm_sched_entity_destroy(&vm->delayed); - -error_free_immediate: - drm_sched_entity_destroy(&vm->immediate); + ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move); + amdgpu_vm_fini_entities(vm); return r; } @@ -2142,35 +2702,17 @@ error_free_immediate: */ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - bool pte_support_ats = (adev->asic_type == CHIP_RAVEN); int r; r = amdgpu_bo_reserve(vm->root.bo, true); if (r) return r; - /* Sanity checks */ - if (!amdgpu_vm_pt_is_root_clean(adev, vm)) { - r = -EINVAL; - goto unreserve_bo; - } - - /* Check if PD needs to be reinitialized and do it before - * changing any other state, in case it fails. - */ - if (pte_support_ats != vm->pte_support_ats) { - vm->pte_support_ats = pte_support_ats; - r = amdgpu_vm_pt_clear(adev, vm, to_amdgpu_bo_vm(vm->root.bo), - false); - if (r) - goto unreserve_bo; - } - /* Update VM state */ vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_COMPUTE); - DRM_DEBUG_DRIVER("VM update mode is %s\n", - vm->use_cpu_for_update ? "CPU" : "SDMA"); + dev_dbg(adev->dev, "VM update mode is %s\n", + vm->use_cpu_for_update ? "CPU" : "SDMA"); WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)), "CPU update of VM recommended only for large BAR system\n"); @@ -2183,43 +2725,31 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) goto unreserve_bo; vm->update_funcs = &amdgpu_vm_cpu_funcs; + r = amdgpu_vm_pt_map_tables(adev, vm); + if (r) + goto unreserve_bo; + } else { vm->update_funcs = &amdgpu_vm_sdma_funcs; } - /* - * Make sure root PD gets mapped. As vm_update_mode could be changed - * when turning a GFX VM into a compute VM. - */ - r = vm->update_funcs->map_table(to_amdgpu_bo_vm(vm->root.bo)); - if (r) - goto unreserve_bo; dma_fence_put(vm->last_update); - vm->last_update = NULL; + vm->last_update = dma_fence_get_stub(); vm->is_compute_context = true; - /* Free the shadow bo for compute VM */ - amdgpu_bo_unref(&to_amdgpu_bo_vm(vm->root.bo)->shadow); - - goto unreserve_bo; - unreserve_bo: amdgpu_bo_unreserve(vm->root.bo); return r; } -/** - * amdgpu_vm_release_compute - release a compute vm - * @adev: amdgpu_device pointer - * @vm: a vm turned into compute vm by calling amdgpu_vm_make_compute - * - * This is a correspondant of amdgpu_vm_make_compute. It decouples compute - * pasid from vm. Compute should stop use of vm after this call. - */ -void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) +static int amdgpu_vm_stats_is_zero(struct amdgpu_vm *vm) { - amdgpu_vm_set_pasid(adev, vm, 0); - vm->is_compute_context = false; + for (int i = 0; i < __AMDGPU_PL_NUM; ++i) { + if (!(drm_memory_stats_is_zero(&vm->stats[i].drm) && + vm->stats[i].evicted == 0)) + return false; + } + return true; } /** @@ -2241,11 +2771,13 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm); - flush_work(&vm->pt_free_work); - root = amdgpu_bo_ref(vm->root.bo); amdgpu_bo_reserve(root, true); - amdgpu_vm_set_pasid(adev, vm, 0); + /* Remove PASID mapping before destroying VM */ + if (vm->pasid != 0) { + xa_erase_irq(&adev->vm_manager.pasids, vm->pasid); + vm->pasid = 0; + } dma_fence_wait(vm->last_unlocked, false); dma_fence_put(vm->last_unlocked); dma_fence_wait(vm->last_tlb_flush, false); @@ -2255,7 +2787,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) dma_fence_put(vm->last_tlb_flush); list_for_each_entry_safe(mapping, tmp, &vm->freed, list) { - if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) { + if (mapping->flags & AMDGPU_VM_PAGE_PRT && prt_fini_needed) { amdgpu_vm_prt_fini(adev, vm); prt_fini_needed = false; } @@ -2269,8 +2801,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_bo_unref(&root); WARN_ON(vm->root.bo); - drm_sched_entity_destroy(&vm->immediate); - drm_sched_entity_destroy(&vm->delayed); + amdgpu_vm_fini_entities(vm); if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { dev_err(adev->dev, "still active bo inside vm\n"); @@ -2285,8 +2816,22 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) } dma_fence_put(vm->last_update); - for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) + + for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) { amdgpu_vmid_free_reserved(adev, vm, i); + } + + ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move); + + if (!amdgpu_vm_stats_is_zero(vm)) { + struct amdgpu_task_info *ti = vm->task_info; + + dev_warn(adev->dev, + "VM memory stats for proc %s(%d) task %s(%d) is non-zero when fini\n", + ti->process_name, ti->task.pid, ti->task.comm, ti->tgid); + } + + amdgpu_vm_put_task_info(vm->task_info); } /** @@ -2298,8 +2843,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) */ void amdgpu_vm_manager_init(struct amdgpu_device *adev) { - unsigned i; - /* Concurrent flushes are only possible starting with Vega10 and * are broken on Navi10 and Navi14. */ @@ -2308,11 +2851,6 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) adev->asic_type == CHIP_NAVI14); amdgpu_vmid_mgr_init(adev); - adev->vm_manager.fence_context = - dma_fence_context_alloc(AMDGPU_MAX_RINGS); - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) - adev->vm_manager.seqno[i] = 0; - spin_lock_init(&adev->vm_manager.prt_lock); atomic_set(&adev->vm_manager.num_prt_users, 0); @@ -2369,18 +2907,18 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) union drm_amdgpu_vm *args = data; struct amdgpu_device *adev = drm_to_adev(dev); struct amdgpu_fpriv *fpriv = filp->driver_priv; - int r; + struct amdgpu_vm *vm = &fpriv->vm; + + /* No valid flags defined yet */ + if (args->in.flags) + return -EINVAL; switch (args->in.op) { case AMDGPU_VM_OP_RESERVE_VMID: /* We only have requirement to reserve vmid from gfxhub */ - r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, - AMDGPU_GFXHUB_0); - if (r) - return r; - break; + return amdgpu_vmid_alloc_reserved(adev, vm, AMDGPU_GFXHUB(0)); case AMDGPU_VM_OP_UNRESERVE_VMID: - amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0); + amdgpu_vmid_free_reserved(adev, vm, AMDGPU_GFXHUB(0)); break; default: return -EINVAL; @@ -2390,51 +2928,13 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) } /** - * amdgpu_vm_get_task_info - Extracts task info for a PASID. - * - * @adev: drm device pointer - * @pasid: PASID identifier for VM - * @task_info: task_info to fill. - */ -void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid, - struct amdgpu_task_info *task_info) -{ - struct amdgpu_vm *vm; - unsigned long flags; - - xa_lock_irqsave(&adev->vm_manager.pasids, flags); - - vm = xa_load(&adev->vm_manager.pasids, pasid); - if (vm) - *task_info = vm->task_info; - - xa_unlock_irqrestore(&adev->vm_manager.pasids, flags); -} - -/** - * amdgpu_vm_set_task_info - Sets VMs task info. - * - * @vm: vm for which to set the info - */ -void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) -{ - if (vm->task_info.pid) - return; - - vm->task_info.pid = current->pid; - get_task_comm(vm->task_info.task_name, current); - - if (current->group_leader->mm != current->mm) - return; - - vm->task_info.tgid = current->group_leader->pid; - get_task_comm(vm->task_info.process_name, current->group_leader); -} - -/** * amdgpu_vm_handle_fault - graceful handling of VM faults. * @adev: amdgpu device pointer * @pasid: PASID of the VM + * @ts: Timestamp of the fault + * @vmid: VMID, only used for GFX 9.4.3. + * @node_id: Node_id received in IH cookie. Only applicable for + * GFX 9.4.3. * @addr: Address of the fault * @write_fault: true is write fault, false is read fault * @@ -2442,7 +2942,8 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) * shouldn't be reported any more. */ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, - uint64_t addr, bool write_fault) + u32 vmid, u32 node_id, uint64_t addr, uint64_t ts, + bool write_fault) { bool is_compute_context = false; struct amdgpu_bo *root; @@ -2466,8 +2967,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, addr /= AMDGPU_GPU_PAGE_SIZE; - if (is_compute_context && - !svm_range_restore_pages(adev, pasid, addr, write_fault)) { + if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid, + node_id, addr, ts, write_fault)) { amdgpu_bo_unref(&root); return true; } @@ -2492,7 +2993,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, /* Intentionally setting invalid PTE flag * combination to force a no-retry-fault */ - flags = AMDGPU_PTE_SNOOPED | AMDGPU_PTE_PRT; + flags = AMDGPU_VM_NORETRY_FLAGS; value = 0; } else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) { /* Redirect the access to the dummy page */ @@ -2511,8 +3012,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, goto error_unlock; } - r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, addr, - addr, flags, value, 0, NULL, NULL, NULL); + r = amdgpu_vm_update_range(adev, vm, true, false, false, false, + NULL, addr, addr, flags, value, 0, NULL, NULL, NULL); if (r) goto error_unlock; @@ -2521,7 +3022,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, error_unlock: amdgpu_bo_unreserve(root); if (r < 0) - DRM_ERROR("Can't handle page fault (%d)\n", r); + dev_err(adev->dev, "Can't handle page fault (%d)\n", r); error_unref: amdgpu_bo_unref(&root); @@ -2555,6 +3056,8 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m) unsigned int total_done_objs = 0; unsigned int id = 0; + amdgpu_vm_assert_locked(vm); + spin_lock(&vm->status_lock); seq_puts(m, "\tIdle BOs:\n"); list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) { @@ -2624,3 +3127,83 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m) total_done_objs); } #endif + +/** + * amdgpu_vm_update_fault_cache - update cached fault into. + * @adev: amdgpu device pointer + * @pasid: PASID of the VM + * @addr: Address of the fault + * @status: GPUVM fault status register + * @vmhub: which vmhub got the fault + * + * Cache the fault info for later use by userspace in debugging. + */ +void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev, + unsigned int pasid, + uint64_t addr, + uint32_t status, + unsigned int vmhub) +{ + struct amdgpu_vm *vm; + unsigned long flags; + + xa_lock_irqsave(&adev->vm_manager.pasids, flags); + + vm = xa_load(&adev->vm_manager.pasids, pasid); + /* Don't update the fault cache if status is 0. In the multiple + * fault case, subsequent faults will return a 0 status which is + * useless for userspace and replaces the useful fault status, so + * only update if status is non-0. + */ + if (vm && status) { + vm->fault_info.addr = addr; + vm->fault_info.status = status; + /* + * Update the fault information globally for later usage + * when vm could be stale or freed. + */ + adev->vm_manager.fault_info.addr = addr; + adev->vm_manager.fault_info.vmhub = vmhub; + adev->vm_manager.fault_info.status = status; + + if (AMDGPU_IS_GFXHUB(vmhub)) { + vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_GFX; + vm->fault_info.vmhub |= + (vmhub - AMDGPU_GFXHUB_START) << AMDGPU_VMHUB_IDX_SHIFT; + } else if (AMDGPU_IS_MMHUB0(vmhub)) { + vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_MM0; + vm->fault_info.vmhub |= + (vmhub - AMDGPU_MMHUB0_START) << AMDGPU_VMHUB_IDX_SHIFT; + } else if (AMDGPU_IS_MMHUB1(vmhub)) { + vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_MM1; + vm->fault_info.vmhub |= + (vmhub - AMDGPU_MMHUB1_START) << AMDGPU_VMHUB_IDX_SHIFT; + } else { + WARN_ONCE(1, "Invalid vmhub %u\n", vmhub); + } + } + xa_unlock_irqrestore(&adev->vm_manager.pasids, flags); +} + +/** + * amdgpu_vm_is_bo_always_valid - check if the BO is VM always valid + * + * @vm: VM to test against. + * @bo: BO to be tested. + * + * Returns true if the BO shares the dma_resv object with the root PD and is + * always guaranteed to be valid inside the VM. + */ +bool amdgpu_vm_is_bo_always_valid(struct amdgpu_vm *vm, struct amdgpu_bo *bo) +{ + return bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv; +} + +void amdgpu_vm_print_task_info(struct amdgpu_device *adev, + struct amdgpu_task_info *task_info) +{ + dev_err(adev->dev, + " Process %s pid %d thread %s pid %d\n", + task_info->process_name, task_info->tgid, + task_info->task.comm, task_info->task.pid); +} |
