From b1d128689f9c602a3dbea37b47a27a568d55754d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 5 Jan 2018 10:25:57 -0500 Subject: drm/amdgpu: adjust HDP write queue flushing for tlb invalidation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Separate tlb invalidation and hdp flushing and move the HDP flush to the caller. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 5afbc5e714d0..df0f99741b73 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -856,6 +856,7 @@ restart: if (vm->use_cpu_for_update) { /* Flush HDP */ mb(); + amdgpu_asic_flush_hdp(adev); amdgpu_gart_flush_gpu_tlb(adev, 0); } else if (params.ib->length_dw == 0) { amdgpu_job_free(job); @@ -1457,6 +1458,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, if (vm->use_cpu_for_update) { /* Flush HDP */ mb(); + amdgpu_asic_flush_hdp(adev); amdgpu_gart_flush_gpu_tlb(adev, 0); } -- cgit From 64b9342f31363eee93d0d3e1fa87622fe2929732 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 5 Jan 2018 10:33:48 -0500 Subject: drm/amdgpu: drop extra tlb invalidation in gpuvm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We only need to flush the HDP here, not invalidate the TLB. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index df0f99741b73..114571f29c7a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -857,7 +857,6 @@ restart: /* Flush HDP */ mb(); amdgpu_asic_flush_hdp(adev); - amdgpu_gart_flush_gpu_tlb(adev, 0); } else if (params.ib->length_dw == 0) { amdgpu_job_free(job); } else { @@ -1459,7 +1458,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, /* Flush HDP */ mb(); amdgpu_asic_flush_hdp(adev); - amdgpu_gart_flush_gpu_tlb(adev, 0); } spin_lock(&vm->status_lock); -- cgit From 770d13b19fdf365a99e559f1d47f1380910a947d Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 12 Jan 2018 14:52:22 +0100 Subject: drm/amdgpu: move struct amdgpu_mc into amdgpu_gmc.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit And rename it to amdgpu_gmc as well. 
Signed-off-by: Christian König Reviewed-by: Samuel Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 114571f29c7a..988ccb248b54 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -465,7 +465,7 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev) { - return (adev->mc.real_vram_size == adev->mc.visible_vram_size); + return (adev->gmc.real_vram_size == adev->gmc.visible_vram_size); } /** -- cgit From 132f34e4b558488cc8d153a1d18833054a76e44c Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 12 Jan 2018 15:26:08 +0100 Subject: drm/amdgpu: move struct gart_funcs into amdgpu_gmc.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit And rename it to struct gmc_funcs. Signed-off-by: Christian König Reviewed-by: Samuel Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 988ccb248b54..da634ae6ca8f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -679,8 +679,8 @@ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params, value = params->pages_addr ? amdgpu_vm_map_gart(params->pages_addr, addr) : addr; - amdgpu_gart_set_pte_pde(params->adev, (void *)(uintptr_t)pe, - i, value, flags); + amdgpu_gmc_set_pte_pde(params->adev, (void *)(uintptr_t)pe, + i, value, flags); addr += incr; } } @@ -738,7 +738,7 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params, level += params->adev->vm_manager.root_level; pt = amdgpu_bo_gpu_offset(bo); flags = AMDGPU_PTE_VALID; - amdgpu_gart_get_vm_pde(params->adev, level, &pt, &flags); + amdgpu_gmc_get_vm_pde(params->adev, level, &pt, &flags); if (shadow) { pde = shadow_addr + (entry - parent->entries) * 8; params->func(params, pde, pt, 1, 0, flags); @@ -967,8 +967,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, } entry->huge = true; - amdgpu_gart_get_vm_pde(p->adev, AMDGPU_VM_PDB0, - &dst, &flags); + amdgpu_gmc_get_vm_pde(p->adev, AMDGPU_VM_PDB0, &dst, &flags); if (p->func == amdgpu_vm_cpu_set_ptes) { pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo); @@ -1485,7 +1484,7 @@ static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev) spin_lock_irqsave(&adev->vm_manager.prt_lock, flags); enable = !!atomic_read(&adev->vm_manager.num_prt_users); - adev->gart.gart_funcs->set_prt(adev, enable); + adev->gmc.gmc_funcs->set_prt(adev, enable); spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags); } @@ -1494,7 +1493,7 @@ static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev) */ static void amdgpu_vm_prt_get(struct amdgpu_device *adev) { - if (!adev->gart.gart_funcs->set_prt) + if (!adev->gmc.gmc_funcs->set_prt) return; if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1) @@ -1529,7 +1528,7 @@ static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev, { struct amdgpu_prt_cb *cb; - if (!adev->gart.gart_funcs->set_prt) + if (!adev->gmc.gmc_funcs->set_prt) return; cb = kmalloc(sizeof(struct 
amdgpu_prt_cb), GFP_KERNEL); @@ -2405,7 +2404,7 @@ static void amdgpu_vm_free_levels(struct amdgpu_device *adev, void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) { struct amdgpu_bo_va_mapping *mapping, *tmp; - bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt; + bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt; struct amdgpu_bo *root; u64 fault; int i, r; -- cgit From 373ac645c9b83ffd93f9905458166b25191a8df6 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 16 Jan 2018 16:54:25 +0100 Subject: drm/amdgpu: move PD/PT address calculation into backend function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This way we can better handle the differences for CPU based updates. Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 79 +++++++++++++--------------------- 1 file changed, 29 insertions(+), 50 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index da634ae6ca8f..21b3915bfec1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -75,7 +75,8 @@ struct amdgpu_pte_update_params { /* indirect buffer to fill with commands */ struct amdgpu_ib *ib; /* Function which actually does the update */ - void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe, + void (*func)(struct amdgpu_pte_update_params *params, + struct amdgpu_bo *bo, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags); /* The next two are used during VM update by CPU @@ -578,6 +579,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, * amdgpu_vm_do_set_ptes - helper to call the right asic function * * @params: see amdgpu_pte_update_params definition + * @bo: PD/PT to update * @pe: addr of the page entry * @addr: dst addr to write into pe * @count: number of page entries to update @@ -588,10 +590,12 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, * to setup the page table using the DMA. */ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params, + struct amdgpu_bo *bo, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags) { + pe += amdgpu_bo_gpu_offset(bo); trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); if (count < 3) { @@ -608,6 +612,7 @@ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params, * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART * * @params: see amdgpu_pte_update_params definition + * @bo: PD/PT to update * @pe: addr of the page entry * @addr: dst addr to write into pe * @count: number of page entries to update @@ -617,13 +622,14 @@ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params, * Traces the parameters and calls the DMA function to copy the PTEs. 
*/ static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params, + struct amdgpu_bo *bo, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags) { uint64_t src = (params->src + (addr >> 12) * 8); - + pe += amdgpu_bo_gpu_offset(bo); trace_amdgpu_vm_copy_ptes(pe, src, count); amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count); @@ -657,6 +663,7 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU * * @params: see amdgpu_pte_update_params definition + * @bo: PD/PT to update * @pe: kmap addr of the page entry * @addr: dst addr to write into pe * @count: number of page entries to update @@ -666,6 +673,7 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) * Write count number of PT/PD entries directly. */ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params, + struct amdgpu_bo *bo, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags) @@ -673,6 +681,8 @@ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params, unsigned int i; uint64_t value; + pe += (unsigned long)amdgpu_bo_kptr(bo); + trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); for (i = 0; i < count; i++) { @@ -714,8 +724,7 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params, struct amdgpu_vm_pt *parent, struct amdgpu_vm_pt *entry) { - struct amdgpu_bo *bo = entry->base.bo, *shadow = NULL, *pbo; - uint64_t pd_addr, shadow_addr = 0; + struct amdgpu_bo *bo = parent->base.bo, *pbo; uint64_t pde, pt, flags; unsigned level; @@ -723,29 +732,17 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params, if (entry->huge) return; - if (vm->use_cpu_for_update) { - pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo); - } else { - pd_addr = amdgpu_bo_gpu_offset(parent->base.bo); - shadow = parent->base.bo->shadow; - if (shadow) - shadow_addr = amdgpu_bo_gpu_offset(shadow); - } - - for (level = 0, pbo = parent->base.bo->parent; pbo; ++level) + for (level = 0, pbo = bo->parent; pbo; ++level) pbo = pbo->parent; level += params->adev->vm_manager.root_level; - pt = amdgpu_bo_gpu_offset(bo); + pt = amdgpu_bo_gpu_offset(entry->base.bo); flags = AMDGPU_PTE_VALID; amdgpu_gmc_get_vm_pde(params->adev, level, &pt, &flags); - if (shadow) { - pde = shadow_addr + (entry - parent->entries) * 8; - params->func(params, pde, pt, 1, 0, flags); - } - - pde = pd_addr + (entry - parent->entries) * 8; - params->func(params, pde, pt, 1, 0, flags); + pde = (entry - parent->entries) * 8; + if (bo->shadow) + params->func(params, bo->shadow, pde, pt, 1, 0, flags); + params->func(params, bo, pde, pt, 1, 0, flags); } /* @@ -946,7 +943,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, unsigned nptes, uint64_t dst, uint64_t flags) { - uint64_t pd_addr, pde; + uint64_t pde; /* In the case of a mixed PT the PDE must point to it*/ if (p->adev->asic_type >= CHIP_VEGA10 && !p->src && @@ -969,18 +966,10 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, entry->huge = true; amdgpu_gmc_get_vm_pde(p->adev, AMDGPU_VM_PDB0, &dst, &flags); - if (p->func == amdgpu_vm_cpu_set_ptes) { - pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo); - } else { - if (parent->base.bo->shadow) { - pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow); - pde = pd_addr + (entry - parent->entries) * 8; - p->func(p, pde, dst, 1, 0, flags); - } - pd_addr = 
amdgpu_bo_gpu_offset(parent->base.bo); - } - pde = pd_addr + (entry - parent->entries) * 8; - p->func(p, pde, dst, 1, 0, flags); + pde = (entry - parent->entries) * 8; + if (parent->base.bo->shadow) + p->func(p, parent->base.bo->shadow, pde, dst, 1, 0, flags); + p->func(p, parent->base.bo, pde, dst, 1, 0, flags); } /** @@ -1006,7 +995,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, uint64_t addr, pe_start; struct amdgpu_bo *pt; unsigned nptes; - bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes); /* walk over the address space and update the page tables */ for (addr = start; addr < end; addr += nptes, @@ -1029,20 +1017,11 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, continue; pt = entry->base.bo; - if (use_cpu_update) { - pe_start = (unsigned long)amdgpu_bo_kptr(pt); - } else { - if (pt->shadow) { - pe_start = amdgpu_bo_gpu_offset(pt->shadow); - pe_start += (addr & mask) * 8; - params->func(params, pe_start, dst, nptes, - AMDGPU_GPU_PAGE_SIZE, flags); - } - pe_start = amdgpu_bo_gpu_offset(pt); - } - - pe_start += (addr & mask) * 8; - params->func(params, pe_start, dst, nptes, + pe_start = (addr & mask) * 8; + if (pt->shadow) + params->func(params, pt->shadow, pe_start, dst, nptes, + AMDGPU_GPU_PAGE_SIZE, flags); + params->func(params, pt, pe_start, dst, nptes, AMDGPU_GPU_PAGE_SIZE, flags); } -- cgit From 5a4633c4b880cf8d1fe7df9c55766205cf9bc295 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 8 Jan 2018 14:48:11 +0100 Subject: drm/amdgpu: forward pasid to backend flush implementations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Forward the pasid from the VM code to the emit_vm_flush function and update all implementations with the new parameter. Signed-off-by: Christian König Reviewed-by: Chunming Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 21b3915bfec1..2dca47ad4f09 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -513,7 +513,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ struct dma_fence *fence; trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr); - amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr); + amdgpu_ring_emit_vm_flush(ring, job->vmid, job->pasid, + job->vm_pd_addr); r = amdgpu_fence_emit(ring, &fence); if (r) -- cgit From 698825653fdf1a696e1b9458ed9fc4aa2c6587d4 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 19 Jan 2018 14:17:40 +0100 Subject: drm/amdgpu: add optional ring to *_hdp callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds an optional ring to the invalidate_hdp and flush_hdp callbacks. If the ring isn't specified or the emit_wreg function not available the HDP operation will be done with the CPU otherwise by writing on the ring.
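As a rough sketch of what that dispatch can look like (not taken from this series; the register offset below and the function name are placeholders standing in for the real per-ASIC implementation):

static void example_flush_hdp(struct amdgpu_device *adev,
			      struct amdgpu_ring *ring)
{
	/* placeholder for the ASIC's HDP memory-coherency flush register */
	u32 hdp_flush_reg = 0x0;

	if (!ring || !ring->funcs->emit_wreg)
		/* no ring given or it cannot emit register writes: CPU path */
		WREG32_NO_KIQ(hdp_flush_reg, 0);
	else
		/* otherwise queue the register write on the ring */
		amdgpu_ring_emit_wreg(ring, hdp_flush_reg, 0);
}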
Signed-off-by: Christian König Acked-by: Chunming Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 2dca47ad4f09..0df52cb1765b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -854,7 +854,7 @@ restart: if (vm->use_cpu_for_update) { /* Flush HDP */ mb(); - amdgpu_asic_flush_hdp(adev); + amdgpu_asic_flush_hdp(adev, NULL); } else if (params.ib->length_dw == 0) { amdgpu_job_free(job); } else { @@ -1436,7 +1436,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, if (vm->use_cpu_for_update) { /* Flush HDP */ mb(); - amdgpu_asic_flush_hdp(adev); + amdgpu_asic_flush_hdp(adev, NULL); } spin_lock(&vm->status_lock); -- cgit From 13307f7e1d0c05a68f4ba19193cbd213573a8680 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 24 Jan 2018 17:19:04 +0100 Subject: drm/amdgpu: revert "drm/amdgpu: use AMDGPU_GEM_CREATE_VRAM_CLEARED for VM PD/PTs" v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using the standard clear turned out to be to inflexible. First of all it is executed on the system queue, together with buffer moves instead on the per VM queue. And second we need to fill in the page tables with more than just zero. We keep the new functionality of initializing the PDEs/PTEs with ATC routing entries intact. v2: update commit message. Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 116 ++++++++++++++++++++++++++------- 1 file changed, 92 insertions(+), 24 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 0df52cb1765b..5cdd8d9c3311 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -257,6 +257,74 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) return ready; } +/** + * amdgpu_vm_clear_bo - initially clear the PDs/PTs + * + * @adev: amdgpu_device pointer + * @bo: BO to clear + * @level: level this BO is at + * + * Root PD needs to be reserved when calling this. 
+ */ +static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_bo *bo, + unsigned level) +{ + struct ttm_operation_ctx ctx = { true, false }; + struct dma_fence *fence = NULL; + uint64_t addr, init_value; + struct amdgpu_ring *ring; + struct amdgpu_job *job; + unsigned entries; + int r; + + if (vm->pte_support_ats) { + init_value = AMDGPU_PTE_DEFAULT_ATC; + if (level != AMDGPU_VM_PTB) + init_value |= AMDGPU_PDE_PTE; + } else { + init_value = 0; + } + + ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); + + r = reservation_object_reserve_shared(bo->tbo.resv); + if (r) + return r; + + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (r) + goto error; + + addr = amdgpu_bo_gpu_offset(bo); + entries = amdgpu_bo_size(bo) / 8; + + r = amdgpu_job_alloc_with_ib(adev, 64, &job); + if (r) + goto error; + + amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0, + entries, 0, init_value); + amdgpu_ring_pad_ib(ring, &job->ibs[0]); + + WARN_ON(job->ibs[0].length_dw > 64); + r = amdgpu_job_submit(job, ring, &vm->entity, + AMDGPU_FENCE_OWNER_UNDEFINED, &fence); + if (r) + goto error_free; + + amdgpu_bo_fence(bo, fence, true); + dma_fence_put(fence); + return 0; + +error_free: + amdgpu_job_free(job); + +error: + return r; +} + /** * amdgpu_vm_alloc_levels - allocate the PD/PT levels * @@ -275,9 +343,8 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, { unsigned shift = amdgpu_vm_level_shift(adev, level); unsigned pt_idx, from, to; - int r; u64 flags; - uint64_t init_value = 0; + int r; if (!parent->entries) { unsigned num_entries = amdgpu_vm_num_entries(adev, level); @@ -300,21 +367,13 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, saddr = saddr & ((1 << shift) - 1); eaddr = eaddr & ((1 << shift) - 1); - flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_VRAM_CLEARED; + flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; if (vm->use_cpu_for_update) flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; else flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | AMDGPU_GEM_CREATE_SHADOW); - if (vm->pte_support_ats) { - init_value = AMDGPU_PTE_DEFAULT_ATC; - if (level != AMDGPU_VM_PTB) - init_value |= AMDGPU_PDE_PTE; - - } - /* walk over the address space and allocate the page tables */ for (pt_idx = from; pt_idx <= to; ++pt_idx) { struct reservation_object *resv = vm->root.base.bo->tbo.resv; @@ -325,12 +384,17 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, level), AMDGPU_GPU_PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - flags, - NULL, resv, init_value, &pt); + AMDGPU_GEM_DOMAIN_VRAM, flags, + NULL, resv, 0, &pt); if (r) return r; + r = amdgpu_vm_clear_bo(adev, vm, pt, level); + if (r) { + amdgpu_bo_unref(&pt); + return r; + } + if (vm->use_cpu_for_update) { r = amdgpu_bo_kmap(pt, NULL); if (r) { @@ -2241,11 +2305,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, { const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, AMDGPU_VM_PTE_COUNT(adev) * 8); - uint64_t init_pde_value = 0, flags; unsigned ring_instance; struct amdgpu_ring *ring; struct drm_sched_rq *rq; unsigned long size; + uint64_t flags; int r, i; vm->va = RB_ROOT_CACHED; @@ -2274,23 +2338,19 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_COMPUTE); - if (adev->asic_type == CHIP_RAVEN) { + if (adev->asic_type == CHIP_RAVEN) vm->pte_support_ats = true; - init_pde_value = 
AMDGPU_PTE_DEFAULT_ATC - | AMDGPU_PDE_PTE; - - } - } else + } else { vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_GFX); + } DRM_DEBUG_DRIVER("VM update mode is %s\n", vm->use_cpu_for_update ? "CPU" : "SDMA"); WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)), "CPU update of VM recommended only for large BAR system\n"); vm->last_update = NULL; - flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_VRAM_CLEARED; + flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; if (vm->use_cpu_for_update) flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; else @@ -2299,7 +2359,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level); r = amdgpu_bo_create(adev, size, align, true, AMDGPU_GEM_DOMAIN_VRAM, - flags, NULL, NULL, init_pde_value, + flags, NULL, NULL, 0, &vm->root.base.bo); if (r) goto error_free_sched_entity; @@ -2308,6 +2368,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (r) goto error_free_root; + r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, + adev->vm_manager.root_level); + if (r) + goto error_unreserve; + vm->root.base.vm = vm; list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va); list_add_tail(&vm->root.base.vm_status, &vm->evicted); @@ -2331,6 +2396,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, return 0; +error_unreserve: + amdgpu_bo_unreserve(vm->root.base.bo); + error_free_root: amdgpu_bo_unref(&vm->root.base.bo->shadow); amdgpu_bo_unref(&vm->root.base.bo); -- cgit From 4c77edbf742540216898a7acb3d46a69ed4508a1 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 26 Jan 2018 20:21:26 +0100 Subject: drm/amdgpu: drop root shadow sync MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Completely pointless, it is the same reservation object as the root PD anyway. Signed-off-by: Christian König Reviewed-by: Roger He Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 5cdd8d9c3311..8ac3bcf9873f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -932,11 +932,6 @@ restart: amdgpu_ring_pad_ib(ring, params.ib); amdgpu_sync_resv(adev, &job->sync, root->tbo.resv, AMDGPU_FENCE_OWNER_VM, false); - if (root->shadow) - amdgpu_sync_resv(adev, &job->sync, - root->shadow->tbo.resv, - AMDGPU_FENCE_OWNER_VM, false); - WARN_ON(params.ib->length_dw > ndw); r = amdgpu_job_submit(job, ring, &vm->entity, AMDGPU_FENCE_OWNER_VM, &fence); -- cgit From 8febe617d8d9a3562895cb9bcb52fd1d0467fdef Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 24 Jan 2018 19:55:32 +0100 Subject: drm/amdgpu: revert "Add a parameter to amdgpu_bo_create()" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 2046d46db9166bddc84778f0b3477f6d1e9068ea. Not needed any more. 
Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 8ac3bcf9873f..cecdb216abff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -385,7 +385,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, amdgpu_vm_bo_size(adev, level), AMDGPU_GPU_PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, flags, - NULL, resv, 0, &pt); + NULL, resv, &pt); if (r) return r; @@ -2354,8 +2354,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level); r = amdgpu_bo_create(adev, size, align, true, AMDGPU_GEM_DOMAIN_VRAM, - flags, NULL, NULL, 0, - &vm->root.base.bo); + flags, NULL, NULL, &vm->root.base.bo); if (r) goto error_free_sched_entity; -- cgit From 44e1baeb6321fb4ce1dbc50c4cb895b671b2fbf9 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 24 Jan 2018 19:58:45 +0100 Subject: drm/amdgpu: revert "Add support for filling a buffer with 64 bit value" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 7bdc53f925af085ffa0580f10489f82b36cc2f1c and commit 330df03b3abf944f8f5180f2abc61367749984c0. Neither are needed any more. Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index cecdb216abff..e584c203c357 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1242,11 +1242,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, } else { /* set page commands needed */ - ndw += ncmds * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw; + ndw += ncmds * 10; /* extra commands for begin/end fragments */ - ndw += 2 * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw - * adev->vm_manager.fragment_size; + ndw += 2 * 10 * adev->vm_manager.fragment_size; params.func = amdgpu_vm_do_set_ptes; } -- cgit From 4584312d387f758534a51d7dd0a8c0f3b23ccc6e Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 25 Jan 2018 18:36:15 +0100 Subject: drm/amdgpu: fill only the lower range with ATS entries v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At least on x86-64 the upper range is purely used by the kernel, avoid creating any ATS mappings there as security precaution and to allow proper page fault reporting in the upper range. v2: remove unused variable Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 82 ++++++++++++++++++++++------------ 1 file changed, 53 insertions(+), 29 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index e584c203c357..61cf93867b8e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -267,24 +267,33 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) * Root PD needs to be reserved when calling this. 
*/ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_bo *bo, - unsigned level) + struct amdgpu_vm *vm, struct amdgpu_bo *bo, + unsigned level, bool pte_support_ats) { struct ttm_operation_ctx ctx = { true, false }; struct dma_fence *fence = NULL; - uint64_t addr, init_value; + unsigned entries, ats_entries; struct amdgpu_ring *ring; struct amdgpu_job *job; - unsigned entries; + uint64_t addr; int r; - if (vm->pte_support_ats) { - init_value = AMDGPU_PTE_DEFAULT_ATC; - if (level != AMDGPU_VM_PTB) - init_value |= AMDGPU_PDE_PTE; + addr = amdgpu_bo_gpu_offset(bo); + entries = amdgpu_bo_size(bo) / 8; + + if (pte_support_ats) { + if (level == adev->vm_manager.root_level) { + ats_entries = amdgpu_vm_level_shift(adev, level); + ats_entries += AMDGPU_GPU_PAGE_SHIFT; + ats_entries = AMDGPU_VA_HOLE_START >> ats_entries; + ats_entries = min(ats_entries, entries); + entries -= ats_entries; + } else { + ats_entries = entries; + entries = 0; + } } else { - init_value = 0; + ats_entries = 0; } ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); @@ -297,15 +306,26 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, if (r) goto error; - addr = amdgpu_bo_gpu_offset(bo); - entries = amdgpu_bo_size(bo) / 8; - r = amdgpu_job_alloc_with_ib(adev, 64, &job); if (r) goto error; - amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0, - entries, 0, init_value); + if (ats_entries) { + uint64_t ats_value; + + ats_value = AMDGPU_PTE_DEFAULT_ATC; + if (level != AMDGPU_VM_PTB) + ats_value |= AMDGPU_PDE_PTE; + + amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0, + ats_entries, 0, ats_value); + addr += ats_entries * 8; + } + + if (entries) + amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0, + entries, 0, 0); + amdgpu_ring_pad_ib(ring, &job->ibs[0]); WARN_ON(job->ibs[0].length_dw > 64); @@ -339,7 +359,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_vm_pt *parent, uint64_t saddr, uint64_t eaddr, - unsigned level) + unsigned level, bool ats) { unsigned shift = amdgpu_vm_level_shift(adev, level); unsigned pt_idx, from, to; @@ -389,7 +409,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, if (r) return r; - r = amdgpu_vm_clear_bo(adev, vm, pt, level); + r = amdgpu_vm_clear_bo(adev, vm, pt, level, ats); if (r) { amdgpu_bo_unref(&pt); return r; @@ -421,7 +441,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, uint64_t sub_eaddr = (pt_idx == to) ? 
eaddr : ((1 << shift) - 1); r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr, - sub_eaddr, level); + sub_eaddr, level, ats); if (r) return r; } @@ -444,26 +464,29 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, struct amdgpu_vm *vm, uint64_t saddr, uint64_t size) { - uint64_t last_pfn; uint64_t eaddr; + bool ats = false; /* validate the parameters */ if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK) return -EINVAL; eaddr = saddr + size - 1; - last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE; - if (last_pfn >= adev->vm_manager.max_pfn) { - dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n", - last_pfn, adev->vm_manager.max_pfn); - return -EINVAL; - } + + if (vm->pte_support_ats) + ats = saddr < AMDGPU_VA_HOLE_START; saddr /= AMDGPU_GPU_PAGE_SIZE; eaddr /= AMDGPU_GPU_PAGE_SIZE; + if (eaddr >= adev->vm_manager.max_pfn) { + dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n", + eaddr, adev->vm_manager.max_pfn); + return -EINVAL; + } + return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, - adev->vm_manager.root_level); + adev->vm_manager.root_level, ats); } /** @@ -1660,16 +1683,16 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct dma_fence **fence) { struct amdgpu_bo_va_mapping *mapping; + uint64_t init_pte_value = 0; struct dma_fence *f = NULL; int r; - uint64_t init_pte_value = 0; while (!list_empty(&vm->freed)) { mapping = list_first_entry(&vm->freed, struct amdgpu_bo_va_mapping, list); list_del(&mapping->list); - if (vm->pte_support_ats) + if (vm->pte_support_ats && mapping->start < AMDGPU_VA_HOLE_START) init_pte_value = AMDGPU_PTE_DEFAULT_ATC; r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm, @@ -2362,7 +2385,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, goto error_free_root; r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, - adev->vm_manager.root_level); + adev->vm_manager.root_level, + vm->pte_support_ats); if (r) goto error_unreserve; -- cgit From e5197a4c3dbff322efe1f70e23453318554d1598 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 2 Feb 2018 21:00:44 +0100 Subject: drm/amdgpu: release the VM shadow in the error path as well MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without it we run into a memory leak. Signed-off-by: Christian König Reviewed-by: Chunming Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 61cf93867b8e..b43098f02a40 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -411,6 +411,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, r = amdgpu_vm_clear_bo(adev, vm, pt, level, ats); if (r) { + amdgpu_bo_unref(&pt->shadow); amdgpu_bo_unref(&pt); return r; } @@ -418,6 +419,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, if (vm->use_cpu_for_update) { r = amdgpu_bo_kmap(pt, NULL); if (r) { + amdgpu_bo_unref(&pt->shadow); amdgpu_bo_unref(&pt); return r; } -- cgit From e61736daa9b2890c895b9e3e56ee639f36de83b7 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 2 Feb 2018 21:05:40 +0100 Subject: drm/amdgpu: clear the shadow fence as well MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It also needs to be initialized. 
Signed-off-by: Christian König Reviewed-by: Chunming Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b43098f02a40..18ce47608bf1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -336,6 +336,11 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, amdgpu_bo_fence(bo, fence, true); dma_fence_put(fence); + + if (bo->shadow) + return amdgpu_vm_clear_bo(adev, vm, bo->shadow, + level, pte_support_ats); + return 0; error_free: -- cgit From 29e8357b4cbbfcee6d375f2d183b674b678923d7 Mon Sep 17 00:00:00 2001 From: Christian König Date: Sun, 4 Feb 2018 19:36:52 +0100 Subject: drm/amdgpu: sync the VM PD/PT before clearing it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we might overwrite stuff which is still in use. Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 18ce47608bf1..0572d6072baa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -329,6 +329,11 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, amdgpu_ring_pad_ib(ring, &job->ibs[0]); WARN_ON(job->ibs[0].length_dw > 64); + r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv, + AMDGPU_FENCE_OWNER_UNDEFINED, false); + if (r) + goto error_free; + r = amdgpu_job_submit(job, ring, &vm->entity, AMDGPU_FENCE_OWNER_UNDEFINED, &fence); if (r) -- cgit From c633c00bf06779ec6d5e2c01748d4753ede98f8a Mon Sep 17 00:00:00 2001 From: Christian König Date: Sun, 4 Feb 2018 10:32:35 +0100 Subject: drm/amdgpu: separate PASID mapping from VM flush v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stuffing the PASID mapping into the VM flush isn't flexible enough since the PASID mapping changes not as often as we need a VM flush. 
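In the implementations this series touches, emitting the PASID mapping on its own is essentially a single per-VMID register write on the ring; a rough sketch (the lookup-table offset below is a placeholder, not the actual gmc register):

static void example_emit_pasid_mapping(struct amdgpu_ring *ring,
				       unsigned vmid, unsigned pasid)
{
	/* placeholder offset of a per-VMID PASID lookup-table register */
	uint32_t reg = 0x0 + vmid;

	/* map this VMID to the process's PASID */
	amdgpu_ring_emit_wreg(ring, reg, pasid);
}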
v2: add missing use of gmc_v7_0_emit_pasid_mapping Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 0572d6072baa..afa16a862eaa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -612,8 +612,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ struct dma_fence *fence; trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr); - amdgpu_ring_emit_vm_flush(ring, job->vmid, job->pasid, - job->vm_pd_addr); + amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr); + if (adev->gmc.gmc_funcs->emit_pasid_mapping && + ring->funcs->emit_wreg) + amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, + job->pasid); r = amdgpu_fence_emit(ring, &fence); if (r) -- cgit From b3cd285fa68d162a53c2eb4e23bc4fc1ab7d97f6 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 5 Feb 2018 17:38:01 +0100 Subject: drm/amdgpu: update the PASID mapping only on demand MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Updating the PASID is rather heavyweight and shouldn't be done all the time. Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 37 +++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index afa16a862eaa..0b237e027cab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -591,14 +591,24 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ id->oa_base != job->oa_base || id->oa_size != job->oa_size); bool vm_flush_needed = job->vm_needs_flush; + bool pasid_mapping_needed = id->pasid != job->pasid || + !id->pasid_mapping || + !dma_fence_is_signaled(id->pasid_mapping); + struct dma_fence *fence = NULL; unsigned patch_offset = 0; int r; if (amdgpu_vmid_had_gpu_reset(adev, id)) { gds_switch_needed = true; vm_flush_needed = true; + pasid_mapping_needed = true; } + gds_switch_needed &= !!ring->funcs->emit_gds_switch; + vm_flush_needed &= !!ring->funcs->emit_vm_flush; + pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && + ring->funcs->emit_wreg; + if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) return 0; @@ -608,27 +618,36 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ if (need_pipe_sync) amdgpu_ring_emit_pipeline_sync(ring); - if (ring->funcs->emit_vm_flush && vm_flush_needed) { - struct dma_fence *fence; - + if (vm_flush_needed) { trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr); amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr); - if (adev->gmc.gmc_funcs->emit_pasid_mapping && - ring->funcs->emit_wreg) - amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, - job->pasid); + } + if (pasid_mapping_needed) + amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid); + + if (vm_flush_needed || pasid_mapping_needed) { r = amdgpu_fence_emit(ring, &fence); if (r) return r; + } + if (vm_flush_needed) { mutex_lock(&id_mgr->lock); dma_fence_put(id->last_flush); - id->last_flush = fence; - 
id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter); + id->last_flush = dma_fence_get(fence); + id->current_gpu_reset_count = + atomic_read(&adev->gpu_reset_counter); mutex_unlock(&id_mgr->lock); } + if (pasid_mapping_needed) { + id->pasid = job->pasid; + dma_fence_put(id->pasid_mapping); + id->pasid_mapping = dma_fence_get(fence); + } + dma_fence_put(fence); + if (ring->funcs->emit_gds_switch && gds_switch_needed) { id->gds_base = job->gds_base; id->gds_size = job->gds_size; -- cgit From eab3de23a1639ec9419c1f9239ce651d3c82e7d6 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 14 Mar 2018 14:48:17 -0500 Subject: drm/amdgpu: explicit give BO type to amdgpu_bo_create MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop the "kernel" and sg parameter and give the BO type to create explicit to amdgpu_bo_create instead of figuring it out from the parameters. Signed-off-by: Christian König Reviewed-by: Roger He Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 0b237e027cab..24474294c92a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -413,9 +413,9 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, if (!entry->base.bo) { r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, level), - AMDGPU_GPU_PAGE_SIZE, true, + AMDGPU_GPU_PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, flags, - NULL, resv, &pt); + ttm_bo_type_kernel, resv, &pt); if (r) return r; @@ -2409,8 +2409,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, AMDGPU_GEM_CREATE_SHADOW); size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level); - r = amdgpu_bo_create(adev, size, align, true, AMDGPU_GEM_DOMAIN_VRAM, - flags, NULL, NULL, &vm->root.base.bo); + r = amdgpu_bo_create(adev, size, align, AMDGPU_GEM_DOMAIN_VRAM, flags, + ttm_bo_type_kernel, NULL, &vm->root.base.bo); if (r) goto error_free_sched_entity; -- cgit From b236fa1d339670cc997b68c31be57855bbabc126 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 15 Mar 2018 17:27:42 -0400 Subject: drm/amdgpu: Add helper to turn an existing VM into a compute VM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: Removed updating and checking of vm->vm_context v3: Enable amdgpu_vm_clear_bo in amdgpu_vm_make_compute Signed-off-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 67 ++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 24474294c92a..ea39ccf288ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2461,6 +2461,73 @@ error_free_sched_entity: return r; } +/** + * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM + * + * This only works on GFX VMs that don't have any BOs added and no + * page tables allocated yet. + * + * Changes the following VM parameters: + * - use_cpu_for_update + * - pte_supports_ats + * - pasid (old PASID is released, because compute manages its own PASIDs) + * + * Reinitializes the page directory to reflect the changed ATS + * setting. 
May leave behind an unused shadow BO for the page + * directory when switching from SDMA updates to CPU updates. + * + * Returns 0 for success, -errno for errors. + */ +int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) +{ + bool pte_support_ats = (adev->asic_type == CHIP_RAVEN); + int r; + + r = amdgpu_bo_reserve(vm->root.base.bo, true); + if (r) + return r; + + /* Sanity checks */ + if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) { + r = -EINVAL; + goto error; + } + + /* Check if PD needs to be reinitialized and do it before + * changing any other state, in case it fails. + */ + if (pte_support_ats != vm->pte_support_ats) { + r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, + adev->vm_manager.root_level, + pte_support_ats); + if (r) + goto error; + } + + /* Update VM state */ + vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & + AMDGPU_VM_USE_CPU_FOR_COMPUTE); + vm->pte_support_ats = pte_support_ats; + DRM_DEBUG_DRIVER("VM update mode is %s\n", + vm->use_cpu_for_update ? "CPU" : "SDMA"); + WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)), + "CPU update of VM recommended only for large BAR system\n"); + + if (vm->pasid) { + unsigned long flags; + + spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); + idr_remove(&adev->vm_manager.pasid_idr, vm->pasid); + spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); + + vm->pasid = 0; + } + +error: + amdgpu_bo_unreserve(vm->root.base.bo); + return r; +} + /** * amdgpu_vm_free_levels - free PD/PT levels * -- cgit From ede0dd86f45adf2b7083bb161f6bc81da5fe2bad Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 15 Mar 2018 17:27:43 -0400 Subject: drm/amdgpu: Add kfd2kgd interface to acquire an existing VM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows acquiring an existing VM from a render node FD to use it for a compute process. Such VMs get destroyed when the original file descriptor is released. Added a callback from amdgpu_vm_fini to handle KFD VM destruction correctly in this case. v2: * Removed vm->vm_context check in amdgpu_amdkfd_gpuvm_destroy_cb, check vm->process_info earlier instead Signed-off-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index ea39ccf288ed..cbf1421c1963 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -32,6 +32,7 @@ #include #include "amdgpu.h" #include "amdgpu_trace.h" +#include "amdgpu_amdkfd.h" /* * GPUVM @@ -2575,6 +2576,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) u64 fault; int i, r; + amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm); + /* Clear pending page faults from IH when the VM is destroyed */ while (kfifo_get(&vm->faults, &fault)) amdgpu_ih_clear_fault(adev, fault); -- cgit From 810955ba712fc5c517b5e999fd69bfd20251effb Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Fri, 23 Mar 2018 15:30:35 -0400 Subject: drm/amdgpu: Fix acquiring VM on large-BAR systems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On large-BAR systems the VM page tables for compute are accessed by the CPU. Always allow CPU access to the page directory so that it can be used later by the CPU when a VM is converted to a compute VM. 
Signed-off-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index cbf1421c1963..da55a78d7380 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2406,8 +2406,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (vm->use_cpu_for_update) flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; else - flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | - AMDGPU_GEM_CREATE_SHADOW); + flags |= AMDGPU_GEM_CREATE_SHADOW; size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level); r = amdgpu_bo_create(adev, size, align, AMDGPU_GEM_DOMAIN_VRAM, flags, -- cgit