From 1303c73c61fea8cc5509e5b0e3cbe5253e260ca1 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 3 Aug 2016 17:46:42 +0200 Subject: drm/amdgpu: cleanup VM fragment defines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can actually do way more than just the 64KB we currently used as default. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 8e642fc48df4..ac209a51772b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -739,7 +739,7 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev, */ /* SI and newer are optimized for 64KB */ - uint64_t frag_flags = AMDGPU_PTE_FRAG_64KB; + uint64_t frag_flags = AMDGPU_PTE_FRAG(AMDGPU_LOG2_PAGES_PER_FRAG); uint64_t frag_align = 0x80; uint64_t frag_start = ALIGN(pe_start, frag_align); -- cgit From 29efc4f5dfe47e992b04f92c4a4d990d03816e78 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 4 Aug 2016 14:52:50 +0200 Subject: drm/amdgpu: rename amdgpu_vm_update_params MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Well those are actually page table entry parameters. This also makes the variable names used a bit shorter. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 91 ++++++++++++++++------------------ 1 file changed, 44 insertions(+), 47 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index ac209a51772b..577abfd3879e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -57,7 +57,7 @@ /* Local structure. Encapsulate some VM table update parameters to reduce * the number of function parameters */ -struct amdgpu_vm_update_params { +struct amdgpu_pte_update_params { /* address where to copy page table entries from */ uint64_t src; /* DMA addresses to use for mapping */ @@ -470,7 +470,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, * amdgpu_vm_update_pages - helper to call the right asic function * * @adev: amdgpu_device pointer - * @vm_update_params: see amdgpu_vm_update_params definition + * @params: see amdgpu_pte_update_params definition * @pe: addr of the page entry * @addr: dst addr to write into pe * @count: number of page entries to update @@ -481,29 +481,28 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, * to setup the page table using the DMA. */ static void amdgpu_vm_update_pages(struct amdgpu_device *adev, - struct amdgpu_vm_update_params - *vm_update_params, + struct amdgpu_pte_update_params *params, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags) { trace_amdgpu_vm_set_page(pe, addr, count, incr, flags); - if (vm_update_params->src) { - amdgpu_vm_copy_pte(adev, vm_update_params->ib, - pe, (vm_update_params->src + (addr >> 12) * 8), count); + if (params->src) { + amdgpu_vm_copy_pte(adev, params->ib, + pe, (params->src + (addr >> 12) * 8), count); - } else if (vm_update_params->pages_addr) { - amdgpu_vm_write_pte(adev, vm_update_params->ib, - vm_update_params->pages_addr, + } else if (params->pages_addr) { + amdgpu_vm_write_pte(adev, params->ib, + params->pages_addr, pe, addr, count, incr, flags); } else if (count < 3) { - amdgpu_vm_write_pte(adev, vm_update_params->ib, NULL, pe, addr, + amdgpu_vm_write_pte(adev, params->ib, NULL, pe, addr, count, incr, flags); } else { - amdgpu_vm_set_pte_pde(adev, vm_update_params->ib, pe, addr, + amdgpu_vm_set_pte_pde(adev, params->ib, pe, addr, count, incr, flags); } } @@ -523,12 +522,12 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, struct amdgpu_ring *ring; struct fence *fence = NULL; struct amdgpu_job *job; - struct amdgpu_vm_update_params vm_update_params; + struct amdgpu_pte_update_params params; unsigned entries; uint64_t addr; int r; - memset(&vm_update_params, 0, sizeof(vm_update_params)); + memset(¶ms, 0, sizeof(params)); ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); r = reservation_object_reserve_shared(bo->tbo.resv); @@ -546,8 +545,8 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, if (r) goto error; - vm_update_params.ib = &job->ibs[0]; - amdgpu_vm_update_pages(adev, &vm_update_params, addr, 0, entries, + params.ib = &job->ibs[0]; + amdgpu_vm_update_pages(adev, ¶ms, addr, 0, entries, 0, 0); amdgpu_ring_pad_ib(ring, &job->ibs[0]); @@ -620,12 +619,12 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, uint64_t last_pde = ~0, last_pt = ~0; unsigned count = 0, pt_idx, ndw; struct amdgpu_job *job; - struct amdgpu_vm_update_params vm_update_params; + struct amdgpu_pte_update_params params; struct fence *fence = NULL; int r; - memset(&vm_update_params, 0, sizeof(vm_update_params)); + memset(¶ms, 0, sizeof(params)); ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); /* padding, etc. */ @@ -638,7 +637,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, if (r) return r; - vm_update_params.ib = &job->ibs[0]; + params.ib = &job->ibs[0]; /* walk over the address space and update the page directory */ for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) { @@ -658,7 +657,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, ((last_pt + incr * count) != pt)) { if (count) { - amdgpu_vm_update_pages(adev, &vm_update_params, + amdgpu_vm_update_pages(adev, ¶ms, last_pde, last_pt, count, incr, AMDGPU_PTE_VALID); @@ -673,15 +672,15 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, } if (count) - amdgpu_vm_update_pages(adev, &vm_update_params, + amdgpu_vm_update_pages(adev, ¶ms, last_pde, last_pt, count, incr, AMDGPU_PTE_VALID); - if (vm_update_params.ib->length_dw != 0) { - amdgpu_ring_pad_ib(ring, vm_update_params.ib); + if (params.ib->length_dw != 0) { + amdgpu_ring_pad_ib(ring, params.ib); amdgpu_sync_resv(adev, &job->sync, pd->tbo.resv, AMDGPU_FENCE_OWNER_VM); - WARN_ON(vm_update_params.ib->length_dw > ndw); + WARN_ON(params.ib->length_dw > ndw); r = amdgpu_job_submit(job, ring, &vm->entity, AMDGPU_FENCE_OWNER_VM, &fence); if (r) @@ -707,15 +706,14 @@ error_free: * amdgpu_vm_frag_ptes - add fragment information to PTEs * * @adev: amdgpu_device pointer - * @vm_update_params: see amdgpu_vm_update_params definition + * @params: see amdgpu_pte_update_params definition * @pe_start: first PTE to handle * @pe_end: last PTE to handle * @addr: addr those PTEs should point to * @flags: hw mapping flags */ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev, - struct amdgpu_vm_update_params - *vm_update_params, + struct amdgpu_pte_update_params *params, uint64_t pe_start, uint64_t pe_end, uint64_t addr, uint32_t flags) { @@ -752,11 +750,11 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev, return; /* system pages are non continuously */ - if (vm_update_params->src || vm_update_params->pages_addr || + if (params->src || params->pages_addr || !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) { count = (pe_end - pe_start) / 8; - amdgpu_vm_update_pages(adev, vm_update_params, pe_start, + amdgpu_vm_update_pages(adev, params, pe_start, addr, count, AMDGPU_GPU_PAGE_SIZE, flags); return; @@ -765,21 +763,21 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev, /* handle the 4K area at the beginning */ if (pe_start != frag_start) { count = (frag_start - pe_start) / 8; - amdgpu_vm_update_pages(adev, vm_update_params, pe_start, addr, + amdgpu_vm_update_pages(adev, params, pe_start, addr, count, AMDGPU_GPU_PAGE_SIZE, flags); addr += AMDGPU_GPU_PAGE_SIZE * count; } /* handle the area in the middle */ count = (frag_end - frag_start) / 8; - amdgpu_vm_update_pages(adev, vm_update_params, frag_start, addr, count, + amdgpu_vm_update_pages(adev, params, frag_start, addr, count, AMDGPU_GPU_PAGE_SIZE, flags | frag_flags); /* handle the 4K area at the end */ if (frag_end != pe_end) { addr += AMDGPU_GPU_PAGE_SIZE * count; count = (pe_end - frag_end) / 8; - amdgpu_vm_update_pages(adev, vm_update_params, frag_end, addr, + amdgpu_vm_update_pages(adev, params, frag_end, addr, count, AMDGPU_GPU_PAGE_SIZE, flags); } } @@ -788,7 +786,7 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev, * amdgpu_vm_update_ptes - make sure that page tables are valid * * @adev: amdgpu_device pointer - * @vm_update_params: see amdgpu_vm_update_params definition + * @params: see amdgpu_pte_update_params definition * @vm: requested vm * @start: start of GPU address range * @end: end of GPU address range @@ -798,8 +796,7 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev, * Update the page tables in the range @start - @end. */ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev, - struct amdgpu_vm_update_params - *vm_update_params, + struct amdgpu_pte_update_params *params, struct amdgpu_vm *vm, uint64_t start, uint64_t end, uint64_t dst, uint32_t flags) @@ -852,7 +849,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev, */ cur_pe_end += 8 * nptes; } else { - amdgpu_vm_frag_ptes(adev, vm_update_params, + amdgpu_vm_frag_ptes(adev, params, cur_pe_start, cur_pe_end, cur_dst, flags); @@ -866,7 +863,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev, dst += nptes * AMDGPU_GPU_PAGE_SIZE; } - amdgpu_vm_frag_ptes(adev, vm_update_params, cur_pe_start, + amdgpu_vm_frag_ptes(adev, params, cur_pe_start, cur_pe_end, cur_dst, flags); } @@ -900,14 +897,14 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, void *owner = AMDGPU_FENCE_OWNER_VM; unsigned nptes, ncmds, ndw; struct amdgpu_job *job; - struct amdgpu_vm_update_params vm_update_params; + struct amdgpu_pte_update_params params; struct fence *f = NULL; int r; ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); - memset(&vm_update_params, 0, sizeof(vm_update_params)); - vm_update_params.src = src; - vm_update_params.pages_addr = pages_addr; + memset(¶ms, 0, sizeof(params)); + params.src = src; + params.pages_addr = pages_addr; /* sync to everything on unmapping */ if (!(flags & AMDGPU_PTE_VALID)) @@ -924,11 +921,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, /* padding, etc. */ ndw = 64; - if (vm_update_params.src) { + if (params.src) { /* only copy commands needed */ ndw += ncmds * 7; - } else if (vm_update_params.pages_addr) { + } else if (params.pages_addr) { /* header for write data commands */ ndw += ncmds * 4; @@ -947,7 +944,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) return r; - vm_update_params.ib = &job->ibs[0]; + params.ib = &job->ibs[0]; r = amdgpu_sync_fence(adev, &job->sync, exclusive); if (r) @@ -962,11 +959,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) goto error_free; - amdgpu_vm_update_ptes(adev, &vm_update_params, vm, start, + amdgpu_vm_update_ptes(adev, ¶ms, vm, start, last + 1, addr, flags); - amdgpu_ring_pad_ib(ring, vm_update_params.ib); - WARN_ON(vm_update_params.ib->length_dw > ndw); + amdgpu_ring_pad_ib(ring, params.ib); + WARN_ON(params.ib->length_dw > ndw); r = amdgpu_job_submit(job, ring, &vm->entity, AMDGPU_FENCE_OWNER_VM, &f); if (r) -- cgit From 27c5f36fe138e29d63eea7d1445bda1ca64921d9 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 4 Aug 2016 15:02:49 +0200 Subject: drm/amdgpu: add adev to the pte_update_params MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No need to carry that forward as a separate parameter. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 65 +++++++++++++++------------------- 1 file changed, 29 insertions(+), 36 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 577abfd3879e..fd7901c1320f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -58,6 +58,8 @@ * the number of function parameters */ struct amdgpu_pte_update_params { + /* amdgpu device we do this update for */ + struct amdgpu_device *adev; /* address where to copy page table entries from */ uint64_t src; /* DMA addresses to use for mapping */ @@ -469,7 +471,6 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, /** * amdgpu_vm_update_pages - helper to call the right asic function * - * @adev: amdgpu_device pointer * @params: see amdgpu_pte_update_params definition * @pe: addr of the page entry * @addr: dst addr to write into pe @@ -480,8 +481,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, * Traces the parameters and calls the right asic functions * to setup the page table using the DMA. */ -static void amdgpu_vm_update_pages(struct amdgpu_device *adev, - struct amdgpu_pte_update_params *params, +static void amdgpu_vm_update_pages(struct amdgpu_pte_update_params *params, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags) @@ -489,20 +489,20 @@ static void amdgpu_vm_update_pages(struct amdgpu_device *adev, trace_amdgpu_vm_set_page(pe, addr, count, incr, flags); if (params->src) { - amdgpu_vm_copy_pte(adev, params->ib, + amdgpu_vm_copy_pte(params->adev, params->ib, pe, (params->src + (addr >> 12) * 8), count); } else if (params->pages_addr) { - amdgpu_vm_write_pte(adev, params->ib, + amdgpu_vm_write_pte(params->adev, params->ib, params->pages_addr, pe, addr, count, incr, flags); } else if (count < 3) { - amdgpu_vm_write_pte(adev, params->ib, NULL, pe, addr, + amdgpu_vm_write_pte(params->adev, params->ib, NULL, pe, addr, count, incr, flags); } else { - amdgpu_vm_set_pte_pde(adev, params->ib, pe, addr, + amdgpu_vm_set_pte_pde(params->adev, params->ib, pe, addr, count, incr, flags); } } @@ -527,7 +527,6 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, uint64_t addr; int r; - memset(¶ms, 0, sizeof(params)); ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); r = reservation_object_reserve_shared(bo->tbo.resv); @@ -545,9 +544,10 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, if (r) goto error; + memset(¶ms, 0, sizeof(params)); + params.adev = adev; params.ib = &job->ibs[0]; - amdgpu_vm_update_pages(adev, ¶ms, addr, 0, entries, - 0, 0); + amdgpu_vm_update_pages(¶ms, addr, 0, entries, 0, 0); amdgpu_ring_pad_ib(ring, &job->ibs[0]); WARN_ON(job->ibs[0].length_dw > 64); @@ -624,7 +624,6 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, int r; - memset(¶ms, 0, sizeof(params)); ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); /* padding, etc. */ @@ -637,6 +636,8 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, if (r) return r; + memset(¶ms, 0, sizeof(params)); + params.adev = adev; params.ib = &job->ibs[0]; /* walk over the address space and update the page directory */ @@ -657,9 +658,8 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, ((last_pt + incr * count) != pt)) { if (count) { - amdgpu_vm_update_pages(adev, ¶ms, - last_pde, last_pt, - count, incr, + amdgpu_vm_update_pages(¶ms, last_pde, + last_pt, count, incr, AMDGPU_PTE_VALID); } @@ -672,8 +672,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, } if (count) - amdgpu_vm_update_pages(adev, ¶ms, - last_pde, last_pt, + amdgpu_vm_update_pages(¶ms, last_pde, last_pt, count, incr, AMDGPU_PTE_VALID); if (params.ib->length_dw != 0) { @@ -705,15 +704,13 @@ error_free: /** * amdgpu_vm_frag_ptes - add fragment information to PTEs * - * @adev: amdgpu_device pointer * @params: see amdgpu_pte_update_params definition * @pe_start: first PTE to handle * @pe_end: last PTE to handle * @addr: addr those PTEs should point to * @flags: hw mapping flags */ -static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev, - struct amdgpu_pte_update_params *params, +static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, uint64_t pe_start, uint64_t pe_end, uint64_t addr, uint32_t flags) { @@ -754,38 +751,36 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev, !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) { count = (pe_end - pe_start) / 8; - amdgpu_vm_update_pages(adev, params, pe_start, - addr, count, AMDGPU_GPU_PAGE_SIZE, - flags); + amdgpu_vm_update_pages(params, pe_start, addr, count, + AMDGPU_GPU_PAGE_SIZE, flags); return; } /* handle the 4K area at the beginning */ if (pe_start != frag_start) { count = (frag_start - pe_start) / 8; - amdgpu_vm_update_pages(adev, params, pe_start, addr, - count, AMDGPU_GPU_PAGE_SIZE, flags); + amdgpu_vm_update_pages(params, pe_start, addr, count, + AMDGPU_GPU_PAGE_SIZE, flags); addr += AMDGPU_GPU_PAGE_SIZE * count; } /* handle the area in the middle */ count = (frag_end - frag_start) / 8; - amdgpu_vm_update_pages(adev, params, frag_start, addr, count, + amdgpu_vm_update_pages(params, frag_start, addr, count, AMDGPU_GPU_PAGE_SIZE, flags | frag_flags); /* handle the 4K area at the end */ if (frag_end != pe_end) { addr += AMDGPU_GPU_PAGE_SIZE * count; count = (pe_end - frag_end) / 8; - amdgpu_vm_update_pages(adev, params, frag_end, addr, - count, AMDGPU_GPU_PAGE_SIZE, flags); + amdgpu_vm_update_pages(params, frag_end, addr, count, + AMDGPU_GPU_PAGE_SIZE, flags); } } /** * amdgpu_vm_update_ptes - make sure that page tables are valid * - * @adev: amdgpu_device pointer * @params: see amdgpu_pte_update_params definition * @vm: requested vm * @start: start of GPU address range @@ -795,8 +790,7 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev, * * Update the page tables in the range @start - @end. */ -static void amdgpu_vm_update_ptes(struct amdgpu_device *adev, - struct amdgpu_pte_update_params *params, +static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, struct amdgpu_vm *vm, uint64_t start, uint64_t end, uint64_t dst, uint32_t flags) @@ -849,8 +843,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev, */ cur_pe_end += 8 * nptes; } else { - amdgpu_vm_frag_ptes(adev, params, - cur_pe_start, cur_pe_end, + amdgpu_vm_frag_ptes(params, cur_pe_start, cur_pe_end, cur_dst, flags); cur_pe_start = next_pe_start; @@ -863,8 +856,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev, dst += nptes * AMDGPU_GPU_PAGE_SIZE; } - amdgpu_vm_frag_ptes(adev, params, cur_pe_start, - cur_pe_end, cur_dst, flags); + amdgpu_vm_frag_ptes(params, cur_pe_start, cur_pe_end, cur_dst, flags); } /** @@ -902,7 +894,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, int r; ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); + memset(¶ms, 0, sizeof(params)); + params.adev = adev; params.src = src; params.pages_addr = pages_addr; @@ -959,8 +953,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) goto error_free; - amdgpu_vm_update_ptes(adev, ¶ms, vm, start, - last + 1, addr, flags); + amdgpu_vm_update_ptes(¶ms, vm, start, last + 1, addr, flags); amdgpu_ring_pad_ib(ring, params.ib); WARN_ON(params.ib->length_dw > ndw); -- cgit From 92696dd52e58b5caaee1dc027cf14f327f91d2e1 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 5 Aug 2016 13:56:35 +0200 Subject: drm/amdgpu: flip frag_ptes and update_pts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can add the fragment params before we split the update for the page tables. That should save a few CPU cycles for larger updates. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 166 ++++++++++++++++----------------- 1 file changed, 79 insertions(+), 87 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index fd7901c1320f..d2796bb4004b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -701,83 +701,6 @@ error_free: return r; } -/** - * amdgpu_vm_frag_ptes - add fragment information to PTEs - * - * @params: see amdgpu_pte_update_params definition - * @pe_start: first PTE to handle - * @pe_end: last PTE to handle - * @addr: addr those PTEs should point to - * @flags: hw mapping flags - */ -static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, - uint64_t pe_start, uint64_t pe_end, - uint64_t addr, uint32_t flags) -{ - /** - * The MC L1 TLB supports variable sized pages, based on a fragment - * field in the PTE. When this field is set to a non-zero value, page - * granularity is increased from 4KB to (1 << (12 + frag)). The PTE - * flags are considered valid for all PTEs within the fragment range - * and corresponding mappings are assumed to be physically contiguous. - * - * The L1 TLB can store a single PTE for the whole fragment, - * significantly increasing the space available for translation - * caching. This leads to large improvements in throughput when the - * TLB is under pressure. - * - * The L2 TLB distributes small and large fragments into two - * asymmetric partitions. The large fragment cache is significantly - * larger. Thus, we try to use large fragments wherever possible. - * Userspace can support this by aligning virtual base address and - * allocation size to the fragment size. - */ - - /* SI and newer are optimized for 64KB */ - uint64_t frag_flags = AMDGPU_PTE_FRAG(AMDGPU_LOG2_PAGES_PER_FRAG); - uint64_t frag_align = 0x80; - - uint64_t frag_start = ALIGN(pe_start, frag_align); - uint64_t frag_end = pe_end & ~(frag_align - 1); - - unsigned count; - - /* Abort early if there isn't anything to do */ - if (pe_start == pe_end) - return; - - /* system pages are non continuously */ - if (params->src || params->pages_addr || - !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) { - - count = (pe_end - pe_start) / 8; - amdgpu_vm_update_pages(params, pe_start, addr, count, - AMDGPU_GPU_PAGE_SIZE, flags); - return; - } - - /* handle the 4K area at the beginning */ - if (pe_start != frag_start) { - count = (frag_start - pe_start) / 8; - amdgpu_vm_update_pages(params, pe_start, addr, count, - AMDGPU_GPU_PAGE_SIZE, flags); - addr += AMDGPU_GPU_PAGE_SIZE * count; - } - - /* handle the area in the middle */ - count = (frag_end - frag_start) / 8; - amdgpu_vm_update_pages(params, frag_start, addr, count, - AMDGPU_GPU_PAGE_SIZE, flags | frag_flags); - - /* handle the 4K area at the end */ - if (frag_end != pe_end) { - addr += AMDGPU_GPU_PAGE_SIZE * count; - count = (pe_end - frag_end) / 8; - amdgpu_vm_update_pages(params, frag_end, addr, count, - AMDGPU_GPU_PAGE_SIZE, flags); - } -} - /** * amdgpu_vm_update_ptes - make sure that page tables are valid * @@ -797,7 +720,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, { const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1; - uint64_t cur_pe_start, cur_pe_end, cur_dst; + uint64_t cur_pe_start, cur_nptes, cur_dst; uint64_t addr; /* next GPU address to be updated */ uint64_t pt_idx; struct amdgpu_bo *pt; @@ -816,7 +739,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, cur_pe_start = amdgpu_bo_gpu_offset(pt); cur_pe_start += (addr & mask) * 8; - cur_pe_end = cur_pe_start + 8 * nptes; + cur_nptes = nptes; cur_dst = dst; /* for next ptb*/ @@ -836,18 +759,19 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, next_pe_start = amdgpu_bo_gpu_offset(pt); next_pe_start += (addr & mask) * 8; - if (cur_pe_end == next_pe_start) { + if ((cur_pe_start + 8 * cur_nptes) == next_pe_start) { /* The next ptb is consecutive to current ptb. - * Don't call amdgpu_vm_frag_ptes now. + * Don't call amdgpu_vm_update_pages now. * Will update two ptbs together in future. */ - cur_pe_end += 8 * nptes; + cur_nptes += nptes; } else { - amdgpu_vm_frag_ptes(params, cur_pe_start, cur_pe_end, - cur_dst, flags); + amdgpu_vm_update_pages(params, cur_pe_start, cur_dst, + cur_nptes, AMDGPU_GPU_PAGE_SIZE, + flags); cur_pe_start = next_pe_start; - cur_pe_end = next_pe_start + 8 * nptes; + cur_nptes = nptes; cur_dst = dst; } @@ -856,7 +780,75 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, dst += nptes * AMDGPU_GPU_PAGE_SIZE; } - amdgpu_vm_frag_ptes(params, cur_pe_start, cur_pe_end, cur_dst, flags); + amdgpu_vm_update_pages(params, cur_pe_start, cur_dst, cur_nptes, + AMDGPU_GPU_PAGE_SIZE, flags); +} + +/* + * amdgpu_vm_frag_ptes - add fragment information to PTEs + * + * @params: see amdgpu_pte_update_params definition + * @vm: requested vm + * @start: first PTE to handle + * @end: last PTE to handle + * @dst: addr those PTEs should point to + * @flags: hw mapping flags + */ +static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, + struct amdgpu_vm *vm, + uint64_t start, uint64_t end, + uint64_t dst, uint32_t flags) +{ + /** + * The MC L1 TLB supports variable sized pages, based on a fragment + * field in the PTE. When this field is set to a non-zero value, page + * granularity is increased from 4KB to (1 << (12 + frag)). The PTE + * flags are considered valid for all PTEs within the fragment range + * and corresponding mappings are assumed to be physically contiguous. + * + * The L1 TLB can store a single PTE for the whole fragment, + * significantly increasing the space available for translation + * caching. This leads to large improvements in throughput when the + * TLB is under pressure. + * + * The L2 TLB distributes small and large fragments into two + * asymmetric partitions. The large fragment cache is significantly + * larger. Thus, we try to use large fragments wherever possible. + * Userspace can support this by aligning virtual base address and + * allocation size to the fragment size. + */ + + /* SI and newer are optimized for 64KB */ + uint64_t frag_flags = AMDGPU_PTE_FRAG(AMDGPU_LOG2_PAGES_PER_FRAG); + uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG; + + uint64_t frag_start = ALIGN(start, frag_align); + uint64_t frag_end = end & ~(frag_align - 1); + + /* system pages are non continuously */ + if (params->src || params->pages_addr || !(flags & AMDGPU_PTE_VALID) || + (frag_start >= frag_end)) { + + amdgpu_vm_update_ptes(params, vm, start, end, dst, flags); + return; + } + + /* handle the 4K area at the beginning */ + if (start != frag_start) { + amdgpu_vm_update_ptes(params, vm, start, frag_start, + dst, flags); + dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE; + } + + /* handle the area in the middle */ + amdgpu_vm_update_ptes(params, vm, frag_start, frag_end, dst, + flags | frag_flags); + + /* handle the 4K area at the end */ + if (frag_end != end) { + dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE; + amdgpu_vm_update_ptes(params, vm, frag_end, end, dst, flags); + } } /** @@ -953,7 +945,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) goto error_free; - amdgpu_vm_update_ptes(¶ms, vm, start, last + 1, addr, flags); + amdgpu_vm_frag_ptes(¶ms, vm, start, last + 1, addr, flags); amdgpu_ring_pad_ib(ring, params.ib); WARN_ON(params.ib->length_dw > ndw); -- cgit From e2b84e4be37462e77f34b5bd057d2483fcd3ca98 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 8 Aug 2016 14:40:18 +0200 Subject: drm/amdgpu: use more than 64KB fragment size if possible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We align to 64KB, but when userspace aligns even more we can easily use more. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index d2796bb4004b..7f385e78a265 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -818,13 +818,13 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, * allocation size to the fragment size. */ - /* SI and newer are optimized for 64KB */ - uint64_t frag_flags = AMDGPU_PTE_FRAG(AMDGPU_LOG2_PAGES_PER_FRAG); - uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG; + const uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG; uint64_t frag_start = ALIGN(start, frag_align); uint64_t frag_end = end & ~(frag_align - 1); + uint32_t frag; + /* system pages are non continuously */ if (params->src || params->pages_addr || !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) { @@ -833,6 +833,10 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, return; } + /* use more than 64KB fragment size if possible */ + frag = lower_32_bits(frag_start | frag_end); + frag = likely(frag) ? __ffs(frag) : 31; + /* handle the 4K area at the beginning */ if (start != frag_start) { amdgpu_vm_update_ptes(params, vm, start, frag_start, @@ -842,7 +846,7 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, /* handle the area in the middle */ amdgpu_vm_update_ptes(params, vm, frag_start, frag_end, dst, - flags | frag_flags); + flags | AMDGPU_PTE_FRAG(frag)); /* handle the 4K area at the end */ if (frag_end != end) { -- cgit From 1baa439fb2f4e586bf387cbbc515468ebb16e587 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Thu, 4 Aug 2016 13:59:32 +0800 Subject: drm/amdgpu: allocate shadow for pd/pt bo V2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pd/pt shadow bo will be used to backup page table, when gpu reset happens, we can restore the page table by them. V2: Free shadow bo. Signed-off-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 7f385e78a265..1a474fa1f441 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1328,7 +1328,8 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8, AMDGPU_GPU_PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_NO_CPU_ACCESS, + AMDGPU_GEM_CREATE_NO_CPU_ACCESS | + AMDGPU_GEM_CREATE_SHADOW, NULL, resv, &pt); if (r) goto error_free; @@ -1527,7 +1528,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) r = amdgpu_bo_create(adev, pd_size, align, true, AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_NO_CPU_ACCESS, + AMDGPU_GEM_CREATE_NO_CPU_ACCESS | + AMDGPU_GEM_CREATE_SHADOW, NULL, NULL, &vm->page_directory); if (r) goto error_free_sched_entity; @@ -1583,10 +1585,16 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) kfree(mapping); } - for (i = 0; i < amdgpu_vm_num_pdes(adev); i++) + for (i = 0; i < amdgpu_vm_num_pdes(adev); i++) { + if (vm->page_tables[i].entry.robj && + vm->page_tables[i].entry.robj->shadow) + amdgpu_bo_unref(&vm->page_tables[i].entry.robj->shadow); amdgpu_bo_unref(&vm->page_tables[i].entry.robj); + } drm_free_large(vm->page_tables); + if (vm->page_directory->shadow) + amdgpu_bo_unref(&vm->page_directory->shadow); amdgpu_bo_unref(&vm->page_directory); fence_put(vm->page_directory_fence); } -- cgit From b0456f93063ec8629cfeee6d03758f92793d96cb Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 11 Aug 2016 14:06:54 +0200 Subject: drm/amdgpu: write PTEs directly into the IB. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Write the PTEs at the end of the IB instead of directly into the SDMA commands. This can save quite some CPU cycles building the entries. This doesn't change the DW estimation because PTEs where embedded into the IB before as well. It just moves them to the end of the IB. Signed-off-by: Christian König Reviewed-by: Alex Deucher Reviewed-by: Edward O'Callaghan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 1a474fa1f441..e5095b5e10e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -911,15 +911,15 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, /* padding, etc. */ ndw = 64; - if (params.src) { + if (src) { /* only copy commands needed */ ndw += ncmds * 7; - } else if (params.pages_addr) { - /* header for write data commands */ - ndw += ncmds * 4; + } else if (pages_addr) { + /* copy commands needed */ + ndw += ncmds * 7; - /* body of write data command */ + /* and also PTEs */ ndw += nptes * 2; } else { @@ -936,6 +936,22 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, params.ib = &job->ibs[0]; + if (!src && pages_addr) { + uint64_t *pte; + unsigned i; + + /* Put the PTEs at the end of the IB. */ + i = ndw - nptes * 2; + pte= (uint64_t *)&(job->ibs->ptr[i]); + params.src = job->ibs->gpu_addr + i * 4; + + for (i = 0; i < nptes; ++i) { + pte[i] = amdgpu_vm_map_gart(pages_addr, addr + i * + AMDGPU_GPU_PAGE_SIZE); + pte[i] |= flags; + } + } + r = amdgpu_sync_fence(adev, &job->sync, exclusive); if (r) goto error_free; -- cgit From b7fc2cbd5e9fd4fbac1c3a9fda6eff2b28b7bc4d Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 11 Aug 2016 16:44:15 +0200 Subject: drm/amdgpu: remove pages_addr handling from the VM code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not needed any more. Signed-off-by: Christian König Reviewed-by: Alex Deucher Reviewed-by: Edward O'Callaghan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index e5095b5e10e5..51db44abdfa5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -62,8 +62,6 @@ struct amdgpu_pte_update_params { struct amdgpu_device *adev; /* address where to copy page table entries from */ uint64_t src; - /* DMA addresses to use for mapping */ - dma_addr_t *pages_addr; /* indirect buffer to fill with commands */ struct amdgpu_ib *ib; }; @@ -492,11 +490,6 @@ static void amdgpu_vm_update_pages(struct amdgpu_pte_update_params *params, amdgpu_vm_copy_pte(params->adev, params->ib, pe, (params->src + (addr >> 12) * 8), count); - } else if (params->pages_addr) { - amdgpu_vm_write_pte(params->adev, params->ib, - params->pages_addr, - pe, addr, count, incr, flags); - } else if (count < 3) { amdgpu_vm_write_pte(params->adev, params->ib, NULL, pe, addr, count, incr, flags); @@ -826,7 +819,7 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, uint32_t frag; /* system pages are non continuously */ - if (params->src || params->pages_addr || !(flags & AMDGPU_PTE_VALID) || + if (params->src || !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) { amdgpu_vm_update_ptes(params, vm, start, end, dst, flags); @@ -894,7 +887,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, memset(¶ms, 0, sizeof(params)); params.adev = adev; params.src = src; - params.pages_addr = pages_addr; /* sync to everything on unmapping */ if (!(flags & AMDGPU_PTE_VALID)) -- cgit From de9ea7bd366009d379043d49b5b2660c5b462483 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 12 Aug 2016 11:33:30 +0200 Subject: drm/amdgpu: cleanup the write_pte implementations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't need the gart mapping handling here any more. Signed-off-by: Christian König Reviewed-by: Alex Deucher Reviewed-by: Edward O'Callaghan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 51db44abdfa5..e0e40aca0f78 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -491,8 +491,8 @@ static void amdgpu_vm_update_pages(struct amdgpu_pte_update_params *params, pe, (params->src + (addr >> 12) * 8), count); } else if (count < 3) { - amdgpu_vm_write_pte(params->adev, params->ib, NULL, pe, addr, - count, incr, flags); + amdgpu_vm_write_pte(params->adev, params->ib, pe, + addr | flags, count, incr); } else { amdgpu_vm_set_pte_pde(params->adev, params->ib, pe, addr, @@ -569,21 +569,15 @@ error: * Look up the physical address of the page that the pte resolves * to and return the pointer for the page table entry. */ -uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) +static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) { uint64_t result; - if (pages_addr) { - /* page table offset */ - result = pages_addr[addr >> PAGE_SHIFT]; - - /* in case cpu page size != gpu page size*/ - result |= addr & (~PAGE_MASK); + /* page table offset */ + result = pages_addr[addr >> PAGE_SHIFT]; - } else { - /* No mapping required */ - result = addr; - } + /* in case cpu page size != gpu page size*/ + result |= addr & (~PAGE_MASK); result &= 0xFFFFFFFFFFFFF000ULL; -- cgit From dc157c6daa317c75058def8f9753909e5224cbec Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 12 Aug 2016 11:40:11 +0200 Subject: drm/amdgpu: remove AMDGPU_VM_NO_FLUSH define MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not used any more. Signed-off-by: Christian König Reviewed-by: Alex Deucher Reviewed-by: Edward O'Callaghan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index e0e40aca0f78..12925016370b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -51,9 +51,6 @@ * SI supports 16. */ -/* Special value that no flush is necessary */ -#define AMDGPU_VM_NO_FLUSH (~0ll) - /* Local structure. Encapsulate some VM table update parameters to reduce * the number of function parameters */ -- cgit From 96105e5375892f63cc56fa707a1db0d74abc764d Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 12 Aug 2016 12:59:59 +0200 Subject: drm/amdgpu: stop splitting PTE commands into smaller ones MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It doesn't make much sense to create bigger commands first which we then need to split into smaller one again. Just make sure the commands we create aren't to big in the first place. Signed-off-by: Christian König Reviewed-by: Alex Deucher Reviewed-by: Edward O'Callaghan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 12925016370b..673c258e49db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -639,7 +639,8 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, pde = pd_addr + pt_idx * 8; if (((last_pde + 8 * count) != pde) || - ((last_pt + incr * count) != pt)) { + ((last_pt + incr * count) != pt) || + (count == AMDGPU_VM_MAX_UPDATE_SIZE)) { if (count) { amdgpu_vm_update_pages(¶ms, last_pde, @@ -743,7 +744,8 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, next_pe_start = amdgpu_bo_gpu_offset(pt); next_pe_start += (addr & mask) * 8; - if ((cur_pe_start + 8 * cur_nptes) == next_pe_start) { + if ((cur_pe_start + 8 * cur_nptes) == next_pe_start && + ((cur_nptes + nptes) <= AMDGPU_VM_MAX_UPDATE_SIZE)) { /* The next ptb is consecutive to current ptb. * Don't call amdgpu_vm_update_pages now. * Will update two ptbs together in future. -- cgit From afef8b8f99b6489ae1e9fe535d74bf2ce9fa17bd Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 12 Aug 2016 13:29:18 +0200 Subject: drm/amdgpu: add function pointer to the pte_update_params MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remember what function to call while planning the commands instead of figuring it our later on. Signed-off-by: Christian König Reviewed-by: Alex Deucher Reviewed-by: Edward O'Callaghan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 76 ++++++++++++++++++++++++---------- 1 file changed, 54 insertions(+), 22 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 673c258e49db..7ca2e8fa5906 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -61,6 +61,10 @@ struct amdgpu_pte_update_params { uint64_t src; /* indirect buffer to fill with commands */ struct amdgpu_ib *ib; + /* Function which actually does the update */ + void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe, + uint64_t addr, unsigned count, uint32_t incr, + uint32_t flags); }; /** @@ -464,7 +468,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, } /** - * amdgpu_vm_update_pages - helper to call the right asic function + * amdgpu_vm_do_set_ptes - helper to call the right asic function * * @params: see amdgpu_pte_update_params definition * @pe: addr of the page entry @@ -476,18 +480,14 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, * Traces the parameters and calls the right asic functions * to setup the page table using the DMA. */ -static void amdgpu_vm_update_pages(struct amdgpu_pte_update_params *params, - uint64_t pe, uint64_t addr, - unsigned count, uint32_t incr, - uint32_t flags) +static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params, + uint64_t pe, uint64_t addr, + unsigned count, uint32_t incr, + uint32_t flags) { trace_amdgpu_vm_set_page(pe, addr, count, incr, flags); - if (params->src) { - amdgpu_vm_copy_pte(params->adev, params->ib, - pe, (params->src + (addr >> 12) * 8), count); - - } else if (count < 3) { + if (count < 3) { amdgpu_vm_write_pte(params->adev, params->ib, pe, addr | flags, count, incr); @@ -497,6 +497,29 @@ static void amdgpu_vm_update_pages(struct amdgpu_pte_update_params *params, } } +/** + * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART + * + * @params: see amdgpu_pte_update_params definition + * @pe: addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: hw access flags + * + * Traces the parameters and calls the DMA function to copy the PTEs. + */ +static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params, + uint64_t pe, uint64_t addr, + unsigned count, uint32_t incr, + uint32_t flags) +{ + trace_amdgpu_vm_set_page(pe, addr, count, incr, flags); + + amdgpu_vm_copy_pte(params->adev, params->ib, pe, + (params->src + (addr >> 12) * 8), count); +} + /** * amdgpu_vm_clear_bo - initially clear the page dir/table * @@ -537,7 +560,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, memset(¶ms, 0, sizeof(params)); params.adev = adev; params.ib = &job->ibs[0]; - amdgpu_vm_update_pages(¶ms, addr, 0, entries, 0, 0); + amdgpu_vm_do_set_ptes(¶ms, addr, 0, entries, 0, 0); amdgpu_ring_pad_ib(ring, &job->ibs[0]); WARN_ON(job->ibs[0].length_dw > 64); @@ -643,9 +666,9 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, (count == AMDGPU_VM_MAX_UPDATE_SIZE)) { if (count) { - amdgpu_vm_update_pages(¶ms, last_pde, - last_pt, count, incr, - AMDGPU_PTE_VALID); + amdgpu_vm_do_set_ptes(¶ms, last_pde, + last_pt, count, incr, + AMDGPU_PTE_VALID); } count = 1; @@ -657,8 +680,8 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, } if (count) - amdgpu_vm_update_pages(¶ms, last_pde, last_pt, - count, incr, AMDGPU_PTE_VALID); + amdgpu_vm_do_set_ptes(¶ms, last_pde, last_pt, + count, incr, AMDGPU_PTE_VALID); if (params.ib->length_dw != 0) { amdgpu_ring_pad_ib(ring, params.ib); @@ -747,14 +770,13 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, if ((cur_pe_start + 8 * cur_nptes) == next_pe_start && ((cur_nptes + nptes) <= AMDGPU_VM_MAX_UPDATE_SIZE)) { /* The next ptb is consecutive to current ptb. - * Don't call amdgpu_vm_update_pages now. + * Don't call the update function now. * Will update two ptbs together in future. */ cur_nptes += nptes; } else { - amdgpu_vm_update_pages(params, cur_pe_start, cur_dst, - cur_nptes, AMDGPU_GPU_PAGE_SIZE, - flags); + params->func(params, cur_pe_start, cur_dst, cur_nptes, + AMDGPU_GPU_PAGE_SIZE, flags); cur_pe_start = next_pe_start; cur_nptes = nptes; @@ -766,8 +788,8 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, dst += nptes * AMDGPU_GPU_PAGE_SIZE; } - amdgpu_vm_update_pages(params, cur_pe_start, cur_dst, cur_nptes, - AMDGPU_GPU_PAGE_SIZE, flags); + params->func(params, cur_pe_start, cur_dst, cur_nptes, + AMDGPU_GPU_PAGE_SIZE, flags); } /* @@ -875,6 +897,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, struct fence *f = NULL; int r; + memset(¶ms, 0, sizeof(params)); + params.adev = adev; + params.src = src; + ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); memset(¶ms, 0, sizeof(params)); @@ -900,6 +926,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, /* only copy commands needed */ ndw += ncmds * 7; + params.func = amdgpu_vm_do_copy_ptes; + } else if (pages_addr) { /* copy commands needed */ ndw += ncmds * 7; @@ -907,12 +935,16 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, /* and also PTEs */ ndw += nptes * 2; + params.func = amdgpu_vm_do_copy_ptes; + } else { /* set page commands needed */ ndw += ncmds * 10; /* two extra commands for begin/end of fragment */ ndw += 2 * 10; + + params.func = amdgpu_vm_do_set_ptes; } r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); -- cgit From 6557e3d29469157f18342cd47e05792271e64e7c Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Mon, 15 Aug 2016 11:36:54 +0800 Subject: drm/amdgpu: update pd shadow while updating pd V2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit V2: Checking if shadow is valid. Signed-off-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 60 +++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 19 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 7ca2e8fa5906..1eae307cdfd4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -604,24 +604,14 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) return result; } -/** - * amdgpu_vm_update_pdes - make sure that page directory is valid - * - * @adev: amdgpu_device pointer - * @vm: requested vm - * @start: start of GPU address range - * @end: end of GPU address range - * - * Allocates new page tables if necessary - * and updates the page directory. - * Returns 0 for success, error for failure. - */ -int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, - struct amdgpu_vm *vm) +static int amdgpu_vm_update_pd_or_shadow(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + bool shadow) { struct amdgpu_ring *ring; - struct amdgpu_bo *pd = vm->page_directory; - uint64_t pd_addr = amdgpu_bo_gpu_offset(pd); + struct amdgpu_bo *pd = shadow ? vm->page_directory->shadow : + vm->page_directory; + uint64_t pd_addr; uint32_t incr = AMDGPU_VM_PTE_COUNT * 8; uint64_t last_pde = ~0, last_pt = ~0; unsigned count = 0, pt_idx, ndw; @@ -631,6 +621,9 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, int r; + if (!pd) + return 0; + pd_addr = amdgpu_bo_gpu_offset(pd); ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); /* padding, etc. */ @@ -656,9 +649,15 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, continue; pt = amdgpu_bo_gpu_offset(bo); - if (vm->page_tables[pt_idx].addr == pt) - continue; - vm->page_tables[pt_idx].addr = pt; + if (!shadow) { + if (vm->page_tables[pt_idx].addr == pt) + continue; + vm->page_tables[pt_idx].addr = pt; + } else { + if (vm->page_tables[pt_idx].shadow_addr == pt) + continue; + vm->page_tables[pt_idx].shadow_addr = pt; + } pde = pd_addr + pt_idx * 8; if (((last_pde + 8 * count) != pde) || @@ -709,6 +708,29 @@ error_free: return r; } +/* + * amdgpu_vm_update_pdes - make sure that page directory is valid + * + * @adev: amdgpu_device pointer + * @vm: requested vm + * @start: start of GPU address range + * @end: end of GPU address range + * + * Allocates new page tables if necessary + * and updates the page directory. + * Returns 0 for success, error for failure. + */ +int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, + struct amdgpu_vm *vm) +{ + int r; + + r = amdgpu_vm_update_pd_or_shadow(adev, vm, true); + if (r) + return r; + return amdgpu_vm_update_pd_or_shadow(adev, vm, false); +} + /** * amdgpu_vm_update_ptes - make sure that page tables are valid * -- cgit From 4c7e885506eaefc55c2b45293b52cae9ef797d67 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Mon, 15 Aug 2016 11:46:21 +0800 Subject: drm/amdgpu: update pt shadow while updating pt V2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit V2: move shadow parameter to amdgpu_pte_update_params. Signed-off-by: Chunming Zhou Reviewed-by: Christian König Cc: minutemaidpark@hotmail.com Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 1eae307cdfd4..d2824d97b7e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -65,6 +65,8 @@ struct amdgpu_pte_update_params { void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags); + /* indicate update pt or its shadow */ + bool shadow; }; /** @@ -761,7 +763,11 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, addr = start; pt_idx = addr >> amdgpu_vm_block_size; pt = vm->page_tables[pt_idx].entry.robj; - + if (params->shadow) { + if (!pt->shadow) + return; + pt = vm->page_tables[pt_idx].entry.robj->shadow; + } if ((addr & ~mask) == (end & ~mask)) nptes = end - addr; else @@ -780,6 +786,11 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, while (addr < end) { pt_idx = addr >> amdgpu_vm_block_size; pt = vm->page_tables[pt_idx].entry.robj; + if (params->shadow) { + if (!pt->shadow) + return; + pt = vm->page_tables[pt_idx].entry.robj->shadow; + } if ((addr & ~mask) == (end & ~mask)) nptes = end - addr; @@ -1004,6 +1015,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) goto error_free; + params.shadow = true; + amdgpu_vm_frag_ptes(¶ms, vm, start, last + 1, addr, flags); + params.shadow = false; amdgpu_vm_frag_ptes(¶ms, vm, start, last + 1, addr, flags); amdgpu_ring_pad_ib(ring, params.ib); -- cgit From 99e124f402d6d649498e2aa3cbcf4563a37fea0e Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 16 Aug 2016 14:43:17 +0200 Subject: drm/amdgpu: cleanup amdgpu_vm_bo_update params MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make it more obvious what we are doing here. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index d2824d97b7e5..bf56f1814437 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1117,28 +1117,32 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * @bo_va: requested BO and VM object - * @mem: ttm mem + * @clear: if true clear the entries * * Fill in the page table entries for @bo_va. * Returns 0 for success, -EINVAL for failure. - * - * Object have to be reserved and mutex must be locked! */ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, - struct ttm_mem_reg *mem) + bool clear) { struct amdgpu_vm *vm = bo_va->vm; struct amdgpu_bo_va_mapping *mapping; dma_addr_t *pages_addr = NULL; uint32_t gtt_flags, flags; + struct ttm_mem_reg *mem; struct fence *exclusive; uint64_t addr; int r; - if (mem) { + if (clear) { + mem = NULL; + addr = 0; + exclusive = NULL; + } else { struct ttm_dma_tt *ttm; + mem = &bo_va->bo->tbo.mem; addr = (u64)mem->start << PAGE_SHIFT; switch (mem->mem_type) { case TTM_PL_TT: @@ -1156,9 +1160,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, } exclusive = reservation_object_get_excl(bo_va->bo->tbo.resv); - } else { - addr = 0; - exclusive = NULL; } flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem); @@ -1189,7 +1190,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, spin_lock(&vm->status_lock); list_splice_init(&bo_va->invalids, &bo_va->valids); list_del_init(&bo_va->vm_status); - if (!mem) + if (clear) list_add(&bo_va->vm_status, &vm->cleared); spin_unlock(&vm->status_lock); @@ -1252,7 +1253,7 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, struct amdgpu_bo_va, vm_status); spin_unlock(&vm->status_lock); - r = amdgpu_vm_bo_update(adev, bo_va, NULL); + r = amdgpu_vm_bo_update(adev, bo_va, true); if (r) return r; -- cgit From c855e25090cdafffb87119028eb018030a46dd9e Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 5 Sep 2016 17:00:57 +0200 Subject: drm/amdgpu: bind GTT on demand MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't really need the GTT table any more most of the time. So bind it only on demand. Signed-off-by: Christian König Reviewed-by: Alex Deucher Acked-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index bf56f1814437..bd5af328154f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1163,7 +1163,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, } flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem); - gtt_flags = (adev == bo_va->bo->adev) ? flags : 0; + gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) && + adev == bo_va->bo->adev) ? flags : 0; spin_lock(&vm->status_lock); if (!list_empty(&bo_va->vm_status)) -- cgit From d7a4ac667eab23307e82100cbceb781936a4500f Mon Sep 17 00:00:00 2001 From: Christian König Date: Sun, 25 Sep 2016 11:54:00 +0200 Subject: drm/amdgpu: fix addr handling in amdgpu_vm_bo_update_mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we will look at the wrong place in the IB when GART mappings are split into smaller updates. Signed-off-by: Christian König Reviewed-by: Alex Deucher Acked-by: Tom StDenis Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index bd5af328154f..c4985a727620 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1000,6 +1000,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, AMDGPU_GPU_PAGE_SIZE); pte[i] |= flags; } + addr = 0; } r = amdgpu_sync_fence(adev, &job->sync, exclusive); -- cgit From 0fc8683e568f228c08321fd99756f6230c98651b Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 16 Sep 2016 11:46:23 +0200 Subject: drm/amdgpu: allocate GTT space for shadow VM page tables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to access those with the system domain. Fixes fallout from only allocating GTT space on demand. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index c4985a727620..4ff285aae9f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -552,6 +552,10 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, if (r) goto error; + r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem); + if (r) + goto error; + addr = amdgpu_bo_gpu_offset(bo); entries = amdgpu_bo_size(bo) / 8; @@ -625,6 +629,11 @@ static int amdgpu_vm_update_pd_or_shadow(struct amdgpu_device *adev, if (!pd) return 0; + + r = amdgpu_ttm_bind(&pd->tbo, &pd->tbo.mem); + if (r) + return r; + pd_addr = amdgpu_bo_gpu_offset(pd); ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); @@ -650,6 +659,14 @@ static int amdgpu_vm_update_pd_or_shadow(struct amdgpu_device *adev, if (bo == NULL) continue; + if (bo->shadow) { + struct amdgpu_bo *shadow = bo->shadow; + + r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem); + if (r) + return r; + } + pt = amdgpu_bo_gpu_offset(bo); if (!shadow) { if (vm->page_tables[pt_idx].addr == pt) -- cgit From 2698f6206f53bc23a40a1412b304225a6d62d120 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 16 Sep 2016 13:06:09 +0200 Subject: drm/amdgpu: cleanup VM shadow BO unreferencing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unreference the shadow BOs in the error path as well and drop the NULL checks. Signed-off-by: Christian König Acked-by: Edward O'Callaghan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 4ff285aae9f5..9f4ff29df80d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1430,6 +1430,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, r = amdgpu_vm_clear_bo(adev, vm, pt); if (r) { + amdgpu_bo_unref(&pt->shadow); amdgpu_bo_unref(&pt); goto error_free; } @@ -1636,6 +1637,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) return 0; error_free_page_directory: + amdgpu_bo_unref(&vm->page_directory->shadow); amdgpu_bo_unref(&vm->page_directory); vm->page_directory = NULL; @@ -1675,15 +1677,17 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) } for (i = 0; i < amdgpu_vm_num_pdes(adev); i++) { - if (vm->page_tables[i].entry.robj && - vm->page_tables[i].entry.robj->shadow) - amdgpu_bo_unref(&vm->page_tables[i].entry.robj->shadow); - amdgpu_bo_unref(&vm->page_tables[i].entry.robj); + struct amdgpu_bo *pt = vm->page_tables[i].entry.robj; + + if (!pt) + continue; + + amdgpu_bo_unref(&pt->shadow); + amdgpu_bo_unref(&pt); } drm_free_large(vm->page_tables); - if (vm->page_directory->shadow) - amdgpu_bo_unref(&vm->page_directory->shadow); + amdgpu_bo_unref(&vm->page_directory->shadow); amdgpu_bo_unref(&vm->page_directory); fence_put(vm->page_directory_fence); } -- cgit From 2a82ec21b9ba342dc90e054458e430f577beca89 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 16 Sep 2016 13:11:45 +0200 Subject: drm/amdgpu: fix initializing the VM last eviction counter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Close a very small window where something can go wrong. Signed-off-by: Christian König Acked-by: Edward O'Callaghan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9f4ff29df80d..04aa5a77e953 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1629,13 +1629,17 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) goto error_free_page_directory; r = amdgpu_vm_clear_bo(adev, vm, vm->page_directory); - amdgpu_bo_unreserve(vm->page_directory); if (r) - goto error_free_page_directory; + goto error_unreserve; + vm->last_eviction_counter = atomic64_read(&adev->num_evictions); + amdgpu_bo_unreserve(vm->page_directory); return 0; +error_unreserve: + amdgpu_bo_unreserve(vm->page_directory); + error_free_page_directory: amdgpu_bo_unref(&vm->page_directory->shadow); amdgpu_bo_unref(&vm->page_directory); -- cgit From 2befa60e4c538a094853ea00ac03b209b7ad272d Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 16 Sep 2016 14:07:46 +0200 Subject: drm/amdgpu: fix initializing the VM BO shadow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to clear the shadows as well. Signed-off-by: Christian König Acked-by: Edward O'Callaghan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 04aa5a77e953..3a43000bdf8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1435,6 +1435,15 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, goto error_free; } + if (pt->shadow) { + r = amdgpu_vm_clear_bo(adev, vm, pt->shadow); + if (r) { + amdgpu_bo_unref(&pt->shadow); + amdgpu_bo_unref(&pt); + goto error_free; + } + } + entry->robj = pt; entry->priority = 0; entry->tv.bo = &entry->robj->tbo; @@ -1632,6 +1641,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) if (r) goto error_unreserve; + if (vm->page_directory->shadow) { + r = amdgpu_vm_clear_bo(adev, vm, vm->page_directory->shadow); + if (r) + goto error_unreserve; + } + vm->last_eviction_counter = atomic64_read(&adev->num_evictions); amdgpu_bo_unreserve(vm->page_directory); -- cgit From ec2f05f034688468ddf42e79755b66bd6dd8281e Mon Sep 17 00:00:00 2001 From: Christian König Date: Sun, 25 Sep 2016 16:11:52 +0200 Subject: drm/amdgpu: improve VM PTE trace points MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use a separate one for the copy operation and log all the interesting parameters. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 3a43000bdf8c..1405d69fefb5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -487,7 +487,7 @@ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params, unsigned count, uint32_t incr, uint32_t flags) { - trace_amdgpu_vm_set_page(pe, addr, count, incr, flags); + trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); if (count < 3) { amdgpu_vm_write_pte(params->adev, params->ib, pe, @@ -516,10 +516,12 @@ static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params, unsigned count, uint32_t incr, uint32_t flags) { - trace_amdgpu_vm_set_page(pe, addr, count, incr, flags); + uint64_t src = (params->src + (addr >> 12) * 8); - amdgpu_vm_copy_pte(params->adev, params->ib, pe, - (params->src + (addr >> 12) * 8), count); + + trace_amdgpu_vm_copy_ptes(pe, src, count); + + amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count); } /** -- cgit From 8036617e92e3fad49eef9bbe868b661c58249aff Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 4 Oct 2016 13:39:43 +0200 Subject: drm/amdgpu: revert "use more than 64KB fragment size if possible" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 1dcd32fb9c54334ec948a0f18174a748d6b14364. The block size is indeed an equal match, so this can cause performance regressions. Reviewed-by: Alex Deucher Signed-off-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index bc4b22c6fc08..06f24322e7c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -878,13 +878,13 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, * allocation size to the fragment size. */ - const uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG; + /* SI and newer are optimized for 64KB */ + uint64_t frag_flags = AMDGPU_PTE_FRAG(AMDGPU_LOG2_PAGES_PER_FRAG); + uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG; uint64_t frag_start = ALIGN(start, frag_align); uint64_t frag_end = end & ~(frag_align - 1); - uint32_t frag; - /* system pages are non continuously */ if (params->src || !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) { @@ -893,10 +893,6 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, return; } - /* use more than 64KB fragment size if possible */ - frag = lower_32_bits(frag_start | frag_end); - frag = likely(frag) ? __ffs(frag) : 31; - /* handle the 4K area at the beginning */ if (start != frag_start) { amdgpu_vm_update_ptes(params, vm, start, frag_start, @@ -906,7 +902,7 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, /* handle the area in the middle */ amdgpu_vm_update_ptes(params, vm, frag_start, frag_end, dst, - flags | AMDGPU_PTE_FRAG(frag)); + flags | frag_flags); /* handle the 4K area at the end */ if (frag_end != end) { -- cgit