diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 92 |
1 files changed, 73 insertions, 19 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 5599c01b265d..33789510e663 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -223,10 +223,11 @@ static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes) * ticks. The accumulated microseconds (us) are converted to bytes and * returned. */ -static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev) +static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, + u64 *max_bytes, + u64 *max_vis_bytes) { s64 time_us, increment_us; - u64 max_bytes; u64 free_vram, total_vram, used_vram; /* Allow a maximum of 200 accumulated ms. This is basically per-IB @@ -238,8 +239,11 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev) */ const s64 us_upper_bound = 200000; - if (!adev->mm_stats.log2_max_MBps) - return 0; + if (!adev->mm_stats.log2_max_MBps) { + *max_bytes = 0; + *max_vis_bytes = 0; + return; + } total_vram = adev->mc.real_vram_size - adev->vram_pin_size; used_vram = atomic64_read(&adev->vram_usage); @@ -280,23 +284,45 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev) adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us); } - /* This returns 0 if the driver is in debt to disallow (optional) + /* This is set to 0 if the driver is in debt to disallow (optional) * buffer moves. */ - max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us); + *max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us); + + /* Do the same for visible VRAM if half of it is free */ + if (adev->mc.visible_vram_size < adev->mc.real_vram_size) { + u64 total_vis_vram = adev->mc.visible_vram_size; + u64 used_vis_vram = atomic64_read(&adev->vram_vis_usage); + + if (used_vis_vram < total_vis_vram) { + u64 free_vis_vram = total_vis_vram - used_vis_vram; + adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis + + increment_us, us_upper_bound); + + if (free_vis_vram >= total_vis_vram / 2) + adev->mm_stats.accum_us_vis = + max(bytes_to_us(adev, free_vis_vram / 2), + adev->mm_stats.accum_us_vis); + } + + *max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis); + } else { + *max_vis_bytes = 0; + } spin_unlock(&adev->mm_stats.lock); - return max_bytes; } /* Report how many bytes have really been moved for the last command * submission. This can result in a debt that can stop buffer migrations * temporarily. */ -void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes) +void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes, + u64 num_vis_bytes) { spin_lock(&adev->mm_stats.lock); adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes); + adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes); spin_unlock(&adev->mm_stats.lock); } @@ -304,7 +330,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, struct amdgpu_bo *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - u64 initial_bytes_moved; + u64 initial_bytes_moved, bytes_moved; uint32_t domain; int r; @@ -314,17 +340,35 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, /* Don't move this buffer if we have depleted our allowance * to move it. Don't move anything if the threshold is zero. */ - if (p->bytes_moved < p->bytes_moved_threshold) - domain = bo->prefered_domains; - else + if (p->bytes_moved < p->bytes_moved_threshold) { + if (adev->mc.visible_vram_size < adev->mc.real_vram_size && + (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) { + /* And don't move a CPU_ACCESS_REQUIRED BO to limited + * visible VRAM if we've depleted our allowance to do + * that. + */ + if (p->bytes_moved_vis < p->bytes_moved_vis_threshold) + domain = bo->prefered_domains; + else + domain = bo->allowed_domains; + } else { + domain = bo->prefered_domains; + } + } else { domain = bo->allowed_domains; + } retry: amdgpu_ttm_placement_from_domain(bo, domain); initial_bytes_moved = atomic64_read(&adev->num_bytes_moved); r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); - p->bytes_moved += atomic64_read(&adev->num_bytes_moved) - - initial_bytes_moved; + bytes_moved = atomic64_read(&adev->num_bytes_moved) - + initial_bytes_moved; + p->bytes_moved += bytes_moved; + if (adev->mc.visible_vram_size < adev->mc.real_vram_size && + bo->tbo.mem.mem_type == TTM_PL_VRAM && + bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT) + p->bytes_moved_vis += bytes_moved; if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) { domain = bo->allowed_domains; @@ -350,7 +394,8 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, struct amdgpu_bo_list_entry *candidate = p->evictable; struct amdgpu_bo *bo = candidate->robj; struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - u64 initial_bytes_moved; + u64 initial_bytes_moved, bytes_moved; + bool update_bytes_moved_vis; uint32_t other; /* If we reached our current BO we can forget it */ @@ -370,10 +415,17 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, /* Good we can try to move this BO somewhere else */ amdgpu_ttm_placement_from_domain(bo, other); + update_bytes_moved_vis = + adev->mc.visible_vram_size < adev->mc.real_vram_size && + bo->tbo.mem.mem_type == TTM_PL_VRAM && + bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT; initial_bytes_moved = atomic64_read(&adev->num_bytes_moved); r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); - p->bytes_moved += atomic64_read(&adev->num_bytes_moved) - + bytes_moved = atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved; + p->bytes_moved += bytes_moved; + if (update_bytes_moved_vis) + p->bytes_moved_vis += bytes_moved; if (unlikely(r)) break; @@ -554,8 +606,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, list_splice(&need_pages, &p->validated); } - p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev); + amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, + &p->bytes_moved_vis_threshold); p->bytes_moved = 0; + p->bytes_moved_vis = 0; p->evictable = list_last_entry(&p->validated, struct amdgpu_bo_list_entry, tv.head); @@ -579,8 +633,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, goto error_validate; } - amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved); - + amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, + p->bytes_moved_vis); fpriv->vm.last_eviction_counter = atomic64_read(&p->adev->num_evictions); |