Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 131 |
1 file changed, 93 insertions, 38 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index bc1297dcdf97..586a58facca1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -62,14 +62,14 @@
  * Returns 0 on success, error on failure.
  */
 int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-		  unsigned size, enum amdgpu_ib_pool_type pool_type,
+		  unsigned int size, enum amdgpu_ib_pool_type pool_type,
 		  struct amdgpu_ib *ib)
 {
 	int r;
 
 	if (size) {
 		r = amdgpu_sa_bo_new(&adev->ib_pools[pool_type],
-				     &ib->sa_bo, size, 256);
+				     &ib->sa_bo, size);
 		if (r) {
 			dev_err(adev->dev, "failed to get a new IB (%d)\n", r);
 			return r;
@@ -89,16 +89,14 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 /**
  * amdgpu_ib_free - free an IB (Indirect Buffer)
  *
- * @adev: amdgpu_device pointer
  * @ib: IB object to free
  * @f: the fence SA bo need wait on for the ib alloation
  *
  * Free an IB (all asics).
  */
-void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
-		    struct dma_fence *f)
+void amdgpu_ib_free(struct amdgpu_ib *ib, struct dma_fence *f)
 {
-	amdgpu_sa_bo_free(adev, &ib->sa_bo, f);
+	amdgpu_sa_bo_free(&ib->sa_bo, f);
 }
 
 /**
@@ -123,24 +121,26 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
  * a CONST_IB), it will be put on the ring prior to the DE IB. Prior
  * to SI there was just a DE IB.
  */
-int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
+int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
 		       struct amdgpu_ib *ibs, struct amdgpu_job *job,
 		       struct dma_fence **f)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib *ib = &ibs[0];
 	struct dma_fence *tmp = NULL;
+	struct amdgpu_fence *af;
 	bool need_ctx_switch;
-	unsigned patch_offset = ~0;
 	struct amdgpu_vm *vm;
 	uint64_t fence_ctx;
 	uint32_t status = 0, alloc_size;
-	unsigned fence_flags = 0;
-	bool secure;
-
-	unsigned i;
-	int r = 0;
+	unsigned int fence_flags = 0;
+	bool secure, init_shadow;
+	u64 shadow_va, csa_va, gds_va;
+	int vmid = AMDGPU_JOB_GET_VMID(job);
 	bool need_pipe_sync = false;
+	unsigned int cond_exec;
+	unsigned int i;
+	int r = 0;
 
 	if (num_ibs == 0)
 		return -EINVAL;
@@ -149,26 +149,48 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	if (job) {
 		vm = job->vm;
 		fence_ctx = job->base.s_fence ?
-			job->base.s_fence->scheduled.context : 0;
+			job->base.s_fence->finished.context : 0;
+		shadow_va = job->shadow_va;
+		csa_va = job->csa_va;
+		gds_va = job->gds_va;
+		init_shadow = job->init_shadow;
+		af = job->hw_fence;
+		/* Save the context of the job for reset handling.
+		 * The driver needs this so it can skip the ring
+		 * contents for guilty contexts.
+		 */
+		af->context = fence_ctx;
+		/* the vm fence is also part of the job's context */
+		job->hw_vm_fence->context = fence_ctx;
 	} else {
 		vm = NULL;
 		fence_ctx = 0;
+		shadow_va = 0;
+		csa_va = 0;
+		gds_va = 0;
+		init_shadow = false;
+		af = kzalloc(sizeof(*af), GFP_ATOMIC);
+		if (!af)
+			return -ENOMEM;
 	}
 
 	if (!ring->sched.ready) {
 		dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name);
-		return -EINVAL;
+		r = -EINVAL;
+		goto free_fence;
 	}
 
 	if (vm && !job->vmid) {
 		dev_err(adev->dev, "VM IB without ID\n");
-		return -EINVAL;
+		r = -EINVAL;
+		goto free_fence;
 	}
 
 	if ((ib->flags & AMDGPU_IB_FLAGS_SECURE) &&
-	    (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)) {
-		dev_err(adev->dev, "secure submissions not supported on compute rings\n");
-		return -EINVAL;
+	    (!ring->funcs->secure_submission_supported)) {
+		dev_err(adev->dev, "secure submissions not supported on ring <%s>\n", ring->name);
+		r = -EINVAL;
+		goto free_fence;
 	}
 
 	alloc_size = ring->funcs->emit_frame_size + num_ibs *
@@ -177,14 +199,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	r = amdgpu_ring_alloc(ring, alloc_size);
 	if (r) {
 		dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
-		return r;
+		goto free_fence;
 	}
 
 	need_ctx_switch = ring->current_ctx != fence_ctx;
 	if (ring->funcs->emit_pipeline_sync && job &&
-	    ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) ||
-	     (amdgpu_sriov_vf(adev) && need_ctx_switch) ||
-	     amdgpu_vm_need_pipeline_sync(ring, job))) {
+	    ((tmp = amdgpu_sync_get_fence(&job->explicit_sync)) ||
+	     need_ctx_switch || amdgpu_vm_need_pipeline_sync(ring, job))) {
+
 		need_pipe_sync = true;
 
 		if (tmp)
@@ -211,8 +233,15 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		}
 	}
 
-	if (job && ring->funcs->init_cond_exec)
-		patch_offset = amdgpu_ring_init_cond_exec(ring);
+	amdgpu_ring_ib_begin(ring);
+
+	if (ring->funcs->emit_gfx_shadow)
+		amdgpu_ring_emit_gfx_shadow(ring, shadow_va, csa_va, gds_va,
+					    init_shadow, vmid);
+
+	if (ring->funcs->init_cond_exec)
+		cond_exec = amdgpu_ring_init_cond_exec(ring,
+						       ring->cond_exe_gpu_addr);
 
 	amdgpu_device_flush_hdp(adev, ring);
 
@@ -262,31 +291,54 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 				       fence_flags | AMDGPU_FENCE_FLAG_64BIT);
 	}
 
-	r = amdgpu_fence_emit(ring, f, job, fence_flags);
+	if (ring->funcs->emit_gfx_shadow && ring->funcs->init_cond_exec) {
+		amdgpu_ring_emit_gfx_shadow(ring, 0, 0, 0, false, 0);
+		amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr);
+	}
+
+	r = amdgpu_fence_emit(ring, af, fence_flags);
 	if (r) {
 		dev_err(adev->dev, "failed to emit fence (%d)\n", r);
 		if (job && job->vmid)
-			amdgpu_vmid_reset(adev, ring->funcs->vmhub, job->vmid);
+			amdgpu_vmid_reset(adev, ring->vm_hub, job->vmid);
 		amdgpu_ring_undo(ring);
 		return r;
 	}
+	*f = &af->base;
+	/* get a ref for the job */
+	if (job)
+		dma_fence_get(*f);
 
 	if (ring->funcs->insert_end)
 		ring->funcs->insert_end(ring);
 
-	if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
-		amdgpu_ring_patch_cond_exec(ring, patch_offset);
+	amdgpu_ring_patch_cond_exec(ring, cond_exec);
 
 	ring->current_ctx = fence_ctx;
-	if (vm && ring->funcs->emit_switch_buffer)
+	if (job && ring->funcs->emit_switch_buffer)
 		amdgpu_ring_emit_switch_buffer(ring);
 
 	if (ring->funcs->emit_wave_limit &&
 	    ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
 		ring->funcs->emit_wave_limit(ring, false);
 
+	/* Save the wptr associated with this fence.
+	 * This must be last for resets to work properly
+	 * as we need to save the wptr associated with this
+	 * fence so we know what rings contents to backup
+	 * after we reset the queue.
+	 */
+	amdgpu_fence_save_wptr(af);
+
+	amdgpu_ring_ib_end(ring);
 	amdgpu_ring_commit(ring);
+
 	return 0;
+
+free_fence:
+	if (!job)
+		kfree(af);
+	return r;
 }
 
 /**
@@ -307,8 +359,7 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
 
 	for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) {
 		r = amdgpu_sa_bo_manager_init(adev, &adev->ib_pools[i],
-					      AMDGPU_IB_POOL_SIZE,
-					      AMDGPU_GPU_PAGE_SIZE,
+					      AMDGPU_IB_POOL_SIZE, 256,
 					      AMDGPU_GEM_DOMAIN_GTT);
 		if (r)
 			goto error;
@@ -357,7 +408,7 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
 {
 	long tmo_gfx, tmo_mm;
 	int r, ret = 0;
-	unsigned i;
+	unsigned int i;
 
 	tmo_mm = tmo_gfx = AMDGPU_IB_TEST_TIMEOUT;
 	if (amdgpu_sriov_vf(adev)) {
@@ -374,7 +425,7 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
 		/* for CP & SDMA engines since they are scheduled together so
 		 * need to make the timeout width enough to cover the time
 		 * cost waiting for it coming back under RUNTIME only
-		*/
+		 */
 		tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT;
 	} else if (adev->gmc.xgmi.hive_id) {
 		tmo_gfx = AMDGPU_IB_TEST_GFX_XGMI_TIMEOUT;
@@ -390,6 +441,10 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
 		if (!ring->sched.ready || !ring->funcs->test_ib)
 			continue;
 
+		if (adev->enable_mes &&
+		    ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+			continue;
+
 		/* MM engine need more time */
 		if (ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
 		    ring->funcs->type == AMDGPU_RING_TYPE_VCE ||
@@ -431,15 +486,15 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
 static int amdgpu_debugfs_sa_info_show(struct seq_file *m, void *unused)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
+	struct amdgpu_device *adev = m->private;
 
-	seq_printf(m, "--------------------- DELAYED --------------------- \n");
+	seq_puts(m, "--------------------- DELAYED ---------------------\n");
 	amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DELAYED],
 				     m);
-	seq_printf(m, "-------------------- IMMEDIATE -------------------- \n");
+	seq_puts(m, "-------------------- IMMEDIATE --------------------\n");
 	amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_IMMEDIATE],
 				     m);
-	seq_printf(m, "--------------------- DIRECT ---------------------- \n");
+	seq_puts(m, "--------------------- DIRECT ----------------------\n");
 	amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DIRECT],
 				     m);
 
 	return 0;
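Editor's note (not part of the commit): a minimal caller-side sketch of the reworked IB helpers above, assuming the usual amdgpu driver context. The helper name example_submit is hypothetical and error handling is abbreviated; the point is only that amdgpu_sa_bo_new() no longer takes an alignment argument (the pools are now created with 256-byte alignment in amdgpu_ib_pool_init()) and that amdgpu_ib_free() has dropped the adev parameter.

/* Hypothetical illustration only -- not part of this patch. */
#include "amdgpu.h"

static int example_submit(struct amdgpu_device *adev,
			  struct amdgpu_ring *ring,
			  struct amdgpu_job *job)
{
	struct amdgpu_ib ib = {};
	struct dma_fence *f = NULL;
	int r;

	/* allocate 256 bytes from the direct pool; alignment is now
	 * handled by the suballocator rather than passed explicitly */
	r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
	if (r)
		return r;

	/* ... fill ib.ptr with packets and set ib.length_dw ... */

	r = amdgpu_ib_schedule(ring, 1, &ib, job, &f);

	/* free the IB against the scheduling fence; no adev parameter */
	amdgpu_ib_free(&ib, f);
	dma_fence_put(f);
	return r;
}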
