Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 278 |
1 file changed, 186 insertions(+), 92 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index c48207b377bc..586a58facca1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -28,13 +28,15 @@
  */
 #include <linux/seq_file.h>
 #include <linux/slab.h>
-#include <drm/drmP.h>
+
 #include <drm/amdgpu_drm.h>
+
 #include "amdgpu.h"
 #include "atom.h"
 #include "amdgpu_trace.h"
 
 #define AMDGPU_IB_TEST_TIMEOUT		msecs_to_jiffies(1000)
+#define AMDGPU_IB_TEST_GFX_XGMI_TIMEOUT	msecs_to_jiffies(2000)
 
 /*
  * IB
@@ -45,13 +47,14 @@
  * produce command buffers which are send to the kernel and
  * put in IBs for execution by the requested ring.
  */
-static int amdgpu_debugfs_sa_init(struct amdgpu_device *adev);
 
 /**
  * amdgpu_ib_get - request an IB (Indirect Buffer)
  *
- * @ring: ring index the IB is associated with
+ * @adev: amdgpu_device pointer
+ * @vm: amdgpu_vm pointer
  * @size: requested IB size
+ * @pool_type: IB pool type (delayed, immediate, direct)
  * @ib: IB object returned
  *
  * Request an IB (all asics). IBs are allocated using the
@@ -59,19 +62,22 @@ static int amdgpu_debugfs_sa_init(struct amdgpu_device *adev);
  * Returns 0 on success, error on failure.
  */
 int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-		  unsigned size, struct amdgpu_ib *ib)
+		  unsigned int size, enum amdgpu_ib_pool_type pool_type,
+		  struct amdgpu_ib *ib)
 {
 	int r;
 
 	if (size) {
-		r = amdgpu_sa_bo_new(&adev->ring_tmp_bo,
-				     &ib->sa_bo, size, 256);
+		r = amdgpu_sa_bo_new(&adev->ib_pools[pool_type],
+				     &ib->sa_bo, size);
 		if (r) {
 			dev_err(adev->dev, "failed to get a new IB (%d)\n", r);
 			return r;
 		}
 
 		ib->ptr = amdgpu_sa_bo_cpu_addr(ib->sa_bo);
+		/* flush the cache before commit the IB */
+		ib->flags = AMDGPU_IB_FLAG_EMIT_MEM_SYNC;
 
 		if (!vm)
 			ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
@@ -83,24 +89,23 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 /**
  * amdgpu_ib_free - free an IB (Indirect Buffer)
  *
- * @adev: amdgpu_device pointer
  * @ib: IB object to free
  * @f: the fence SA bo need wait on for the ib alloation
  *
  * Free an IB (all asics).
  */
-void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
-		    struct dma_fence *f)
+void amdgpu_ib_free(struct amdgpu_ib *ib, struct dma_fence *f)
 {
-	amdgpu_sa_bo_free(adev, &ib->sa_bo, f);
+	amdgpu_sa_bo_free(&ib->sa_bo, f);
 }
 
 /**
  * amdgpu_ib_schedule - schedule an IB (Indirect Buffer) on the ring
  *
- * @adev: amdgpu_device pointer
+ * @ring: ring index the IB is associated with
  * @num_ibs: number of IBs to schedule
  * @ibs: IB objects to schedule
+ * @job: job to schedule
  * @f: fence created during this submission
  *
  * Schedule an IB on the associated ring (all asics).
@@ -116,23 +121,26 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
  * a CONST_IB), it will be put on the ring prior to the DE IB. Prior
  * to SI there was just a DE IB.
  */
-int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
+int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
 		       struct amdgpu_ib *ibs, struct amdgpu_job *job,
 		       struct dma_fence **f)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib *ib = &ibs[0];
 	struct dma_fence *tmp = NULL;
-	bool skip_preamble, need_ctx_switch;
-	unsigned patch_offset = ~0;
+	struct amdgpu_fence *af;
+	bool need_ctx_switch;
 	struct amdgpu_vm *vm;
 	uint64_t fence_ctx;
 	uint32_t status = 0, alloc_size;
-	unsigned fence_flags = 0;
-
-	unsigned i;
-	int r = 0;
+	unsigned int fence_flags = 0;
+	bool secure, init_shadow;
+	u64 shadow_va, csa_va, gds_va;
+	int vmid = AMDGPU_JOB_GET_VMID(job);
 	bool need_pipe_sync = false;
+	unsigned int cond_exec;
+	unsigned int i;
+	int r = 0;
 
 	if (num_ibs == 0)
 		return -EINVAL;
@@ -140,20 +148,49 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	/* ring tests don't use a job */
 	if (job) {
 		vm = job->vm;
-		fence_ctx = job->base.s_fence->scheduled.context;
+		fence_ctx = job->base.s_fence ?
+			job->base.s_fence->finished.context : 0;
+		shadow_va = job->shadow_va;
+		csa_va = job->csa_va;
+		gds_va = job->gds_va;
+		init_shadow = job->init_shadow;
+		af = job->hw_fence;
+		/* Save the context of the job for reset handling.
+		 * The driver needs this so it can skip the ring
+		 * contents for guilty contexts.
+		 */
+		af->context = fence_ctx;
+		/* the vm fence is also part of the job's context */
+		job->hw_vm_fence->context = fence_ctx;
 	} else {
 		vm = NULL;
 		fence_ctx = 0;
+		shadow_va = 0;
+		csa_va = 0;
+		gds_va = 0;
+		init_shadow = false;
+		af = kzalloc(sizeof(*af), GFP_ATOMIC);
+		if (!af)
+			return -ENOMEM;
 	}
 
 	if (!ring->sched.ready) {
 		dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name);
-		return -EINVAL;
+		r = -EINVAL;
+		goto free_fence;
 	}
 
 	if (vm && !job->vmid) {
 		dev_err(adev->dev, "VM IB without ID\n");
-		return -EINVAL;
+		r = -EINVAL;
+		goto free_fence;
+	}
+
+	if ((ib->flags & AMDGPU_IB_FLAGS_SECURE) &&
+	    (!ring->funcs->secure_submission_supported)) {
+		dev_err(adev->dev, "secure submissions not supported on ring <%s>\n", ring->name);
+		r = -EINVAL;
+		goto free_fence;
 	}
 
 	alloc_size = ring->funcs->emit_frame_size + num_ibs *
@@ -162,14 +199,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	r = amdgpu_ring_alloc(ring, alloc_size);
 	if (r) {
 		dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
-		return r;
+		goto free_fence;
 	}
 
 	need_ctx_switch = ring->current_ctx != fence_ctx;
 	if (ring->funcs->emit_pipeline_sync && job &&
-	    ((tmp = amdgpu_sync_get_fence(&job->sched_sync, NULL)) ||
-	     (amdgpu_sriov_vf(adev) && need_ctx_switch) ||
-	     amdgpu_vm_need_pipeline_sync(ring, job))) {
+	    ((tmp = amdgpu_sync_get_fence(&job->explicit_sync)) ||
+	     need_ctx_switch || amdgpu_vm_need_pipeline_sync(ring, job))) {
+
 		need_pipe_sync = true;
 
 		if (tmp)
@@ -178,6 +215,13 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		dma_fence_put(tmp);
 	}
 
+	if ((ib->flags & AMDGPU_IB_FLAG_EMIT_MEM_SYNC) && ring->funcs->emit_mem_sync)
+		ring->funcs->emit_mem_sync(ring);
+
+	if (ring->funcs->emit_wave_limit &&
+	    ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
+		ring->funcs->emit_wave_limit(ring, true);
+
 	if (ring->funcs->insert_start)
 		ring->funcs->insert_start(ring);
 
@@ -189,49 +233,54 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		}
 	}
 
-	if (job && ring->funcs->init_cond_exec)
-		patch_offset = amdgpu_ring_init_cond_exec(ring);
+	amdgpu_ring_ib_begin(ring);
 
-#ifdef CONFIG_X86_64
-	if (!(adev->flags & AMD_IS_APU))
-#endif
-	{
-		if (ring->funcs->emit_hdp_flush)
-			amdgpu_ring_emit_hdp_flush(ring);
-		else
-			amdgpu_asic_flush_hdp(adev, ring);
-	}
+	if (ring->funcs->emit_gfx_shadow)
+		amdgpu_ring_emit_gfx_shadow(ring, shadow_va, csa_va, gds_va,
+					    init_shadow, vmid);
+
+	if (ring->funcs->init_cond_exec)
+		cond_exec = amdgpu_ring_init_cond_exec(ring,
+						       ring->cond_exe_gpu_addr);
+
+	amdgpu_device_flush_hdp(adev, ring);
+
+	if (need_ctx_switch)
+		status |= AMDGPU_HAVE_CTX_SWITCH;
 
-	skip_preamble = ring->current_ctx == fence_ctx;
 	if (job && ring->funcs->emit_cntxcntl) {
-		if (need_ctx_switch)
-			status |= AMDGPU_HAVE_CTX_SWITCH;
 		status |= job->preamble_status;
-
+		status |= job->preemption_status;
 		amdgpu_ring_emit_cntxcntl(ring, status);
 	}
 
+	/* Setup initial TMZiness and send it off.
+	 */
+	secure = false;
+	if (job && ring->funcs->emit_frame_cntl) {
+		secure = ib->flags & AMDGPU_IB_FLAGS_SECURE;
+		amdgpu_ring_emit_frame_cntl(ring, true, secure);
+	}
+
 	for (i = 0; i < num_ibs; ++i) {
 		ib = &ibs[i];
 
-		/* drop preamble IBs if we don't have a context switch */
-		if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
-			skip_preamble &&
-			!(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) &&
-			!amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
-			continue;
+		if (job && ring->funcs->emit_frame_cntl) {
+			if (secure != !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
+				amdgpu_ring_emit_frame_cntl(ring, false, secure);
+				secure = !secure;
+				amdgpu_ring_emit_frame_cntl(ring, true, secure);
+			}
+		}
 
-		amdgpu_ring_emit_ib(ring, job, ib, need_ctx_switch);
-		need_ctx_switch = false;
+		amdgpu_ring_emit_ib(ring, job, ib, status);
+		status &= ~AMDGPU_HAVE_CTX_SWITCH;
 	}
 
-	if (ring->funcs->emit_tmz)
-		amdgpu_ring_emit_tmz(ring, false);
+	if (job && ring->funcs->emit_frame_cntl)
+		amdgpu_ring_emit_frame_cntl(ring, false, secure);
 
-#ifdef CONFIG_X86_64
-	if (!(adev->flags & AMD_IS_APU))
-#endif
-		amdgpu_asic_invalidate_hdp(adev, ring);
+	amdgpu_device_invalidate_hdp(adev, ring);
 
 	if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE)
 		fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY;
@@ -242,26 +291,54 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 				       fence_flags | AMDGPU_FENCE_FLAG_64BIT);
 	}
 
-	r = amdgpu_fence_emit(ring, f, fence_flags);
+	if (ring->funcs->emit_gfx_shadow && ring->funcs->init_cond_exec) {
+		amdgpu_ring_emit_gfx_shadow(ring, 0, 0, 0, false, 0);
+		amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr);
+	}
+
+	r = amdgpu_fence_emit(ring, af, fence_flags);
 	if (r) {
 		dev_err(adev->dev, "failed to emit fence (%d)\n", r);
 		if (job && job->vmid)
-			amdgpu_vmid_reset(adev, ring->funcs->vmhub, job->vmid);
+			amdgpu_vmid_reset(adev, ring->vm_hub, job->vmid);
 		amdgpu_ring_undo(ring);
 		return r;
 	}
+	*f = &af->base;
+	/* get a ref for the job */
+	if (job)
+		dma_fence_get(*f);
 
 	if (ring->funcs->insert_end)
 		ring->funcs->insert_end(ring);
 
-	if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
-		amdgpu_ring_patch_cond_exec(ring, patch_offset);
+	amdgpu_ring_patch_cond_exec(ring, cond_exec);
 
 	ring->current_ctx = fence_ctx;
-	if (vm && ring->funcs->emit_switch_buffer)
+	if (job && ring->funcs->emit_switch_buffer)
 		amdgpu_ring_emit_switch_buffer(ring);
+
+	if (ring->funcs->emit_wave_limit &&
+	    ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
+		ring->funcs->emit_wave_limit(ring, false);
+
+	/* Save the wptr associated with this fence.
+	 * This must be last for resets to work properly
+	 * as we need to save the wptr associated with this
+	 * fence so we know what rings contents to backup
+	 * after we reset the queue.
+	 */
+	amdgpu_fence_save_wptr(af);
+
+	amdgpu_ring_ib_end(ring);
 	amdgpu_ring_commit(ring);
+
 	return 0;
+
+free_fence:
+	if (!job)
+		kfree(af);
+	return r;
 }
 
 /**
@@ -275,24 +352,26 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
  */
 int amdgpu_ib_pool_init(struct amdgpu_device *adev)
 {
-	int r;
+	int r, i;
 
-	if (adev->ib_pool_ready) {
+	if (adev->ib_pool_ready)
 		return 0;
-	}
-	r = amdgpu_sa_bo_manager_init(adev, &adev->ring_tmp_bo,
-				      AMDGPU_IB_POOL_SIZE*64*1024,
-				      AMDGPU_GPU_PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_GTT);
-	if (r) {
-		return r;
-	}
-
-	adev->ib_pool_ready = true;
-	if (amdgpu_debugfs_sa_init(adev)) {
-		dev_err(adev->dev, "failed to register debugfs file for SA\n");
+
+	for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) {
+		r = amdgpu_sa_bo_manager_init(adev, &adev->ib_pools[i],
+					      AMDGPU_IB_POOL_SIZE, 256,
+					      AMDGPU_GEM_DOMAIN_GTT);
+		if (r)
+			goto error;
 	}
+	adev->ib_pool_ready = true;
+
 	return 0;
+
+error:
+	while (i--)
+		amdgpu_sa_bo_manager_fini(adev, &adev->ib_pools[i]);
+	return r;
 }
 
 /**
@@ -305,10 +384,14 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
  */
 void amdgpu_ib_pool_fini(struct amdgpu_device *adev)
 {
-	if (adev->ib_pool_ready) {
-		amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo);
-		adev->ib_pool_ready = false;
-	}
+	int i;
+
+	if (!adev->ib_pool_ready)
+		return;
+
+	for (i = 0; i < AMDGPU_IB_POOL_MAX; i++)
+		amdgpu_sa_bo_manager_fini(adev, &adev->ib_pools[i]);
+	adev->ib_pool_ready = false;
 }
 
 /**
@@ -323,9 +406,9 @@ void amdgpu_ib_pool_fini(struct amdgpu_device *adev)
  */
 int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
 {
-	unsigned i;
-	int r, ret = 0;
 	long tmo_gfx, tmo_mm;
+	int r, ret = 0;
+	unsigned int i;
 
 	tmo_mm = tmo_gfx = AMDGPU_IB_TEST_TIMEOUT;
 	if (amdgpu_sriov_vf(adev)) {
@@ -342,8 +425,10 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
 		/* for CP & SDMA engines since they are scheduled together so
 		 * need to make the timeout width enough to cover the time
 		 * cost waiting for it coming back under RUNTIME only
-		*/
+		 */
 		tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT;
+	} else if (adev->gmc.xgmi.hive_id) {
+		tmo_gfx = AMDGPU_IB_TEST_GFX_XGMI_TIMEOUT;
 	}
 
 	for (i = 0; i < adev->num_rings; ++i) {
@@ -356,6 +441,10 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
 		if (!ring->sched.ready || !ring->funcs->test_ib)
 			continue;
 
+		if (adev->enable_mes &&
+		    ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+			continue;
+
 		/* MM engine need more time */
 		if (ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
 		    ring->funcs->type == AMDGPU_RING_TYPE_VCE ||
@@ -395,29 +484,34 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
  */
 #if defined(CONFIG_DEBUG_FS)
 
-static int amdgpu_debugfs_sa_info(struct seq_file *m, void *data)
+static int amdgpu_debugfs_sa_info_show(struct seq_file *m, void *unused)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
-	struct drm_device *dev = node->minor->dev;
-	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_device *adev = m->private;
 
-	amdgpu_sa_bo_dump_debug_info(&adev->ring_tmp_bo, m);
+	seq_puts(m, "--------------------- DELAYED ---------------------\n");
+	amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DELAYED],
+				     m);
+	seq_puts(m, "-------------------- IMMEDIATE --------------------\n");
+	amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_IMMEDIATE],
+				     m);
+	seq_puts(m, "--------------------- DIRECT ----------------------\n");
+	amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DIRECT], m);
 
 	return 0;
-
 }
 
-static const struct drm_info_list amdgpu_debugfs_sa_list[] = {
-	{"amdgpu_sa_info", &amdgpu_debugfs_sa_info, 0, NULL},
-};
+DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_sa_info);
 
 #endif
 
-static int amdgpu_debugfs_sa_init(struct amdgpu_device *adev)
+void amdgpu_debugfs_sa_init(struct amdgpu_device *adev)
 {
 #if defined(CONFIG_DEBUG_FS)
-	return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_sa_list, 1);
-#else
-	return 0;
+	struct drm_minor *minor = adev_to_drm(adev)->primary;
+	struct dentry *root = minor->debugfs_root;
+
+	debugfs_create_file("amdgpu_sa_info", 0444, root, adev,
+			    &amdgpu_debugfs_sa_info_fops);
+
 #endif
 }
