diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
35 files changed, 430 insertions, 377 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 8b5452a8d330..6b6d46e29e6e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1621,7 +1621,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( mutex_lock(&mem->lock); - /* Unpin MMIO/DOORBELL BO's that were pinnned during allocation */ + /* Unpin MMIO/DOORBELL BO's that were pinned during allocation */ if (mem->alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { @@ -1918,9 +1918,6 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev, return -EINVAL; } - /* delete kgd_mem from kfd_bo_list to avoid re-validating - * this BO in BO's restoring after eviction. - */ mutex_lock(&mem->process_info->lock); ret = amdgpu_bo_reserve(bo, true); @@ -1943,7 +1940,6 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev, amdgpu_amdkfd_remove_eviction_fence( bo, mem->process_info->eviction_fence); - list_del_init(&mem->validate_list.head); if (size) *size = amdgpu_bo_size(bo); @@ -2512,12 +2508,15 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) process_info->eviction_fence = new_fence; *ef = dma_fence_get(&new_fence->base); - /* Attach new eviction fence to all BOs */ + /* Attach new eviction fence to all BOs except pinned ones */ list_for_each_entry(mem, &process_info->kfd_bo_list, - validate_list.head) + validate_list.head) { + if (mem->bo->tbo.pin_count) + continue; + amdgpu_bo_fence(mem->bo, &process_info->eviction_fence->base, true); - + } /* Attach eviction fence to PD / PT BOs */ list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index 63e0293edc5f..fd8f3731758e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -188,13 +188,17 @@ static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev, vram_type = AMDGPU_VRAM_TYPE_DDR3; break; case Ddr4MemType: - case LpDdr4MemType: vram_type = AMDGPU_VRAM_TYPE_DDR4; break; + case LpDdr4MemType: + vram_type = AMDGPU_VRAM_TYPE_LPDDR4; + break; case Ddr5MemType: - case LpDdr5MemType: vram_type = AMDGPU_VRAM_TYPE_DDR5; break; + case LpDdr5MemType: + vram_type = AMDGPU_VRAM_TYPE_LPDDR5; + break; default: vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index e552a2004868..b28af04b0c3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -116,7 +116,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs int ret; if (cs->in.num_chunks == 0) - return 0; + return -EINVAL; chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL); if (!chunk_array) @@ -1252,7 +1252,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, p->fence = dma_fence_get(&job->base.s_fence->finished); - amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq); + seq = amdgpu_ctx_add_fence(p->ctx, entity, p->fence); amdgpu_cs_post_dependencies(p); if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index c317078d1afd..7dc92ef36b2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -135,9 +135,9 @@ static enum amdgpu_ring_priority_level amdgpu_ctx_sched_prio_to_ring_prio(int32_ static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip) { - struct amdgpu_device *adev = ctx->adev; - int32_t ctx_prio; + struct amdgpu_device *adev = ctx->mgr->adev; unsigned int hw_prio; + int32_t ctx_prio; ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ? 
ctx->init_priority : ctx->override_priority; @@ -162,17 +162,50 @@ static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip) return hw_prio; } +/* Calculate the time spend on the hw */ +static ktime_t amdgpu_ctx_fence_time(struct dma_fence *fence) +{ + struct drm_sched_fence *s_fence; + + if (!fence) + return ns_to_ktime(0); + + /* When the fence is not even scheduled it can't have spend time */ + s_fence = to_drm_sched_fence(fence); + if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->scheduled.flags)) + return ns_to_ktime(0); + + /* When it is still running account how much already spend */ + if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->finished.flags)) + return ktime_sub(ktime_get(), s_fence->scheduled.timestamp); + + return ktime_sub(s_fence->finished.timestamp, + s_fence->scheduled.timestamp); +} + +static ktime_t amdgpu_ctx_entity_time(struct amdgpu_ctx *ctx, + struct amdgpu_ctx_entity *centity) +{ + ktime_t res = ns_to_ktime(0); + uint32_t i; + + spin_lock(&ctx->ring_lock); + for (i = 0; i < amdgpu_sched_jobs; i++) { + res = ktime_add(res, amdgpu_ctx_fence_time(centity->fences[i])); + } + spin_unlock(&ctx->ring_lock); + return res; +} static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip, const u32 ring) { - struct amdgpu_device *adev = ctx->adev; - struct amdgpu_ctx_entity *entity; struct drm_gpu_scheduler **scheds = NULL, *sched = NULL; - unsigned num_scheds = 0; - int32_t ctx_prio; - unsigned int hw_prio; + struct amdgpu_device *adev = ctx->mgr->adev; + struct amdgpu_ctx_entity *entity; enum drm_sched_priority drm_prio; + unsigned int hw_prio, num_scheds; + int32_t ctx_prio; int r; entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs), @@ -182,6 +215,7 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip, ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ? 
ctx->init_priority : ctx->override_priority; + entity->hw_ip = hw_ip; entity->sequence = 1; hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip); drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio); @@ -220,10 +254,25 @@ error_free_entity: return r; } -static int amdgpu_ctx_init(struct amdgpu_device *adev, - int32_t priority, - struct drm_file *filp, - struct amdgpu_ctx *ctx) +static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity) +{ + ktime_t res = ns_to_ktime(0); + int i; + + if (!entity) + return res; + + for (i = 0; i < amdgpu_sched_jobs; ++i) { + res = ktime_add(res, amdgpu_ctx_fence_time(entity->fences[i])); + dma_fence_put(entity->fences[i]); + } + + kfree(entity); + return res; +} + +static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, + struct drm_file *filp, struct amdgpu_ctx *ctx) { int r; @@ -233,15 +282,14 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, memset(ctx, 0, sizeof(*ctx)); - ctx->adev = adev; - kref_init(&ctx->refcount); + ctx->mgr = mgr; spin_lock_init(&ctx->ring_lock); mutex_init(&ctx->lock); - ctx->reset_counter = atomic_read(&adev->gpu_reset_counter); + ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter); ctx->reset_counter_query = ctx->reset_counter; - ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter); + ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter); ctx->init_priority = priority; ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET; ctx->stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE; @@ -249,24 +297,10 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, return 0; } -static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity) -{ - - int i; - - if (!entity) - return; - - for (i = 0; i < amdgpu_sched_jobs; ++i) - dma_fence_put(entity->fences[i]); - - kfree(entity); -} - static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx, u32 *stable_pstate) { - struct amdgpu_device *adev = ctx->adev; + struct amdgpu_device *adev = ctx->mgr->adev; 
enum amd_dpm_forced_level current_level; current_level = amdgpu_dpm_get_performance_level(adev); @@ -294,7 +328,7 @@ static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx, static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx, u32 stable_pstate) { - struct amdgpu_device *adev = ctx->adev; + struct amdgpu_device *adev = ctx->mgr->adev; enum amd_dpm_forced_level level; u32 current_stable_pstate; int r; @@ -345,7 +379,8 @@ done: static void amdgpu_ctx_fini(struct kref *ref) { struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount); - struct amdgpu_device *adev = ctx->adev; + struct amdgpu_ctx_mgr *mgr = ctx->mgr; + struct amdgpu_device *adev = mgr->adev; unsigned i, j, idx; if (!adev) @@ -353,8 +388,10 @@ static void amdgpu_ctx_fini(struct kref *ref) for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) { - amdgpu_ctx_fini_entity(ctx->entities[i][j]); - ctx->entities[i][j] = NULL; + ktime_t spend; + + spend = amdgpu_ctx_fini_entity(ctx->entities[i][j]); + atomic64_add(ktime_to_ns(spend), &mgr->time_spend[i]); } } @@ -421,7 +458,7 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev, } *id = (uint32_t)r; - r = amdgpu_ctx_init(adev, priority, filp, ctx); + r = amdgpu_ctx_init(mgr, priority, filp, ctx); if (r) { idr_remove(&mgr->ctx_handles, *id); *id = 0; @@ -671,9 +708,9 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx) return 0; } -void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, - struct drm_sched_entity *entity, - struct dma_fence *fence, uint64_t *handle) +uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, + struct drm_sched_entity *entity, + struct dma_fence *fence) { struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity); uint64_t seq = centity->sequence; @@ -682,8 +719,7 @@ void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, idx = seq & (amdgpu_sched_jobs - 1); other = centity->fences[idx]; - if (other) - BUG_ON(!dma_fence_is_signaled(other)); + WARN_ON(other && 
!dma_fence_is_signaled(other)); dma_fence_get(fence); @@ -692,9 +728,11 @@ void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, centity->sequence++; spin_unlock(&ctx->ring_lock); + atomic64_add(ktime_to_ns(amdgpu_ctx_fence_time(other)), + &ctx->mgr->time_spend[centity->hw_ip]); + dma_fence_put(other); - if (handle) - *handle = seq; + return seq; } struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, @@ -731,7 +769,7 @@ static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx, int hw_ip, int32_t priority) { - struct amdgpu_device *adev = ctx->adev; + struct amdgpu_device *adev = ctx->mgr->adev; unsigned int hw_prio; struct drm_gpu_scheduler **scheds = NULL; unsigned num_scheds; @@ -796,10 +834,17 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, return r; } -void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr) +void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr, + struct amdgpu_device *adev) { + unsigned int i; + + mgr->adev = adev; mutex_init(&mgr->lock); idr_init(&mgr->ctx_handles); + + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) + atomic64_set(&mgr->time_spend[i], 0); } long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout) @@ -875,80 +920,38 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr) mutex_destroy(&mgr->lock); } -static void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx, - struct amdgpu_ctx_entity *centity, ktime_t *total, ktime_t *max) -{ - ktime_t now, t1; - uint32_t i; - - *total = *max = 0; - - now = ktime_get(); - for (i = 0; i < amdgpu_sched_jobs; i++) { - struct dma_fence *fence; - struct drm_sched_fence *s_fence; - - spin_lock(&ctx->ring_lock); - fence = dma_fence_get(centity->fences[i]); - spin_unlock(&ctx->ring_lock); - if (!fence) - continue; - s_fence = to_drm_sched_fence(fence); - if (!dma_fence_is_signaled(&s_fence->scheduled)) { - dma_fence_put(fence); - continue; - } - t1 = s_fence->scheduled.timestamp; - if (!ktime_before(t1, now)) { - dma_fence_put(fence); - continue; - } - if 
(dma_fence_is_signaled(&s_fence->finished) && - s_fence->finished.timestamp < now) - *total += ktime_sub(s_fence->finished.timestamp, t1); - else - *total += ktime_sub(now, t1); - t1 = ktime_sub(now, t1); - dma_fence_put(fence); - *max = max(t1, *max); - } -} - -ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip, - uint32_t idx, uint64_t *elapsed) +void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr, + ktime_t usage[AMDGPU_HW_IP_NUM]) { - struct idr *idp; struct amdgpu_ctx *ctx; + unsigned int hw_ip, i; uint32_t id; - struct amdgpu_ctx_entity *centity; - ktime_t total = 0, max = 0; - if (idx >= AMDGPU_MAX_ENTITY_NUM) - return 0; - idp = &mgr->ctx_handles; + /* + * This is a little bit racy because it can be that a ctx or a fence are + * destroyed just in the moment we try to account them. But that is ok + * since exactly that case is explicitely allowed by the interface. + */ mutex_lock(&mgr->lock); - idr_for_each_entry(idp, ctx, id) { - ktime_t ttotal, tmax; + for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) { + uint64_t ns = atomic64_read(&mgr->time_spend[hw_ip]); - if (!ctx->entities[hwip][idx]) - continue; - - centity = ctx->entities[hwip][idx]; - amdgpu_ctx_fence_time(ctx, centity, &ttotal, &tmax); + usage[hw_ip] = ns_to_ktime(ns); + } - /* Harmonic mean approximation diverges for very small - * values. If ratio < 0.01% ignore - */ - if (AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(tmax, ttotal)) - continue; + idr_for_each_entry(&mgr->ctx_handles, ctx, id) { + for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) { + for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) { + struct amdgpu_ctx_entity *centity; + ktime_t spend; - total = ktime_add(total, ttotal); - max = ktime_after(tmax, max) ? 
tmax : max; + centity = ctx->entities[hw_ip][i]; + if (!centity) + continue; + spend = amdgpu_ctx_entity_time(ctx, centity); + usage[hw_ip] = ktime_add(usage[hw_ip], spend); + } + } } - mutex_unlock(&mgr->lock); - if (elapsed) - *elapsed = max; - - return total; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index 142f2f87d44c..cc7c8afff414 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -23,16 +23,20 @@ #ifndef __AMDGPU_CTX_H__ #define __AMDGPU_CTX_H__ +#include <linux/ktime.h> +#include <linux/types.h> + #include "amdgpu_ring.h" struct drm_device; struct drm_file; struct amdgpu_fpriv; +struct amdgpu_ctx_mgr; #define AMDGPU_MAX_ENTITY_NUM 4 -#define AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(max, total) ((max) > 16384ULL*(total)) struct amdgpu_ctx_entity { + uint32_t hw_ip; uint64_t sequence; struct drm_sched_entity entity; struct dma_fence *fences[]; @@ -40,7 +44,7 @@ struct amdgpu_ctx_entity { struct amdgpu_ctx { struct kref refcount; - struct amdgpu_device *adev; + struct amdgpu_ctx_mgr *mgr; unsigned reset_counter; unsigned reset_counter_query; uint32_t vram_lost_counter; @@ -61,6 +65,7 @@ struct amdgpu_ctx_mgr { struct mutex lock; /* protected by lock */ struct idr ctx_handles; + atomic64_t time_spend[AMDGPU_HW_IP_NUM]; }; extern const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM]; @@ -70,9 +75,9 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx); int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance, u32 ring, struct drm_sched_entity **entity); -void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, - struct drm_sched_entity *entity, - struct dma_fence *fence, uint64_t *seq); +uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, + struct drm_sched_entity *entity, + struct dma_fence *fence); struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, struct drm_sched_entity *entity, uint64_t seq); @@ -85,10 +90,12 @@ int amdgpu_ctx_ioctl(struct 
drm_device *dev, void *data, int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, struct drm_sched_entity *entity); -void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr); +void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr, + struct amdgpu_device *adev); void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr); long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout); void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); -ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip, - uint32_t idx, uint64_t *elapsed); +void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr, + ktime_t usage[AMDGPU_HW_IP_NUM]); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 9af8d7a1d011..625424f3082b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1556,9 +1556,6 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); - amdgpu_gmc_tmz_set(adev); - - return 0; } @@ -3701,6 +3698,9 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) return r; + /* Enable TMZ based on IP_VERSION */ + amdgpu_gmc_tmz_set(adev); + amdgpu_gmc_noretry_set(adev); /* Need to get xgmi info early to decide the reset behavior*/ if (adev->gmc.xgmi.supported) { @@ -5219,6 +5219,10 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ r = amdgpu_device_reset_sriov(adev, job ? 
false : true); if (r) adev->asic_reset_res = r; + + /* Aldebaran supports ras in SRIOV, so need resume ras during reset */ + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) + amdgpu_ras_resume(adev); } else { r = amdgpu_do_asic_reset(device_list_handle, &reset_context); if (r && r == -EAGAIN) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 881570dced41..47f0344205ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1130,13 +1130,24 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) adev->vcn.vcn_config[adev->vcn.num_vcn_inst] = ip->revision & 0xc0; ip->revision &= ~0xc0; - adev->vcn.num_vcn_inst++; + if (adev->vcn.num_vcn_inst < AMDGPU_MAX_VCN_INSTANCES) + adev->vcn.num_vcn_inst++; + else + dev_err(adev->dev, "Too many VCN instances: %d vs %d\n", + adev->vcn.num_vcn_inst + 1, + AMDGPU_MAX_VCN_INSTANCES); } if (le16_to_cpu(ip->hw_id) == SDMA0_HWID || le16_to_cpu(ip->hw_id) == SDMA1_HWID || le16_to_cpu(ip->hw_id) == SDMA2_HWID || - le16_to_cpu(ip->hw_id) == SDMA3_HWID) - adev->sdma.num_instances++; + le16_to_cpu(ip->hw_id) == SDMA3_HWID) { + if (adev->sdma.num_instances < AMDGPU_MAX_SDMA_INSTANCES) + adev->sdma.num_instances++; + else + dev_err(adev->dev, "Too many SDMA instances: %d vs %d\n", + adev->sdma.num_instances + 1, + AMDGPU_MAX_SDMA_INSTANCES); + } if (le16_to_cpu(ip->hw_id) == UMC_HWID) adev->gmc.num_umc++; @@ -1361,7 +1372,7 @@ union mall_info { struct mall_info_v1_0 v1; }; -int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev) +static int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev) { struct binary_header *bhdr; union mall_info *mall_info; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 8592d43a79b0..8890300766a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -99,10 
+99,11 @@ * - 3.43.0 - Add device hot plug/unplug support * - 3.44.0 - DCN3 supports DCC independent block settings: !64B && 128B, 64B && 128B * - 3.45.0 - Add context ioctl stable pstate interface - * * 3.46.0 - To enable hot plug amdgpu tests in libdrm + * - 3.46.0 - To enable hot plug amdgpu tests in libdrm + * * 3.47.0 - Add AMDGPU_GEM_CREATE_DISCARDABLE and AMDGPU_VM_NOALLOC flags */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 46 +#define KMS_DRIVER_MINOR 47 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit; @@ -1940,6 +1941,7 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x7421, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, {0x1002, 0x7422, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, {0x1002, 0x7423, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, + {0x1002, 0x7424, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, {0x1002, 0x743F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, { PCI_DEVICE(0x1002, PCI_ANY_ID), diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c index 5a6857c44bb6..99a7855ab1bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c @@ -32,6 +32,7 @@ #include <drm/amdgpu_drm.h> #include <drm/drm_debugfs.h> +#include <drm/drm_drv.h> #include "amdgpu.h" #include "amdgpu_vm.h" @@ -54,58 +55,49 @@ static const char *amdgpu_ip_name[AMDGPU_HW_IP_NUM] = { void amdgpu_show_fdinfo(struct seq_file *m, struct file *f) { - struct amdgpu_fpriv *fpriv; - uint32_t bus, dev, fn, i, domain; - uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0; struct drm_file *file = f->private_data; struct amdgpu_device *adev = drm_to_adev(file->minor->dev); - struct amdgpu_bo *root; + struct amdgpu_fpriv *fpriv = file->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; + + uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0; + ktime_t usage[AMDGPU_HW_IP_NUM]; + uint32_t bus, dev, fn, domain; + unsigned int hw_ip; int ret; - ret = 
amdgpu_file_to_fpriv(f, &fpriv); - if (ret) - return; bus = adev->pdev->bus->number; domain = pci_domain_nr(adev->pdev->bus); dev = PCI_SLOT(adev->pdev->devfn); fn = PCI_FUNC(adev->pdev->devfn); - root = amdgpu_bo_ref(fpriv->vm.root.bo); - if (!root) + ret = amdgpu_bo_reserve(vm->root.bo, false); + if (ret) return; - ret = amdgpu_bo_reserve(root, false); - if (ret) { - DRM_ERROR("Fail to reserve bo\n"); - return; - } - amdgpu_vm_get_memory(&fpriv->vm, &vram_mem, &gtt_mem, &cpu_mem); - amdgpu_bo_unreserve(root); - amdgpu_bo_unref(&root); + amdgpu_vm_get_memory(vm, &vram_mem, &gtt_mem, &cpu_mem); + amdgpu_bo_unreserve(vm->root.bo); - seq_printf(m, "pdev:\t%04x:%02x:%02x.%d\npasid:\t%u\n", domain, bus, - dev, fn, fpriv->vm.pasid); - seq_printf(m, "vram mem:\t%llu kB\n", vram_mem/1024UL); - seq_printf(m, "gtt mem:\t%llu kB\n", gtt_mem/1024UL); - seq_printf(m, "cpu mem:\t%llu kB\n", cpu_mem/1024UL); - for (i = 0; i < AMDGPU_HW_IP_NUM; i++) { - uint32_t count = amdgpu_ctx_num_entities[i]; - int idx = 0; - uint64_t total = 0, min = 0; - uint32_t perc, frac; + amdgpu_ctx_mgr_usage(&fpriv->ctx_mgr, usage); - for (idx = 0; idx < count; idx++) { - total = amdgpu_ctx_mgr_fence_usage(&fpriv->ctx_mgr, - i, idx, &min); - if ((total == 0) || (min == 0)) - continue; + /* + * ****************************************************************** + * For text output format description please see drm-usage-stats.rst! 
+ * ****************************************************************** + */ - perc = div64_u64(10000 * total, min); - frac = perc % 100; + seq_printf(m, "pasid:\t%u\n", fpriv->vm.pasid); + seq_printf(m, "drm-driver:\t%s\n", file->minor->dev->driver->name); + seq_printf(m, "drm-pdev:\t%04x:%02x:%02x.%d\n", domain, bus, dev, fn); + seq_printf(m, "drm-client-id:\t%Lu\n", vm->immediate.fence_context); + seq_printf(m, "drm-memory-vram:\t%llu KiB\n", vram_mem/1024UL); + seq_printf(m, "drm-memory-gtt: \t%llu KiB\n", gtt_mem/1024UL); + seq_printf(m, "drm-memory-cpu: \t%llu KiB\n", cpu_mem/1024UL); + for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) { + if (!usage[hw_ip]) + continue; - seq_printf(m, "%s%d:\t%d.%d%%\n", - amdgpu_ip_name[i], - idx, perc/100, frac); - } + seq_printf(m, "drm-engine-%s:\t%Ld ns\n", amdgpu_ip_name[hw_ip], + ktime_to_ns(usage[hw_ip])); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 652571267077..8ef31d687ef3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -296,8 +296,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, AMDGPU_GEM_CREATE_VRAM_CLEARED | AMDGPU_GEM_CREATE_VM_ALWAYS_VALID | AMDGPU_GEM_CREATE_EXPLICIT_SYNC | - AMDGPU_GEM_CREATE_ENCRYPTED)) - + AMDGPU_GEM_CREATE_ENCRYPTED | + AMDGPU_GEM_CREATE_DISCARDABLE)) return -EINVAL; /* reject invalid gem domains */ @@ -645,6 +645,8 @@ uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags) pte_flag |= AMDGPU_PTE_WRITEABLE; if (flags & AMDGPU_VM_PAGE_PRT) pte_flag |= AMDGPU_PTE_PRT; + if (flags & AMDGPU_VM_PAGE_NOALLOC) + pte_flag |= AMDGPU_PTE_NOALLOC; if (adev->gmc.gmc_funcs->map_mtype) pte_flag |= amdgpu_gmc_map_mtype(adev, @@ -658,7 +660,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, { const uint32_t valid_flags = AMDGPU_VM_DELAY_UPDATE | AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | - AMDGPU_VM_PAGE_EXECUTABLE | 
AMDGPU_VM_MTYPE_MASK; + AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_MASK | + AMDGPU_VM_PAGE_NOALLOC; const uint32_t prt_flags = AMDGPU_VM_DELAY_UPDATE | AMDGPU_VM_PAGE_PRT; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index ede2fa56f6c9..16699158e00d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -594,17 +594,20 @@ int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value) int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { int r; - r = amdgpu_ras_block_late_init(adev, ras_block); - if (r) - return r; if (amdgpu_ras_is_supported(adev, ras_block->block)) { if (!amdgpu_persistent_edc_harvesting_supported(adev)) amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX); + r = amdgpu_ras_block_late_init(adev, ras_block); + if (r) + return r; + r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); if (r) goto late_fini; + } else { + amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 88b852b3a2cb..aebc384531ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -512,9 +512,14 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) */ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) { - switch (adev->asic_type) { - case CHIP_RAVEN: - case CHIP_RENOIR: + switch (adev->ip_versions[GC_HWIP][0]) { + /* RAVEN */ + case IP_VERSION(9, 2, 2): + case IP_VERSION(9, 1, 0): + /* RENOIR looks like RAVEN */ + case IP_VERSION(9, 3, 0): + /* GC 10.3.7 */ + case IP_VERSION(10, 3, 7): if (amdgpu_tmz == 0) { adev->gmc.tmz_enabled = false; dev_info(adev->dev, @@ -525,12 +530,18 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) "Trusted Memory Zone (TMZ) feature enabled\n"); } break; - case CHIP_NAVI10: - case CHIP_NAVI14: - case CHIP_NAVI12: - 
case CHIP_VANGOGH: - case CHIP_YELLOW_CARP: - case CHIP_IP_DISCOVERY: + case IP_VERSION(10, 1, 10): + case IP_VERSION(10, 1, 1): + case IP_VERSION(10, 1, 2): + case IP_VERSION(10, 1, 3): + case IP_VERSION(10, 3, 0): + case IP_VERSION(10, 3, 2): + case IP_VERSION(10, 3, 4): + case IP_VERSION(10, 3, 5): + /* VANGOGH */ + case IP_VERSION(10, 3, 1): + /* YELLOW_CARP*/ + case IP_VERSION(10, 3, 3): /* Don't enable it by default yet. */ if (amdgpu_tmz < 1) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 497478f8a5d3..6de63ea6687e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -642,7 +642,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) atomic64_read(&adev->visible_pin_size), vram_gtt.vram_size); vram_gtt.gtt_size = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT)->size; - vram_gtt.gtt_size *= PAGE_SIZE; vram_gtt.gtt_size -= atomic64_read(&adev->gart_pin_size); return copy_to_user(out, &vram_gtt, min((size_t)size, sizeof(vram_gtt))) ? 
-EFAULT : 0; @@ -675,7 +674,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) mem.cpu_accessible_vram.usable_heap_size * 3 / 4; mem.gtt.total_heap_size = gtt_man->size; - mem.gtt.total_heap_size *= PAGE_SIZE; mem.gtt.usable_heap_size = mem.gtt.total_heap_size - atomic64_read(&adev->gart_pin_size); mem.gtt.heap_usage = ttm_resource_manager_usage(gtt_man); @@ -1152,7 +1150,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) mutex_init(&fpriv->bo_list_lock); idr_init(&fpriv->bo_list_handles); - amdgpu_ctx_mgr_init(&fpriv->ctx_mgr); + amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev); file_priv->driver_priv = fpriv; goto out_suspend; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 5444515c1476..2c82b1d5a0d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -567,6 +567,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev, bp->domain; bo->allowed_domains = bo->preferred_domains; if (bp->type != ttm_bo_type_kernel && + !(bp->flags & AMDGPU_GEM_CREATE_DISCARDABLE) && bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; @@ -1018,7 +1019,9 @@ static const char *amdgpu_vram_names[] = { "DDR3", "DDR4", "GDDR6", - "DDR5" + "DDR5", + "LPDDR4", + "LPDDR5" }; /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 4c9cbdc66995..147b79c10cbb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -41,7 +41,6 @@ /* BO flag to indicate a KFD userptr BO */ #define AMDGPU_AMDKFD_CREATE_USERPTR_BO (1ULL << 63) -#define AMDGPU_AMDKFD_CREATE_SVM_BO (1ULL << 62) #define to_amdgpu_bo_user(abo) container_of((abo), struct amdgpu_bo_user, bo) #define to_amdgpu_bo_vm(abo) container_of((abo), struct amdgpu_bo_vm, bo) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 214e4e89a028..e9411c28d88b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1177,7 +1177,7 @@ int psp_xgmi_initialize(struct psp_context *psp, bool set_extended_data, bool lo psp->xgmi_context.context.mem_context.shared_mem_size = PSP_XGMI_SHARED_MEM_SIZE; psp->xgmi_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA; - if (!psp->xgmi_context.context.initialized) { + if (!psp->xgmi_context.context.mem_context.shared_buf) { ret = psp_ta_init_shared_buf(psp, &psp->xgmi_context.context.mem_context); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 035891ec59d5..dac202ae864d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -197,6 +197,13 @@ static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf, if (amdgpu_ras_query_error_status(obj->adev, &info)) return -EINVAL; + /* Hardware counter will be reset automatically after the query on Vega20 and Arcturus */ + if (obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) && + obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) { + if (amdgpu_ras_reset_error_status(obj->adev, info.head.block)) + dev_warn(obj->adev->dev, "Failed to reset error counter and error status"); + } + s = snprintf(val, sizeof(val), "%s: %lu\n%s: %lu\n", "ue", info.ue_count, "ce", info.ce_count); @@ -550,9 +557,10 @@ static ssize_t amdgpu_ras_sysfs_read(struct device *dev, if (amdgpu_ras_query_error_status(obj->adev, &info)) return -EINVAL; - if (obj->adev->asic_type == CHIP_ALDEBARAN) { + if (obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) && + obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) { if (amdgpu_ras_reset_error_status(obj->adev, info.head.block)) - DRM_WARN("Failed to reset error counter and error status"); + dev_warn(obj->adev->dev, "Failed to reset error counter and 
error status"); } return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count, @@ -726,7 +734,9 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, /* Do not enable if it is not allowed. */ WARN_ON(enable && !amdgpu_ras_is_feature_allowed(adev, head)); - if (!amdgpu_ras_intr_triggered()) { + /* Only enable ras feature operation handle on host side */ + if (!amdgpu_sriov_vf(adev) && + !amdgpu_ras_intr_triggered()) { ret = psp_ras_enable_features(&adev->psp, info, enable); if (ret) { dev_err(adev->dev, "ras %s %s failed poison:%d ret:%d\n", @@ -1025,9 +1035,6 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, } } - if (!amdgpu_persistent_edc_harvesting_supported(adev)) - amdgpu_ras_reset_error_status(adev, info->head.block); - return 0; } @@ -1147,6 +1154,12 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev, if (res) return res; + if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) && + adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) { + if (amdgpu_ras_reset_error_status(adev, info.head.block)) + dev_warn(adev->dev, "Failed to reset error counter and error status"); + } + ce += info.ce_count; ue += info.ue_count; } @@ -1523,7 +1536,9 @@ static int amdgpu_ras_fs_fini(struct amdgpu_device *adev) */ void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev) { - if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF)) + /* Fatal error events are handled on host side */ + if (amdgpu_sriov_vf(adev) || + !amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF)) return; if (adev->nbio.ras && @@ -1788,6 +1803,12 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev) continue; amdgpu_ras_query_error_status(adev, &info); + + if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) && + adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) { + if (amdgpu_ras_reset_error_status(adev, info.head.block)) + dev_warn(adev->dev, "Failed to reset error counter and error status"); + } } 
} @@ -2270,10 +2291,15 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) { adev->ras_hw_enabled = adev->ras_enabled = 0; - if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw || + if (!adev->is_atom_fw || !amdgpu_ras_asic_supported(adev)) return; + /* If driver run on sriov guest side, only enable ras for aldebaran */ + if (amdgpu_sriov_vf(adev) && + adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 2)) + return; + if (!adev->gmc.xgmi.connected_to_cpu) { if (amdgpu_atomfirmware_mem_ecc_supported(adev)) { dev_info(adev->dev, "MEM ECC is active.\n"); @@ -2285,15 +2311,21 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) if (amdgpu_atomfirmware_sram_ecc_supported(adev)) { dev_info(adev->dev, "SRAM ECC is active.\n"); - adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC | - 1 << AMDGPU_RAS_BLOCK__DF); - - if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0)) - adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN | - 1 << AMDGPU_RAS_BLOCK__JPEG); - else - adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN | - 1 << AMDGPU_RAS_BLOCK__JPEG); + if (!amdgpu_sriov_vf(adev)) { + adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC | + 1 << AMDGPU_RAS_BLOCK__DF); + + if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0)) + adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN | + 1 << AMDGPU_RAS_BLOCK__JPEG); + else + adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN | + 1 << AMDGPU_RAS_BLOCK__JPEG); + } else { + adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__PCIE_BIF | + 1 << AMDGPU_RAS_BLOCK__SDMA | + 1 << AMDGPU_RAS_BLOCK__GFX); + } } else { dev_info(adev->dev, "SRAM ECC is not presented.\n"); } @@ -2637,6 +2669,10 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev) struct amdgpu_ras_block_object *obj; int r; + /* Guest side doesn't need init ras feature */ + if (amdgpu_sriov_vf(adev)) + return 0; + list_for_each_entry_safe(node, tmp, &adev->ras_list, node) { if (!node->ras_obj) { dev_warn(adev->dev, "Warning: abnormal 
ras list node.\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 8e221a1ba937..42c1f050542f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -124,6 +124,10 @@ int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + + if (amdgpu_sriov_vf(adev)) + return AMDGPU_RAS_SUCCESS; + amdgpu_ras_reset_gpu(adev); return AMDGPU_RAS_SUCCESS; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index ec26edd4f4d8..be6f76a30ac6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -117,7 +117,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, } abo = ttm_to_amdgpu_bo(bo); - if (abo->flags & AMDGPU_AMDKFD_CREATE_SVM_BO) { + if (abo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) { placement->num_placement = 0; placement->num_busy_placement = 0; return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 2ceeaa4c793a..dc76d2b3ce52 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -679,6 +679,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, { struct amdgpu_vm_update_params params; struct amdgpu_vm_bo_base *entry; + bool flush_tlb_needed = false; int r, idx; if (list_empty(&vm->relocated)) @@ -697,6 +698,9 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, goto error; list_for_each_entry(entry, &vm->relocated, vm_status) { + /* vm_flush_needed after updating moved PDEs */ + flush_tlb_needed |= entry->moved; + r = amdgpu_vm_pde_update(&params, entry); if (r) goto error; @@ -706,8 +710,8 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, if (r) goto error; - /* vm_flush_needed after updating PDEs */ - atomic64_inc(&vm->tlb_seq); + if (flush_tlb_needed) + atomic64_inc(&vm->tlb_seq); 
while (!list_empty(&vm->relocated)) { entry = list_first_entry(&vm->relocated, @@ -789,6 +793,11 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, flush_tlb |= adev->gmc.xgmi.num_physical_nodes && adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0); + /* + * On GFX8 and older any 8 PTE block with a valid bit set enters the TLB + */ + flush_tlb |= adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 0, 0); + memset(&params, 0, sizeof(params)); params.adev = adev; params.vm = vm; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 65a4126135b0..c5f46d264b23 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -5111,7 +5111,7 @@ static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); /* Initialize all compute VMIDs to have no GDS, GWS, or OA - acccess. These should be enabled by FW for target VMIDs. */ + access. These should be enabled by FW for target VMIDs. 
*/ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); @@ -6898,7 +6898,7 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m, tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, (order_base_2(prop->queue_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, - ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); + (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); #ifdef __BIG_ENDIAN tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); #endif @@ -6919,23 +6919,6 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; - tmp = 0; - /* enable the doorbell if requested */ - if (prop->use_doorbell) { - tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_OFFSET, prop->doorbell_index); - - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_EN, 1); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_SOURCE, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_HIT, 0); - } - - mqd->cp_hqd_pq_doorbell_control = tmp; - /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); @@ -6973,20 +6956,6 @@ static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring) /* disable wptr polling */ WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); - /* write the EOP addr */ - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, - mqd->cp_hqd_eop_base_addr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, - mqd->cp_hqd_eop_base_addr_hi); - - /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, - 
mqd->cp_hqd_eop_control); - - /* enable doorbell? */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, - mqd->cp_hqd_pq_doorbell_control); - /* disable the queue if it's active */ if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); @@ -7005,6 +6974,19 @@ static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring) mqd->cp_hqd_pq_wptr_hi); } + /* disable doorbells */ + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); + + /* write the EOP addr */ + WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, + mqd->cp_hqd_eop_base_addr_lo); + WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, + mqd->cp_hqd_eop_base_addr_hi); + + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ + WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, + mqd->cp_hqd_eop_control); + /* set the pointer to the MQD */ WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 8773cbd1f03b..a4a6751b1e44 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1096,6 +1096,7 @@ static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2); dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1); dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE); } static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, @@ -1316,7 +1317,7 @@ static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *ade memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size); if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME)) - *(uint64_t *)fw_autoload_mask |= 1 << id; + *(uint64_t *)fw_autoload_mask |= 1ULL << id; } static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device 
*adev, @@ -1983,7 +1984,7 @@ static int gfx_v11_0_init_csb(struct amdgpu_device *adev) return 0; } -void gfx_v11_0_rlc_stop(struct amdgpu_device *adev) +static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev) { u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL); @@ -4082,7 +4083,7 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, (order_base_2(prop->queue_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, - ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); + (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); @@ -6028,6 +6029,7 @@ static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev, break; default: BUG(); + break; } } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 90f64219d291..7f0b18b0d4c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -3714,7 +3714,7 @@ static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); /* Initialize all compute VMIDs to have no GDS, GWS, or OA - acccess. These should be enabled by FW for target VMIDs. */ + access. These should be enabled by FW for target VMIDs. 
*/ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { WREG32(amdgpu_gds_reg_offset[i].mem_base, 0); WREG32(amdgpu_gds_reg_offset[i].mem_size, 0); @@ -4490,7 +4490,7 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, (order_base_2(ring->ring_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, - ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); + (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); #ifdef __BIG_ENDIAN tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); #endif @@ -5815,7 +5815,7 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ gfx_v8_0_wait_for_rlc_serdes(adev); - /* write cmd to Set CGCG Overrride */ + /* write cmd to Set CGCG Override */ gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD); /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 83639b5ea6a9..5349ca4d19e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2535,7 +2535,7 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); /* Initialize all compute VMIDs to have no GDS, GWS, or OA - acccess. These should be enabled by FW for target VMIDs. */ + access. These should be enabled by FW for target VMIDs. 
*/ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); @@ -3514,7 +3514,7 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, (order_base_2(ring->ring_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, - ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); + (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); #ifdef __BIG_ENDIAN tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); #endif @@ -3535,23 +3535,6 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; - tmp = 0; - /* enable the doorbell if requested */ - if (ring->use_doorbell) { - tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_OFFSET, ring->doorbell_index); - - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_EN, 1); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_SOURCE, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_HIT, 0); - } - - mqd->cp_hqd_pq_doorbell_control = tmp; - /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ ring->wptr = 0; mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index b8c79789e1e4..9077dfccaf3c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -613,6 +613,9 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev, *flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK; *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK); + *flags &= ~AMDGPU_PTE_NOALLOC; + *flags |= (mapping->flags & AMDGPU_PTE_NOALLOC); + if (mapping->flags & 
AMDGPU_PTE_PRT) { *flags |= AMDGPU_PTE_PRT; *flags |= AMDGPU_PTE_SNOOPED; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 477f67d9b07c..7f4b480ae66e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -500,6 +500,9 @@ static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev, *flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK; *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK); + *flags &= ~AMDGPU_PTE_NOALLOC; + *flags |= (mapping->flags & AMDGPU_PTE_NOALLOC); + if (mapping->flags & AMDGPU_PTE_PRT) { *flags |= AMDGPU_PTE_PRT; *flags |= AMDGPU_PTE_SNOOPED; @@ -635,6 +638,12 @@ static int gmc_v11_0_mc_init(struct amdgpu_device *adev) adev->gmc.aper_base = pci_resource_start(adev->pdev, 0); adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); +#ifdef CONFIG_X86_64 + if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) { + adev->gmc.aper_base = adev->mmhub.funcs->get_mc_fb_offset(adev); + adev->gmc.aper_size = adev->gmc.real_vram_size; + } +#endif /* In case the PCI BAR is larger than the actual amount of vram */ adev->gmc.visible_vram_size = adev->gmc.aper_size; if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size) diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c index 5d2dfeff8fe5..d63d3f2b8a16 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c @@ -299,7 +299,7 @@ static const struct imu_rlc_ram_golden imu_rlc_ram_golden_11_0_2[] = IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCPG_PSP_DEBUG, CPG_PSP_DEBUG__GPA_OVERRIDE_MASK, 0) }; -void program_imu_rlc_ram(struct amdgpu_device *adev, +static void program_imu_rlc_ram(struct amdgpu_device *adev, const struct imu_rlc_ram_golden *regs, const u32 array_size) { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index d2722adabd1b..f3c1af5130ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c 
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -535,6 +535,10 @@ void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, { unsigned vmid = AMDGPU_JOB_GET_VMID(job); + amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_IH_CTRL_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, (vmid << JPEG_IH_CTRL__IH_VMID__SHIFT)); + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); amdgpu_ring_write(ring, (vmid | (vmid << 4))); @@ -768,7 +772,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = { 8 + /* jpeg_v2_0_dec_ring_emit_vm_flush */ 18 + 18 + /* jpeg_v2_0_dec_ring_emit_fence x2 vm fence */ 8 + 16, - .emit_ib_size = 22, /* jpeg_v2_0_dec_ring_emit_ib */ + .emit_ib_size = 24, /* jpeg_v2_0_dec_ring_emit_ib */ .emit_ib = jpeg_v2_0_dec_ring_emit_ib, .emit_fence = jpeg_v2_0_dec_ring_emit_fence, .emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h index 1a03baa59755..654e43e83e2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h @@ -41,6 +41,7 @@ #define mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET 0x4084 #define mmUVD_JRBC_STATUS_INTERNAL_OFFSET 0x4089 #define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f +#define mmUVD_JPEG_IH_CTRL_INTERNAL_OFFSET 0x4149 #define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000 diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index fcf51947bb18..7eee004cf3ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -541,7 +541,7 @@ static void mes_v11_0_enable(struct amdgpu_device *adev, bool enable) /* This function is for backdoor MES firmware */ static int mes_v11_0_load_microcode(struct amdgpu_device *adev, - enum admgpu_mes_pipe pipe) + enum admgpu_mes_pipe pipe, bool prime_icache) { int r; uint32_t data; @@ -593,16 +593,18 @@ static int 
mes_v11_0_load_microcode(struct amdgpu_device *adev, /* Set 0x3FFFF (256K-1) to CP_MES_MDBOUND_LO */ WREG32_SOC15(GC, 0, regCP_MES_MDBOUND_LO, 0x3FFFF); - /* invalidate ICACHE */ - data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL); - data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0); - data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1); - WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data); - - /* prime the ICACHE. */ - data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL); - data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1); - WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data); + if (prime_icache) { + /* invalidate ICACHE */ + data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL); + data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0); + data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1); + WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data); + + /* prime the ICACHE. */ + data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL); + data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1); + WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data); + } soc21_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); @@ -1044,17 +1046,19 @@ static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev) int r = 0; if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { - r = mes_v11_0_load_microcode(adev, AMDGPU_MES_KIQ_PIPE); + + r = mes_v11_0_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, false); if (r) { - DRM_ERROR("failed to load MES kiq fw, r=%d\n", r); + DRM_ERROR("failed to load MES fw, r=%d\n", r); return r; } - r = mes_v11_0_load_microcode(adev, AMDGPU_MES_SCHED_PIPE); + r = mes_v11_0_load_microcode(adev, AMDGPU_MES_KIQ_PIPE, true); if (r) { - DRM_ERROR("failed to load MES fw, r=%d\n", r); + DRM_ERROR("failed to load MES kiq fw, r=%d\n", r); return r; } + } mes_v11_0_enable(adev, true); @@ -1086,7 +1090,7 @@ static int mes_v11_0_hw_init(void *handle) if (!adev->enable_mes_kiq) { if (adev->firmware.load_type == 
AMDGPU_FW_LOAD_DIRECT) { r = mes_v11_0_load_microcode(adev, - AMDGPU_MES_SCHED_PIPE); + AMDGPU_MES_SCHED_PIPE, true); if (r) { DRM_ERROR("failed to MES fw, r=%d\n", r); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index d016e3c3e221..b3fba8dea63c 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -170,6 +170,7 @@ static const struct amdgpu_video_codec_info yc_video_codecs_decode_array[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; static const struct amdgpu_video_codecs yc_video_codecs_decode = { diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index d6d79e97def9..9e1ef81933ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -32,13 +32,10 @@ MODULE_FIRMWARE("amdgpu/aldebaran_sos.bin"); MODULE_FIRMWARE("amdgpu/aldebaran_ta.bin"); MODULE_FIRMWARE("amdgpu/aldebaran_cap.bin"); -MODULE_FIRMWARE("amdgpu/yellow_carp_asd.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_toc.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_ta.bin"); -MODULE_FIRMWARE("amdgpu/psp_13_0_5_asd.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_5_toc.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_5_ta.bin"); -MODULE_FIRMWARE("amdgpu/psp_13_0_8_asd.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_8_toc.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_8_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos.bin"); @@ -85,17 +82,17 @@ static int psp_v13_0_init_microcode(struct psp_context *psp) err = psp_init_sos_microcode(psp, chip_name); if (err) return err; - err = psp_init_ta_microcode(&adev->psp, chip_name); - if (err) - return err; + /* It's not necessary to load ras ta on Guest side */ + if (!amdgpu_sriov_vf(adev)) { + 
err = psp_init_ta_microcode(&adev->psp, chip_name); + if (err) + return err; + } break; case IP_VERSION(13, 0, 1): case IP_VERSION(13, 0, 3): case IP_VERSION(13, 0, 5): case IP_VERSION(13, 0, 8): - err = psp_init_asd_microcode(psp, chip_name); - if (err) - return err; err = psp_init_toc_microcode(psp, chip_name); if (err) return err; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 06b2635b142a..83c6ccaaa9e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -469,6 +469,7 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se } } + /** * sdma_v5_2_gfx_stop - stop the gfx async dma engines * @@ -514,21 +515,17 @@ static void sdma_v5_2_rlc_stop(struct amdgpu_device *adev) } /** - * sdma_v5_2_ctx_switch_enable_for_instance - start the async dma engines - * context switch for an instance + * sdma_v5_2_ctx_switch_enable - stop the async dma engines context switch * * @adev: amdgpu_device pointer - * @instance_idx: the index of the SDMA instance + * @enable: enable/disable the DMA MEs context switch. * - * Unhalt the async dma engines context switch. + * Halt or unhalt the async dma engines context switch. 
*/ -static void sdma_v5_2_ctx_switch_enable_for_instance(struct amdgpu_device *adev, int instance_idx) +static void sdma_v5_2_ctx_switch_enable(struct amdgpu_device *adev, bool enable) { u32 f32_cntl, phase_quantum = 0; - - if (WARN_ON(instance_idx >= adev->sdma.num_instances)) { - return; - } + int i; if (amdgpu_sdma_phase_quantum) { unsigned value = amdgpu_sdma_phase_quantum; @@ -552,68 +549,50 @@ static void sdma_v5_2_ctx_switch_enable_for_instance(struct amdgpu_device *adev, phase_quantum = value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT | unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT; - - WREG32_SOC15_IP(GC, - sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_PHASE0_QUANTUM), - phase_quantum); - WREG32_SOC15_IP(GC, - sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_PHASE1_QUANTUM), - phase_quantum); - WREG32_SOC15_IP(GC, - sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_PHASE2_QUANTUM), - phase_quantum); } - if (!amdgpu_sriov_vf(adev)) { - f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_CNTL)); - f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, - AUTO_CTXSW_ENABLE, 1); - WREG32(sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_CNTL), f32_cntl); + for (i = 0; i < adev->sdma.num_instances; i++) { + if (enable && amdgpu_sdma_phase_quantum) { + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM), + phase_quantum); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM), + phase_quantum); + WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM), + phase_quantum); + } + + if (!amdgpu_sriov_vf(adev)) { + f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL)); + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, + AUTO_CTXSW_ENABLE, enable ? 
1 : 0); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl); + } } + } /** - * sdma_v5_2_ctx_switch_disable_all - stop the async dma engines context switch + * sdma_v5_2_enable - stop the async dma engines * * @adev: amdgpu_device pointer + * @enable: enable/disable the DMA MEs. * - * Halt the async dma engines context switch. + * Halt or unhalt the async dma engines. */ -static void sdma_v5_2_ctx_switch_disable_all(struct amdgpu_device *adev) +static void sdma_v5_2_enable(struct amdgpu_device *adev, bool enable) { u32 f32_cntl; int i; - if (amdgpu_sriov_vf(adev)) - return; - - for (i = 0; i < adev->sdma.num_instances; i++) { - f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL)); - f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, - AUTO_CTXSW_ENABLE, 0); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl); + if (!enable) { + sdma_v5_2_gfx_stop(adev); + sdma_v5_2_rlc_stop(adev); } -} - -/** - * sdma_v5_2_halt - stop the async dma engines - * - * @adev: amdgpu_device pointer - * - * Halt the async dma engines. - */ -static void sdma_v5_2_halt(struct amdgpu_device *adev) -{ - int i; - u32 f32_cntl; - - sdma_v5_2_gfx_stop(adev); - sdma_v5_2_rlc_stop(adev); if (!amdgpu_sriov_vf(adev)) { for (i = 0; i < adev->sdma.num_instances; i++) { f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); - f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1); + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1); WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl); } } @@ -625,9 +604,6 @@ static void sdma_v5_2_halt(struct amdgpu_device *adev) * @adev: amdgpu_device pointer * * Set up the gfx DMA ring buffers and enable them. - * It assumes that the dma engine is stopped for each instance. - * The function enables the engine and preemptions sequentially for each instance. - * * Returns 0 for success, error for failure. 
*/ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev) @@ -769,7 +745,10 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev) ring->sched.ready = true; - sdma_v5_2_ctx_switch_enable_for_instance(adev, i); + if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */ + sdma_v5_2_ctx_switch_enable(adev, true); + sdma_v5_2_enable(adev, true); + } r = amdgpu_ring_test_ring(ring); if (r) { @@ -813,7 +792,7 @@ static int sdma_v5_2_load_microcode(struct amdgpu_device *adev) int i, j; /* halt the MEs */ - sdma_v5_2_halt(adev); + sdma_v5_2_enable(adev, false); for (i = 0; i < adev->sdma.num_instances; i++) { if (!adev->sdma.instance[i].fw) @@ -885,8 +864,8 @@ static int sdma_v5_2_start(struct amdgpu_device *adev) int r = 0; if (amdgpu_sriov_vf(adev)) { - sdma_v5_2_ctx_switch_disable_all(adev); - sdma_v5_2_halt(adev); + sdma_v5_2_ctx_switch_enable(adev, false); + sdma_v5_2_enable(adev, false); /* set RB registers */ r = sdma_v5_2_gfx_resume(adev); @@ -910,10 +889,12 @@ static int sdma_v5_2_start(struct amdgpu_device *adev) amdgpu_gfx_off_ctrl(adev, false); sdma_v5_2_soft_reset(adev); + /* unhalt the MEs */ + sdma_v5_2_enable(adev, true); + /* enable sdma ring preemption */ + sdma_v5_2_ctx_switch_enable(adev, true); - /* Soft reset supposes to disable the dma engine and preemption. - * Now start the gfx rings and rlc compute queues. 
- */ + /* start the gfx rings and rlc compute queues */ r = sdma_v5_2_gfx_resume(adev); if (adev->in_s0ix) amdgpu_gfx_off_ctrl(adev, true); @@ -1447,8 +1428,8 @@ static int sdma_v5_2_hw_fini(void *handle) if (amdgpu_sriov_vf(adev)) return 0; - sdma_v5_2_ctx_switch_disable_all(adev); - sdma_v5_2_halt(adev); + sdma_v5_2_ctx_switch_enable(adev, false); + sdma_v5_2_enable(adev, false); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index c6a8520053bb..9e18a2b22607 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -42,6 +42,7 @@ #include "soc15.h" #include "soc15_common.h" +#include "soc21.h" static const struct amd_ip_funcs soc21_common_ip_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 3cabceee5f57..39405f0db824 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -1761,23 +1761,21 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = { .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; -static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p, - struct amdgpu_job *job) +static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p) { struct drm_gpu_scheduler **scheds; /* The create msg must be in the first IB submitted */ - if (atomic_read(&job->base.entity->fence_seq)) + if (atomic_read(&p->entity->fence_seq)) return -EINVAL; scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC] [AMDGPU_RING_PRIO_DEFAULT].sched; - drm_sched_entity_modify_sched(job->base.entity, scheds, 1); + drm_sched_entity_modify_sched(p->entity, scheds, 1); return 0; } -static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, - uint64_t addr) +static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr) { struct ttm_operation_ctx ctx = { false, false }; struct amdgpu_bo_va_mapping *map; @@ -1848,7 +1846,7 @@ static int 
vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11) continue; - r = vcn_v3_0_limit_sched(p, job); + r = vcn_v3_0_limit_sched(p); if (r) goto out; } @@ -1862,7 +1860,7 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, struct amdgpu_job *job, struct amdgpu_ib *ib) { - struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); + struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); uint32_t msg_lo = 0, msg_hi = 0; unsigned i; int r; @@ -1881,8 +1879,7 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, msg_hi = val; } else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0) && val == 0) { - r = vcn_v3_0_dec_msg(p, job, - ((u64)msg_hi) << 32 | msg_lo); + r = vcn_v3_0_dec_msg(p, ((u64)msg_hi) << 32 | msg_lo); if (r) return r; } |