Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c')
-rw-r--r--   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 948
1 file changed, 503 insertions(+), 445 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index ba2b7ac0c02d..ce6e04242c52 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -22,14 +22,14 @@
  */
 #include <linux/firmware.h>
 #include <linux/pci.h>
+
+#include <drm/drm_cache.h>
+
 #include "amdgpu.h"
 #include "amdgpu_atomfirmware.h"
 #include "gmc_v10_0.h"
+#include "umc_v8_7.h"
 
-#include "hdp/hdp_5_0_0_offset.h"
-#include "hdp/hdp_5_0_0_sh_mask.h"
-#include "gc/gc_10_1_0_sh_mask.h"
-#include "mmhub/mmhub_2_0_0_sh_mask.h"
 #include "athub/athub_2_0_0_sh_mask.h"
 #include "athub/athub_2_0_0_offset.h"
 #include "dcn/dcn_2_0_0_offset.h"
@@ -45,80 +45,49 @@
 
 #include "nbio_v2_3.h"
 #include "gfxhub_v2_0.h"
+#include "gfxhub_v2_1.h"
 #include "mmhub_v2_0.h"
+#include "mmhub_v2_3.h"
 #include "athub_v2_0.h"
-
-/* XXX Move this macro to navi10 header file, which is like vid.h for VI.*/
-#define AMDGPU_NUM_OF_VMIDS	8
+#include "athub_v2_1.h"
 
-#if 0
-static const struct soc15_reg_golden golden_settings_navi10_hdp[] =
+static int gmc_v10_0_ecc_interrupt_state(struct amdgpu_device *adev,
+					 struct amdgpu_irq_src *src,
+					 unsigned int type,
+					 enum amdgpu_interrupt_state state)
 {
-	/* TODO add golden setting for hdp */
-};
-#endif
+	return 0;
+}
 
 static int gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
-					       struct amdgpu_irq_src *src, unsigned type,
+					      struct amdgpu_irq_src *src, unsigned int type,
 					       enum amdgpu_interrupt_state state)
 {
-	struct amdgpu_vmhub *hub;
-	u32 tmp, reg, bits[AMDGPU_MAX_VMHUBS], i;
-
-	bits[AMDGPU_GFXHUB_0] = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
-		GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
-		GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
-		GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
-		GCVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
-		GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
-		GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
-
-	bits[AMDGPU_MMHUB_0] = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
-		MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
-		MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
-		MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
-		MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
-		MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
-		MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
-
 	switch (state) {
 	case AMDGPU_IRQ_STATE_DISABLE:
 		/* MM HUB */
-		hub = &adev->vmhub[AMDGPU_MMHUB_0];
-		for (i = 0; i < 16; i++) {
-			reg = hub->vm_context0_cntl + i;
-			tmp = RREG32(reg);
-			tmp &= ~bits[AMDGPU_MMHUB_0];
-			WREG32(reg, tmp);
-		}
-
+		amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), false);
 		/* GFX HUB */
-		hub = &adev->vmhub[AMDGPU_GFXHUB_0];
-		for (i = 0; i < 16; i++) {
-			reg = hub->vm_context0_cntl + i;
-			tmp = RREG32(reg);
-			tmp &= ~bits[AMDGPU_GFXHUB_0];
-			WREG32(reg, tmp);
-		}
+		/* This works because this interrupt is only
+		 * enabled at init/resume and disabled in
+		 * fini/suspend, so the overall state doesn't
+		 * change over the course of suspend/resume.
+		 */
+		if (!adev->in_s0ix)
+			amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), false);
 		break;
 	case AMDGPU_IRQ_STATE_ENABLE:
 		/* MM HUB */
-		hub = &adev->vmhub[AMDGPU_MMHUB_0];
-		for (i = 0; i < 16; i++) {
-			reg = hub->vm_context0_cntl + i;
-			tmp = RREG32(reg);
-			tmp |= bits[AMDGPU_MMHUB_0];
-			WREG32(reg, tmp);
-		}
-
+		amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), true);
 		/* GFX HUB */
-		hub = &adev->vmhub[AMDGPU_GFXHUB_0];
-		for (i = 0; i < 16; i++) {
-			reg = hub->vm_context0_cntl + i;
-			tmp = RREG32(reg);
-			tmp |= bits[AMDGPU_GFXHUB_0];
-			WREG32(reg, tmp);
-		}
+		/* This works because this interrupt is only
+		 * enabled at init/resume and disabled in
+		 * fini/suspend, so the overall state doesn't
+		 * change over the course of suspend/resume.
+		 */
+		if (!adev->in_s0ix)
+			amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), true);
 		break;
 	default:
 		break;
@@ -131,66 +100,87 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
 				       struct amdgpu_irq_src *source,
 				       struct amdgpu_iv_entry *entry)
 {
-	struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src];
+	uint32_t vmhub_index = entry->client_id == SOC15_IH_CLIENTID_VMC ?
+			       AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0);
+	struct amdgpu_vmhub *hub = &adev->vmhub[vmhub_index];
+	bool retry_fault = !!(entry->src_data[1] &
+			      AMDGPU_GMC9_FAULT_SOURCE_DATA_RETRY);
+	bool write_fault = !!(entry->src_data[1] &
+			      AMDGPU_GMC9_FAULT_SOURCE_DATA_WRITE);
+	struct amdgpu_task_info *task_info;
 	uint32_t status = 0;
 	u64 addr;
 
 	addr = (u64)entry->src_data[0] << 12;
 	addr |= ((u64)entry->src_data[1] & 0xf) << 44;
 
+	if (retry_fault) {
+		/* Returning 1 here also prevents sending the IV to the KFD */
+
+		/* Process it onyl if it's the first fault for this address */
+		if (entry->ih != &adev->irq.ih_soft &&
+		    amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
+					     entry->timestamp))
+			return 1;
+
+		/* Delegate it to a different ring if the hardware hasn't
+		 * already done it.
+		 */
+		if (entry->ih == &adev->irq.ih) {
+			amdgpu_irq_delegate(adev, entry, 8);
+			return 1;
+		}
+
+		/* Try to handle the recoverable page faults by filling page
+		 * tables
+		 */
+		if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr,
+					   entry->timestamp, write_fault))
+			return 1;
+	}
+
 	if (!amdgpu_sriov_vf(adev)) {
 		/*
 		 * Issue a dummy read to wait for the status register to
 		 * be updated to avoid reading an incorrect value due to
 		 * the new fast GRBM interface.
 		 */
-		if (entry->vmid_src == AMDGPU_GFXHUB_0)
+		if ((entry->vmid_src == AMDGPU_GFXHUB(0)) &&
+		    (amdgpu_ip_version(adev, GC_HWIP, 0) <
+		     IP_VERSION(10, 3, 0)))
			RREG32(hub->vm_l2_pro_fault_status);
 
 		status = RREG32(hub->vm_l2_pro_fault_status);
 		WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+
+		amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
+					     entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0));
 	}
 
-	if (printk_ratelimit()) {
-		struct amdgpu_task_info task_info;
-
-		memset(&task_info, 0, sizeof(struct amdgpu_task_info));
-		amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
-
-		dev_err(adev->dev,
-			"[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, "
-			"for process %s pid %d thread %s pid %d)\n",
-			entry->vmid_src ? "mmhub" : "gfxhub",
-			entry->src_id, entry->ring_id, entry->vmid,
-			entry->pasid, task_info.process_name, task_info.tgid,
-			task_info.task_name, task_info.pid);
-		dev_err(adev->dev, "  in page starting at address 0x%016llx from client %d\n",
-			addr, entry->client_id);
-		if (!amdgpu_sriov_vf(adev)) {
-			dev_err(adev->dev,
-				"GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
-				status);
-			dev_err(adev->dev, "\t Faulty UTCL2 client ID: 0x%lx\n",
-				REG_GET_FIELD(status,
-				GCVM_L2_PROTECTION_FAULT_STATUS, CID));
-			dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
-				REG_GET_FIELD(status,
-				GCVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
-			dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
-				REG_GET_FIELD(status,
-				GCVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
-			dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
-				REG_GET_FIELD(status,
-				GCVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
-			dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
-				REG_GET_FIELD(status,
-				GCVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
-			dev_err(adev->dev, "\t RW: 0x%lx\n",
-				REG_GET_FIELD(status,
-				GCVM_L2_PROTECTION_FAULT_STATUS, RW));
-		}
+	if (!printk_ratelimit())
+		return 0;
+
+	dev_err(adev->dev,
+		"[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
+		entry->vmid_src ? "mmhub" : "gfxhub",
+		entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
+	task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+	if (task_info) {
+		amdgpu_vm_print_task_info(adev, task_info);
+		amdgpu_vm_put_task_info(task_info);
 	}
 
+	dev_err(adev->dev, "  in page starting at address 0x%016llx from client 0x%x (%s)\n",
+		addr, entry->client_id,
+		soc15_ih_clientid_name[entry->client_id]);
+
+	/* Only print L2 fault status if the status register could be read and
+	 * contains useful information
+	 */
+	if (status != 0)
+		hub->vmhub_funcs->print_l2_protection_fault_status(adev,
+								   status);
+
 	return 0;
 }
 
@@ -199,30 +189,20 @@ static const struct amdgpu_irq_src_funcs gmc_v10_0_irq_funcs = {
 	.process = gmc_v10_0_process_interrupt,
 };
 
+static const struct amdgpu_irq_src_funcs gmc_v10_0_ecc_funcs = {
+	.set = gmc_v10_0_ecc_interrupt_state,
+	.process = amdgpu_umc_process_ecc_irq,
+};
+
 static void gmc_v10_0_set_irq_funcs(struct amdgpu_device *adev)
 {
 	adev->gmc.vm_fault.num_types = 1;
 	adev->gmc.vm_fault.funcs = &gmc_v10_0_irq_funcs;
-}
 
-static uint32_t gmc_v10_0_get_invalidate_req(unsigned int vmid,
-					     uint32_t flush_type)
-{
-	u32 req = 0;
-
-	/* invalidate using legacy mode on vmid*/
-	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
-			    PER_VMID_INVALIDATE_REQ, 1 << vmid);
-	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
-	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
-	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
-	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
-	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
-	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
-	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
-			    CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
-
-	return req;
+	if (!amdgpu_sriov_vf(adev)) {
+		adev->gmc.ecc_irq.num_types = 1;
+		adev->gmc.ecc_irq.funcs = &gmc_v10_0_ecc_funcs;
+	}
 }
 
 /**
@@ -235,8 +215,7 @@ static uint32_t gmc_v10_0_get_invalidate_req(unsigned int vmid,
 static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
 				       uint32_t vmhub)
 {
-	return ((vmhub == AMDGPU_MMHUB_0 ||
-		 vmhub == AMDGPU_MMHUB_1) &&
+	return ((vmhub == AMDGPU_MMHUB0(0)) &&
 		(!amdgpu_sriov_vf(adev)));
 }
 
@@ -260,16 +239,48 @@ static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
  * by the amdgpu vm/hsa code.
  */
 
-static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
-				   unsigned int vmhub, uint32_t flush_type)
+/**
+ * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
+ *
+ * @adev: amdgpu_device pointer
+ * @vmid: vm instance to flush
+ * @vmhub: vmhub type
+ * @flush_type: the flush type
+ *
+ * Flush the TLB for the requested page table.
+ */
+static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+				    uint32_t vmhub, uint32_t flush_type)
 {
 	bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
 	struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
-	u32 inv_req = gmc_v10_0_get_invalidate_req(vmid, flush_type);
-	u32 tmp;
+	u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
 	/* Use register 17 for GART */
-	const unsigned eng = 17;
+	const unsigned int eng = 17;
+	unsigned char hub_ip = 0;
+	u32 sem, req, ack;
 	unsigned int i;
+	u32 tmp;
+
+	sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
+	req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
+	ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
+
+	/* flush hdp cache */
+	amdgpu_device_flush_hdp(adev, NULL);
+
+	/* This is necessary for SRIOV as well as for GFXOFF to function
+	 * properly under bare metal
+	 */
+	if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
+	    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
+		amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req,
+						 1 << vmid, GET_INST(GC, 0));
+		return;
+	}
+
+	/* This path is needed before KIQ/MES/GFXOFF are set up */
+	hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP;
 
 	spin_lock(&adev->gmc.invalidate_lock);
 	/*
@@ -283,7 +294,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
 	if (use_semaphore) {
 		for (i = 0; i < adev->usec_timeout; i++) {
 			/* a read return value of 1 means semaphore acuqire */
-			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng);
+			tmp = RREG32_RLC_NO_KIQ(sem, hub_ip);
 			if (tmp & 0x1)
 				break;
 			udelay(1);
@@ -293,18 +304,19 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
 			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
 	}
 
-	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
+	WREG32_RLC_NO_KIQ(req, inv_req, hub_ip);
 
 	/*
	 * Issue a dummy read to wait for the ACK register to be cleared
	 * to avoid a false ACK due to the new fast GRBM interface.
	 */
-	if (vmhub == AMDGPU_GFXHUB_0)
-		RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng);
+	if ((vmhub == AMDGPU_GFXHUB(0)) &&
+	    (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 3, 0)))
+		RREG32_RLC_NO_KIQ(req, hub_ip);
 
 	/* Wait for ACK with a delay.*/
 	for (i = 0; i < adev->usec_timeout; i++) {
-		tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
+		tmp = RREG32_RLC_NO_KIQ(ack, hub_ip);
 		tmp &= 1 << vmid;
 		if (tmp)
 			break;
@@ -314,91 +326,13 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
 	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
 	if (use_semaphore)
-		/*
-		 * add semaphore release after invalidation,
-		 * write with 0 means semaphore release
-		 */
-		WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0);
+		WREG32_RLC_NO_KIQ(sem, 0, hub_ip);
 
 	spin_unlock(&adev->gmc.invalidate_lock);
 
-	if (i < adev->usec_timeout)
-		return;
-
-	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
-}
-
-/**
- * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
- *
- * @adev: amdgpu_device pointer
- * @vmid: vm instance to flush
- *
- * Flush the TLB for the requested page table.
- */
-static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
-					uint32_t vmhub, uint32_t flush_type)
-{
-	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
-	struct dma_fence *fence;
-	struct amdgpu_job *job;
-
-	int r;
-
-	/* flush hdp cache */
-	adev->nbio.funcs->hdp_flush(adev, NULL);
-
-	mutex_lock(&adev->mman.gtt_window_lock);
-
-	if (vmhub == AMDGPU_MMHUB_0) {
-		gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0);
-		mutex_unlock(&adev->mman.gtt_window_lock);
-		return;
-	}
-
-	BUG_ON(vmhub != AMDGPU_GFXHUB_0);
-
-	if (!adev->mman.buffer_funcs_enabled ||
-	    !adev->ib_pool_ready ||
-	    adev->in_gpu_reset ||
-	    ring->sched.ready == false) {
-		gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0);
-		mutex_unlock(&adev->mman.gtt_window_lock);
-		return;
-	}
-
-	/* The SDMA on Navi has a bug which can theoretically result in memory
-	 * corruption if an invalidation happens at the same time as an VA
-	 * translation. Avoid this by doing the invalidation from the SDMA
-	 * itself.
-	 */
-	r = amdgpu_job_alloc_with_ib(adev, 16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
-				     &job);
-	if (r)
-		goto error_alloc;
-
-	job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
-	job->vm_needs_flush = true;
-	job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
-	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
-	r = amdgpu_job_submit(job, &adev->mman.entity,
-			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
-	if (r)
-		goto error_submit;
-
-	mutex_unlock(&adev->mman.gtt_window_lock);
-
-	dma_fence_wait(fence, false);
-	dma_fence_put(fence);
-
-	return;
-
-error_submit:
-	amdgpu_job_free(job);
-
-error_alloc:
-	mutex_unlock(&adev->mman.gtt_window_lock);
-	DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
+	if (i >= adev->usec_timeout)
+		dev_err(adev->dev, "Timeout waiting for VM flush hub: %d!\n",
+			vmhub);
 }
 
 /**
@@ -406,72 +340,46 @@ error_alloc:
 *
 * @adev: amdgpu_device pointer
 * @pasid: pasid to be flush
+ * @flush_type: the flush type
+ * @all_hub: Used with PACKET3_INVALIDATE_TLBS_ALL_HUB()
+ * @inst: is used to select which instance of KIQ to use for the invalidation
 *
 * Flush the TLB for the requested pasid.
 */
-static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
-					uint16_t pasid, uint32_t flush_type,
-					bool all_hub)
+static void gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+					  uint16_t pasid, uint32_t flush_type,
+					  bool all_hub, uint32_t inst)
 {
+	uint16_t queried;
 	int vmid, i;
-	signed long r;
-	uint32_t seq;
-	uint16_t queried_pasid;
-	bool ret;
-	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
-
-	if (amdgpu_emu_mode == 0 && ring->sched.ready) {
-		spin_lock(&adev->gfx.kiq.ring_lock);
-		/* 2 dwords flush + 8 dwords fence */
-		amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
-		kiq->pmf->kiq_invalidate_tlbs(ring,
-				pasid, flush_type, all_hub);
-		r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
-		if (r) {
-			amdgpu_ring_undo(ring);
-			spin_unlock(&adev->gfx.kiq.ring_lock);
-			return -ETIME;
-		}
-
-		amdgpu_ring_commit(ring);
-		spin_unlock(&adev->gfx.kiq.ring_lock);
-		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
-		if (r < 1) {
-			DRM_ERROR("wait for kiq fence error: %ld.\n", r);
-			return -ETIME;
-		}
-
-		return 0;
-	}
 
-	for (vmid = 1; vmid < 16; vmid++) {
-
-		ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
-				&queried_pasid);
-		if (ret && queried_pasid == pasid) {
-			if (all_hub) {
-				for (i = 0; i < adev->num_vmhubs; i++)
-					gmc_v10_0_flush_gpu_tlb(adev, vmid,
-							i, flush_type);
-			} else {
-				gmc_v10_0_flush_gpu_tlb(adev, vmid,
-						AMDGPU_GFXHUB_0, flush_type);
-			}
-			break;
+	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
+		bool valid;
+
+		valid = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+								  &queried);
+		if (!valid || queried != pasid)
+			continue;
+
+		if (all_hub) {
+			for_each_set_bit(i, adev->vmhubs_mask,
+					 AMDGPU_MAX_VMHUBS)
+				gmc_v10_0_flush_gpu_tlb(adev, vmid, i,
+							flush_type);
+		} else {
+			gmc_v10_0_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0),
+						flush_type);
 		}
 	}
-
-	return 0;
 }
 
 static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
-					     unsigned vmid, uint64_t pd_addr)
+					     unsigned int vmid, uint64_t pd_addr)
 {
-	bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
-	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
-	uint32_t req = gmc_v10_0_get_invalidate_req(vmid, 0);
-	unsigned eng = ring->vm_inv_eng;
+	bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(ring->adev, ring->vm_hub);
+	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+	uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0);
+	unsigned int eng = ring->vm_inv_eng;
 
 	/*
	 * It may lose gpuvm invalidate acknowldege state across power-gating
@@ -484,16 +392,21 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 	if (use_semaphore)
 		/* a read return value of 1 means semaphore acuqire */
 		amdgpu_ring_emit_reg_wait(ring,
-					  hub->vm_inv_eng0_sem + eng, 0x1, 0x1);
+					  hub->vm_inv_eng0_sem +
+					  hub->eng_distance * eng, 0x1, 0x1);
 
-	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
+	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
+			      (hub->ctx_addr_distance * vmid),
 			      lower_32_bits(pd_addr));
 
-	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
+	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
+			      (hub->ctx_addr_distance * vmid),
 			      upper_32_bits(pd_addr));
 
-	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
-					    hub->vm_inv_eng0_ack + eng,
+	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req +
+					    hub->eng_distance * eng,
+					    hub->vm_inv_eng0_ack +
+					    hub->eng_distance * eng,
 					    req, 1 << vmid);
 
 	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
@@ -502,18 +415,19 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 		 * add semaphore release after invalidation,
 		 * write with 0 means semaphore release
 		 */
-		amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0);
+		amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem +
+				      hub->eng_distance * eng, 0);
 
 	return pd_addr;
 }
 
-static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
-					 unsigned pasid)
+static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int vmid,
+					 unsigned int pasid)
 {
 	struct amdgpu_device *adev = ring->adev;
 	uint32_t reg;
 
-	if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)
+	if (ring->vm_hub == AMDGPU_GFXHUB(0))
 		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
 	else
 		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;
@@ -524,7 +438,8 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid
 /*
 * PTE format on NAVI 10:
 * 63:59 reserved
- * 58:57 reserved
+ * 58 reserved and for sienna_cichlid is used for MALL noalloc
+ * 57 reserved
 * 56 F
 * 55 L
 * 54 reserved
@@ -553,30 +468,11 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid
 * 0 valid
 */
 
-static uint64_t gmc_v10_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
-{
-	switch (flags) {
-	case AMDGPU_VM_MTYPE_DEFAULT:
-		return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
-	case AMDGPU_VM_MTYPE_NC:
-		return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
-	case AMDGPU_VM_MTYPE_WC:
-		return AMDGPU_PTE_MTYPE_NV10(MTYPE_WC);
-	case AMDGPU_VM_MTYPE_CC:
-		return AMDGPU_PTE_MTYPE_NV10(MTYPE_CC);
-	case AMDGPU_VM_MTYPE_UC:
-		return AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
-	default:
-		return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
-	}
-}
-
 static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level,
 				 uint64_t *addr, uint64_t *flags)
 {
 	if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM))
-		*addr = adev->vm_manager.vram_base_offset + *addr -
-			adev->gmc.vram_start;
+		*addr = amdgpu_gmc_vram_mc2pa(adev, *addr);
 	BUG_ON(*addr & 0xFFFF00000000003FULL);
 
 	if (!adev->gmc.translate_further)
@@ -596,22 +492,72 @@ static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level,
 }
 
 static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
-				 struct amdgpu_bo_va_mapping *mapping,
+				 struct amdgpu_vm *vm,
+				 struct amdgpu_bo *bo,
+				 uint32_t vm_flags,
 				 uint64_t *flags)
 {
-	*flags &= ~AMDGPU_PTE_EXECUTABLE;
-	*flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+	if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
+		*flags |= AMDGPU_PTE_EXECUTABLE;
+	else
+		*flags &= ~AMDGPU_PTE_EXECUTABLE;
+
+	switch (vm_flags & AMDGPU_VM_MTYPE_MASK) {
+	case AMDGPU_VM_MTYPE_DEFAULT:
+	case AMDGPU_VM_MTYPE_NC:
+	default:
+		*flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_NC);
+		break;
+	case AMDGPU_VM_MTYPE_WC:
+		*flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_WC);
+		break;
+	case AMDGPU_VM_MTYPE_CC:
+		*flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_CC);
+		break;
+	case AMDGPU_VM_MTYPE_UC:
+		*flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_UC);
+		break;
+	}
 
-	*flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK;
-	*flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK);
+	if (vm_flags & AMDGPU_VM_PAGE_NOALLOC)
+		*flags |= AMDGPU_PTE_NOALLOC;
+	else
+		*flags &= ~AMDGPU_PTE_NOALLOC;
 
-	if (mapping->flags & AMDGPU_PTE_PRT) {
+	if (vm_flags & AMDGPU_VM_PAGE_PRT) {
 		*flags |= AMDGPU_PTE_PRT;
 		*flags |= AMDGPU_PTE_SNOOPED;
 		*flags |= AMDGPU_PTE_LOG;
 		*flags |= AMDGPU_PTE_SYSTEM;
 		*flags &= ~AMDGPU_PTE_VALID;
 	}
+
+	if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+			       AMDGPU_GEM_CREATE_EXT_COHERENT |
			       AMDGPU_GEM_CREATE_UNCACHED))
+		*flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_UC);
+}
+
+static unsigned int gmc_v10_0_get_vbios_fb_size(struct amdgpu_device *adev)
+{
+	u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
+	unsigned int size;
+
+	if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
+		size = AMDGPU_VBIOS_VGA_ALLOCATION;
+	} else {
+		u32 viewport;
+		u32 pitch;
+
+		viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
+		pitch = RREG32_SOC15(DCE, 0, mmHUBPREQ0_DCSURF_SURFACE_PITCH);
+		size = (REG_GET_FIELD(viewport,
				      HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
+			REG_GET_FIELD(pitch, HUBPREQ0_DCSURF_SURFACE_PITCH, PITCH) *
+			4);
+	}
+
+	return size;
 }
 
 static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
@@ -619,9 +565,9 @@ static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
 	.flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
 	.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
 	.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
-	.map_mtype = gmc_v10_0_map_mtype,
 	.get_vm_pde = gmc_v10_0_get_vm_pde,
-	.get_vm_pte = gmc_v10_0_get_vm_pte
+	.get_vm_pte = gmc_v10_0_get_vm_pte,
+	.get_vbios_fb_size = gmc_v10_0_get_vbios_fb_size,
 };
 
 static void gmc_v10_0_set_gmc_funcs(struct amdgpu_device *adev)
@@ -630,12 +576,66 @@ static void gmc_v10_0_set_gmc_funcs(struct amdgpu_device *adev)
 		adev->gmc.gmc_funcs = &gmc_v10_0_gmc_funcs;
 }
 
-static int gmc_v10_0_early_init(void *handle)
+static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev)
+{
+	switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
+	case IP_VERSION(8, 7, 0):
+		adev->umc.max_ras_err_cnt_per_query = UMC_V8_7_TOTAL_CHANNEL_NUM;
+		adev->umc.channel_inst_num = UMC_V8_7_CHANNEL_INSTANCE_NUM;
+		adev->umc.umc_inst_num = UMC_V8_7_UMC_INSTANCE_NUM;
+		adev->umc.channel_offs = UMC_V8_7_PER_CHANNEL_OFFSET_SIENNA;
+		adev->umc.retire_unit = 1;
+		adev->umc.channel_idx_tbl = &umc_v8_7_channel_idx_tbl[0][0];
+		adev->umc.ras = &umc_v8_7_ras;
+		break;
+	default:
+		break;
+	}
+}
+
+static void gmc_v10_0_set_mmhub_funcs(struct amdgpu_device *adev)
+{
+	switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+	case IP_VERSION(2, 3, 0):
+	case IP_VERSION(2, 4, 0):
+	case IP_VERSION(2, 4, 1):
+		adev->mmhub.funcs = &mmhub_v2_3_funcs;
+		break;
+	default:
+		adev->mmhub.funcs = &mmhub_v2_0_funcs;
+		break;
+	}
+}
+
+static void gmc_v10_0_set_gfxhub_funcs(struct amdgpu_device *adev)
+{
+	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+	case IP_VERSION(10, 3, 0):
+	case IP_VERSION(10, 3, 2):
+	case IP_VERSION(10, 3, 1):
+	case IP_VERSION(10, 3, 4):
+	case IP_VERSION(10, 3, 5):
+	case IP_VERSION(10, 3, 6):
+	case IP_VERSION(10, 3, 3):
+	case IP_VERSION(10, 3, 7):
+		adev->gfxhub.funcs = &gfxhub_v2_1_funcs;
+		break;
+	default:
+		adev->gfxhub.funcs = &gfxhub_v2_0_funcs;
+		break;
+	}
+}
+
+
+static int gmc_v10_0_early_init(struct amdgpu_ip_block *ip_block)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_device *adev = ip_block->adev;
 
+	gmc_v10_0_set_mmhub_funcs(adev);
+	gmc_v10_0_set_gfxhub_funcs(adev);
 	gmc_v10_0_set_gmc_funcs(adev);
 	gmc_v10_0_set_irq_funcs(adev);
+	gmc_v10_0_set_umc_funcs(adev);
 
 	adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
 	adev->gmc.shared_aperture_end =
@@ -643,21 +643,24 @@ static int gmc_v10_0_early_init(void *handle)
 	adev->gmc.private_aperture_start = 0x1000000000000000ULL;
 	adev->gmc.private_aperture_end =
 		adev->gmc.private_aperture_start + (4ULL << 30) - 1;
+	adev->gmc.noretry_flags = AMDGPU_VM_NORETRY_FLAGS_TF;
 
 	return 0;
 }
 
-static int gmc_v10_0_late_init(void *handle)
+static int gmc_v10_0_late_init(struct amdgpu_ip_block *ip_block)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_device *adev = ip_block->adev;
 	int r;
 
-	amdgpu_bo_late_init(adev);
-
 	r = amdgpu_gmc_allocate_vm_inv_eng(adev);
 	if (r)
 		return r;
 
+	r = amdgpu_gmc_ras_late_init(adev);
+	if (r)
+		return r;
+
 	return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
 }
 
@@ -666,13 +669,23 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev,
 {
 	u64 base = 0;
 
-	base = gfxhub_v2_0_get_fb_location(adev);
+	base = adev->gfxhub.funcs->get_fb_location(adev);
+	/* add the xgmi offset of the physical node */
+	base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
+
+	amdgpu_gmc_set_agp_default(adev, mc);
 	amdgpu_gmc_vram_location(adev, &adev->gmc, base);
-	amdgpu_gmc_gart_location(adev, mc);
+	amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
+	if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1))
+		amdgpu_gmc_agp_location(adev, mc);
 
 	/* base offset of vram pages */
-	adev->vm_manager.vram_base_offset = gfxhub_v2_0_get_mc_fb_offset(adev);
+	adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
+
+	/* add the xgmi offset of the physical node */
+	adev->vm_manager.vram_base_offset +=
+		adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
 }
 
 /**
@@ -701,23 +714,31 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev)
 	adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
 	adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
 
-	/* In case the PCI BAR is larger than the actual amount of vram */
+#ifdef CONFIG_X86_64
+	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) {
+		adev->gmc.aper_base = adev->gfxhub.funcs->get_mc_fb_offset(adev);
+		adev->gmc.aper_size = adev->gmc.real_vram_size;
+	}
+#endif
+
 	adev->gmc.visible_vram_size = adev->gmc.aper_size;
-	if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
-		adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
 
 	/* set the gart size */
 	if (amdgpu_gart_size == -1) {
-		switch (adev->asic_type) {
-		case CHIP_NAVI10:
-		case CHIP_NAVI14:
-		case CHIP_NAVI12:
+		switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
 		default:
 			adev->gmc.gart_size = 512ULL << 20;
 			break;
+		case IP_VERSION(10, 3, 1):   /* DCE SG support */
+		case IP_VERSION(10, 3, 3):   /* DCE SG support */
+		case IP_VERSION(10, 3, 6):   /* DCE SG support */
+		case IP_VERSION(10, 3, 7):   /* DCE SG support */
+			adev->gmc.gart_size = 1024ULL << 20;
+			break;
		}
-	} else
+	} else {
 		adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
+	}
 
 	gmc_v10_0_vram_gtt_location(adev, &adev->gmc);
@@ -739,66 +760,72 @@ static int gmc_v10_0_gart_init(struct amdgpu_device *adev)
 		return r;
 
 	adev->gart.table_size = adev->gart.num_gpu_pages * 8;
-	adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_NV10(MTYPE_UC) |
+	adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_UC) |
				 AMDGPU_PTE_EXECUTABLE;
 
 	return amdgpu_gart_table_vram_alloc(adev);
 }
 
-static unsigned gmc_v10_0_get_vbios_fb_size(struct amdgpu_device *adev)
-{
-	u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
-	unsigned size;
-
-	if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
-		size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
-	} else {
-		u32 viewport;
-		u32 pitch;
-
-		viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
-		pitch = RREG32_SOC15(DCE, 0, mmHUBPREQ0_DCSURF_SURFACE_PITCH);
-		size = (REG_GET_FIELD(viewport,
					HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
-				REG_GET_FIELD(pitch, HUBPREQ0_DCSURF_SURFACE_PITCH, PITCH) *
-				4);
-	}
-	/* return 0 if the pre-OS buffer uses up most of vram */
-	if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024)) {
-		DRM_ERROR("Warning: pre-OS buffer uses most of vram, \
				be aware of gart table overwrite\n");
-		return 0;
-	}
-
-	return size;
-}
-
-
-
-static int gmc_v10_0_sw_init(void *handle)
+static int gmc_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
 {
 	int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_device *adev = ip_block->adev;
+
+	adev->gfxhub.funcs->init(adev);
 
-	gfxhub_v2_0_init(adev);
-	mmhub_v2_0_init(adev);
+	adev->mmhub.funcs->init(adev);
 
 	spin_lock_init(&adev->gmc.invalidate_lock);
 
-	r = amdgpu_atomfirmware_get_vram_info(adev,
-		&vram_width, &vram_type, &vram_vendor);
-	if (!amdgpu_emu_mode)
-		adev->gmc.vram_width = vram_width;
-	else
+	if ((adev->flags & AMD_IS_APU) && amdgpu_emu_mode == 1) {
+		adev->gmc.vram_type = AMDGPU_VRAM_TYPE_DDR4;
+		adev->gmc.vram_width = 64;
+	} else if (amdgpu_emu_mode == 1) {
+		adev->gmc.vram_type = AMDGPU_VRAM_TYPE_GDDR6;
 		adev->gmc.vram_width = 1 * 128; /* numchan * chansize */
+	} else {
+		r = amdgpu_atomfirmware_get_vram_info(adev,
						      &vram_width, &vram_type, &vram_vendor);
+		adev->gmc.vram_width = vram_width;
+
+		adev->gmc.vram_type = vram_type;
+		adev->gmc.vram_vendor = vram_vendor;
+	}
+
+	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+	case IP_VERSION(10, 3, 0):
+		adev->gmc.mall_size = 128 * 1024 * 1024;
+		break;
+	case IP_VERSION(10, 3, 2):
+		adev->gmc.mall_size = 96 * 1024 * 1024;
+		break;
+	case IP_VERSION(10, 3, 4):
+		adev->gmc.mall_size = 32 * 1024 * 1024;
+		break;
+	case IP_VERSION(10, 3, 5):
+		adev->gmc.mall_size = 16 * 1024 * 1024;
+		break;
+	default:
+		adev->gmc.mall_size = 0;
+		break;
+	}
 
-	adev->gmc.vram_type = vram_type;
-	adev->gmc.vram_vendor = vram_vendor;
-	switch (adev->asic_type) {
-	case CHIP_NAVI10:
-	case CHIP_NAVI14:
-	case CHIP_NAVI12:
-		adev->num_vmhubs = 2;
+	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+	case IP_VERSION(10, 1, 10):
+	case IP_VERSION(10, 1, 1):
+	case IP_VERSION(10, 1, 2):
+	case IP_VERSION(10, 1, 3):
+	case IP_VERSION(10, 1, 4):
+	case IP_VERSION(10, 3, 0):
+	case IP_VERSION(10, 3, 2):
+	case IP_VERSION(10, 3, 1):
+	case IP_VERSION(10, 3, 4):
+	case IP_VERSION(10, 3, 5):
+	case IP_VERSION(10, 3, 6):
+	case IP_VERSION(10, 3, 3):
+	case IP_VERSION(10, 3, 7):
+		set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+		set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
 		/*
		 * To fulfill 4-level page support,
		 * vm size is 256TB (48bit), maximum size of Navi10/Navi14/Navi12,
@@ -824,6 +851,14 @@ static int gmc_v10_0_sw_init(void *handle)
 	if (r)
 		return r;
 
+	if (!amdgpu_sriov_vf(adev)) {
+		/* interrupt sent to DF. */
+		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0,
+				      &adev->gmc.ecc_irq);
+		if (r)
+			return r;
+	}
+
 	/*
	 * Set the internal MC address mask This is the max address of the GPU's
	 * internal address space.
@@ -832,15 +867,17 @@ static int gmc_v10_0_sw_init(void *handle)
 	r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
 	if (r) {
-		printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
+		dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n");
 		return r;
 	}
 
+	adev->need_swiotlb = drm_need_swiotlb(44);
+
 	r = gmc_v10_0_mc_init(adev);
 	if (r)
 		return r;
 
-	adev->gmc.stolen_size = gmc_v10_0_get_vbios_fb_size(adev);
+	amdgpu_gmc_get_vbios_allocations(adev);
 
 	/* Memory manager */
 	r = amdgpu_bo_init(adev);
@@ -857,16 +894,19 @@ static int gmc_v10_0_sw_init(void *handle)
	 * amdgpu graphics/compute will use VMIDs 1-7
	 * amdkfd will use VMIDs 8-15
	 */
-	adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS;
-	adev->vm_manager.id_mgr[AMDGPU_MMHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS;
+	adev->vm_manager.first_kfd_vmid = 8;
 
 	amdgpu_vm_manager_init(adev);
 
+	r = amdgpu_gmc_ras_sw_init(adev);
+	if (r)
+		return r;
+
 	return 0;
 }
 
 /**
- * gmc_v8_0_gart_fini - vm fini callback
+ * gmc_v10_0_gart_fini - vm fini callback
 *
 * @adev: amdgpu_device pointer
 *
@@ -875,12 +915,11 @@ static int gmc_v10_0_sw_init(void *handle)
 static void gmc_v10_0_gart_fini(struct amdgpu_device *adev)
 {
 	amdgpu_gart_table_vram_free(adev);
-	amdgpu_gart_fini(adev);
 }
 
-static int gmc_v10_0_sw_fini(void *handle)
+static int gmc_v10_0_sw_fini(struct amdgpu_ip_block *ip_block)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_device *adev = ip_block->adev;
 
 	amdgpu_vm_manager_fini(adev);
 	gmc_v10_0_gart_fini(adev);
@@ -892,14 +931,6 @@ static int gmc_v10_0_sw_fini(void *handle)
 
 static void gmc_v10_0_init_golden_registers(struct amdgpu_device *adev)
 {
-	switch (adev->asic_type) {
-	case CHIP_NAVI10:
-	case CHIP_NAVI14:
-	case CHIP_NAVI12:
-		break;
-	default:
-		break;
-	}
 }
 
 /**
@@ -911,64 +942,75 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
 {
 	int r;
 	bool value;
-	u32 tmp;
 
 	if (adev->gart.bo == NULL) {
 		dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
 		return -EINVAL;
 	}
 
-	r = amdgpu_gart_table_vram_pin(adev);
-	if (r)
-		return r;
+	amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);
 
-	r = gfxhub_v2_0_gart_enable(adev);
-	if (r)
-		return r;
+	if (!adev->in_s0ix) {
+		r = adev->gfxhub.funcs->gart_enable(adev);
+		if (r)
+			return r;
+	}
 
-	r = mmhub_v2_0_gart_enable(adev);
+	r = adev->mmhub.funcs->gart_enable(adev);
 	if (r)
 		return r;
 
-	tmp = RREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL);
-	tmp |= HDP_MISC_CNTL__FLUSH_INVALIDATE_CACHE_MASK;
-	WREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL, tmp);
-
-	tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);
-	WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);
+	adev->hdp.funcs->init_registers(adev);
 
 	/* Flush HDP after it is initialized */
-	adev->nbio.funcs->hdp_flush(adev, NULL);
+	amdgpu_device_flush_hdp(adev, NULL);
 
-	value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
-		false : true;
+	value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS;
 
-	gfxhub_v2_0_set_fault_enable_default(adev, value);
-	mmhub_v2_0_set_fault_enable_default(adev, value);
-	gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0);
-	gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0);
+	if (!adev->in_s0ix)
+		adev->gfxhub.funcs->set_fault_enable_default(adev, value);
+	adev->mmhub.funcs->set_fault_enable_default(adev, value);
+	gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB0(0), 0);
+	if (!adev->in_s0ix)
+		gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
 
 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
-		 (unsigned)(adev->gmc.gart_size >> 20),
+		 (unsigned int)(adev->gmc.gart_size >> 20),
		 (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
 
-	adev->gart.ready = true;
-
 	return 0;
 }
 
-static int gmc_v10_0_hw_init(void *handle)
+static int gmc_v10_0_hw_init(struct amdgpu_ip_block *ip_block)
 {
+	struct amdgpu_device *adev = ip_block->adev;
 	int r;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	adev->gmc.flush_pasid_uses_kiq = !amdgpu_emu_mode;
 
 	/* The sequence of these two function calls matters.*/
 	gmc_v10_0_init_golden_registers(adev);
 
+	/*
+	 * harvestable groups in gc_utcl2 need to be programmed before any GFX block
+	 * register setup within GMC, or else system hang when harvesting SA.
+	 */
+	if (!adev->in_s0ix && adev->gfxhub.funcs && adev->gfxhub.funcs->utcl2_harvest)
+		adev->gfxhub.funcs->utcl2_harvest(adev);
+
 	r = gmc_v10_0_gart_enable(adev);
 	if (r)
 		return r;
 
+	if (amdgpu_emu_mode == 1) {
+		r = amdgpu_gmc_vram_checking(adev);
+		if (r)
+			return r;
+	}
+
+	if (adev->umc.funcs && adev->umc.funcs->init_registers)
+		adev->umc.funcs->init_registers(adev);
+
 	return 0;
 }
 
@@ -981,14 +1023,16 @@ static int gmc_v10_0_hw_init(void *handle)
 */
 static void gmc_v10_0_gart_disable(struct amdgpu_device *adev)
 {
-	gfxhub_v2_0_gart_disable(adev);
-	mmhub_v2_0_gart_disable(adev);
-	amdgpu_gart_table_vram_unpin(adev);
+	if (!adev->in_s0ix)
+		adev->gfxhub.funcs->gart_disable(adev);
+	adev->mmhub.funcs->gart_disable(adev);
 }
 
-static int gmc_v10_0_hw_fini(void *handle)
+static int gmc_v10_0_hw_fini(struct amdgpu_ip_block *ip_block)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_device *adev = ip_block->adev;
+
+	gmc_v10_0_gart_disable(adev);
 
 	if (amdgpu_sriov_vf(adev)) {
 		/* full access mode, so don't touch any GMC register */
@@ -997,74 +1041,90 @@ static int gmc_v10_0_hw_fini(void *handle)
 	}
 
 	amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
-	gmc_v10_0_gart_disable(adev);
+
+	if (adev->gmc.ecc_irq.funcs &&
+	    amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+		amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
 
 	return 0;
 }
 
-static int gmc_v10_0_suspend(void *handle)
+static int gmc_v10_0_suspend(struct amdgpu_ip_block *ip_block)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	gmc_v10_0_hw_fini(adev);
+	gmc_v10_0_hw_fini(ip_block);
 
 	return 0;
 }
 
-static int gmc_v10_0_resume(void *handle)
+static int gmc_v10_0_resume(struct amdgpu_ip_block *ip_block)
 {
 	int r;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	r = gmc_v10_0_hw_init(adev);
+	r = gmc_v10_0_hw_init(ip_block);
 	if (r)
 		return r;
 
-	amdgpu_vmid_reset_all(adev);
+	amdgpu_vmid_reset_all(ip_block->adev);
 
 	return 0;
 }
 
-static bool gmc_v10_0_is_idle(void *handle)
+static bool gmc_v10_0_is_idle(struct amdgpu_ip_block *ip_block)
 {
 	/* MC is always ready in GMC v10.*/
 	return true;
 }
 
-static int gmc_v10_0_wait_for_idle(void *handle)
+static int gmc_v10_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
 {
 	/* There is no need to wait for MC idle in GMC v10.*/
 	return 0;
 }
 
-static int gmc_v10_0_soft_reset(void *handle)
-{
-	return 0;
-}
-
-static int gmc_v10_0_set_clockgating_state(void *handle,
+static int gmc_v10_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
					   enum amd_clockgating_state state)
 {
 	int r;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_device *adev = ip_block->adev;
 
-	r = mmhub_v2_0_set_clockgating(adev, state);
+	/*
	 * The issue mmhub can't disconnect from DF with MMHUB clock gating being disabled
	 * is a new problem observed at DF 3.0.3, however with the same suspend sequence not
	 * seen any issue on the DF 3.0.2 series platform.
	 */
+	if (adev->in_s0ix &&
+	    amdgpu_ip_version(adev, DF_HWIP, 0) > IP_VERSION(3, 0, 2)) {
+		dev_dbg(adev->dev, "keep mmhub clock gating being enabled for s0ix\n");
+		return 0;
+	}
+
+	r = adev->mmhub.funcs->set_clockgating(adev, state);
 	if (r)
 		return r;
 
-	return athub_v2_0_set_clockgating(adev, state);
+	if (amdgpu_ip_version(adev, ATHUB_HWIP, 0) >= IP_VERSION(2, 1, 0))
+		return athub_v2_1_set_clockgating(adev, state);
+	else
+		return athub_v2_0_set_clockgating(adev, state);
 }
 
-static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags)
+static void gmc_v10_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_device *adev = ip_block->adev;
 
-	mmhub_v2_0_get_clockgating(adev, flags);
+	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 3) ||
+	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 4))
+		return;
+
+	adev->mmhub.funcs->get_clockgating(adev, flags);
 
-	athub_v2_0_get_clockgating(adev, flags);
+	if (amdgpu_ip_version(adev, ATHUB_HWIP, 0) >= IP_VERSION(2, 1, 0))
+		athub_v2_1_get_clockgating(adev, flags);
+	else
+		athub_v2_0_get_clockgating(adev, flags);
 }
 
-static int gmc_v10_0_set_powergating_state(void *handle,
+static int gmc_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
					   enum amd_powergating_state state)
 {
 	return 0;
@@ -1082,14 +1142,12 @@ const struct amd_ip_funcs gmc_v10_0_ip_funcs = {
 	.resume = gmc_v10_0_resume,
 	.is_idle = gmc_v10_0_is_idle,
 	.wait_for_idle = gmc_v10_0_wait_for_idle,
-	.soft_reset = gmc_v10_0_soft_reset,
 	.set_clockgating_state = gmc_v10_0_set_clockgating_state,
 	.set_powergating_state = gmc_v10_0_set_powergating_state,
 	.get_clockgating_state = gmc_v10_0_get_clockgating_state,
 };
 
-const struct amdgpu_ip_block_version gmc_v10_0_ip_block =
-{
+const struct amdgpu_ip_block_version gmc_v10_0_ip_block = {
 	.type = AMD_IP_BLOCK_TYPE_GMC,
 	.major = 10,
 	.minor = 0,
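
A note for readers following the TLB-flush hunks above: gmc_v10_0_flush_gpu_tlb_pasid() now walks adev->vmhubs_mask with for_each_set_bit() instead of looping over 0..num_vmhubs, so only the hubs registered in sw_init() via set_bit(AMDGPU_GFXHUB(0)) / set_bit(AMDGPU_MMHUB0(0)) get flushed. Below is a stand-alone sketch of that bitmap-driven walk; the hub numbering, MAX_VMHUBS and the printf are stand-ins for the real amdgpu structures and flush call, purely for illustration.

/* Hypothetical, self-contained illustration of the vmhubs_mask walk; not kernel code. */
#include <stdint.h>
#include <stdio.h>

#define MAX_VMHUBS 8	/* illustrative, stands in for AMDGPU_MAX_VMHUBS */
#define GFXHUB_0   0	/* stands in for AMDGPU_GFXHUB(0) */
#define MMHUB0_0   1	/* stands in for AMDGPU_MMHUB0(0) */

static void flush_gpu_tlb(unsigned int vmid, unsigned int vmhub, uint32_t flush_type)
{
	printf("flush vmid %u on hub %u (type %u)\n", vmid, vmhub, flush_type);
}

/* same shape as the new flush_gpu_tlb_pasid() loop: every registered hub, or just GFX */
static void flush_pasid_hubs(unsigned long vmhubs_mask, unsigned int vmid,
			     uint32_t flush_type, int all_hub)
{
	unsigned int i;

	if (!all_hub) {
		flush_gpu_tlb(vmid, GFXHUB_0, flush_type);
		return;
	}

	for (i = 0; i < MAX_VMHUBS; i++)	/* open-coded for_each_set_bit() */
		if (vmhubs_mask & (1UL << i))
			flush_gpu_tlb(vmid, i, flush_type);
}

int main(void)
{
	unsigned long mask = (1UL << GFXHUB_0) | (1UL << MMHUB0_0);

	flush_pasid_hubs(mask, 3, 0, 1);
	return 0;
}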
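The removed gmc_v10_0_map_mtype() helper is replaced by the switch inside gmc_v10_0_get_vm_pte(), and AMDGPU_PTE_MTYPE_NV10() moves to a two-argument form that re-types an existing flags word (the gart_init hunk now passes 0ULL as the first argument). A minimal sketch of that mask-and-set pattern follows, assuming a 3-bit MTYPE field starting at bit 48; the field position, the stand-in MTYPE values and the helper name pte_set_mtype are illustrative assumptions, not taken from the driver headers.

/* Hypothetical sketch of a two-argument "set MTYPE in existing PTE flags" helper. */
#include <stdint.h>
#include <stdio.h>

#define PTE_MTYPE_SHIFT 48
#define PTE_MTYPE_MASK  (7ULL << PTE_MTYPE_SHIFT)	/* illustrative 3-bit field */

enum { MTYPE_NC = 0, MTYPE_WC = 1, MTYPE_CC = 2, MTYPE_UC = 3 };	/* stand-in values */

/* clear the old MTYPE bits, then OR in the new type - the pattern the
 * two-argument AMDGPU_PTE_MTYPE_NV10(flags, mtype) form presumably implements */
static uint64_t pte_set_mtype(uint64_t flags, uint64_t mtype)
{
	return (flags & ~PTE_MTYPE_MASK) | (mtype << PTE_MTYPE_SHIFT);
}

int main(void)
{
	uint64_t flags = 0x1;	/* e.g. a valid bit already set by the caller */

	flags = pte_set_mtype(flags, MTYPE_NC);	/* default/NC path of the switch */
	flags = pte_set_mtype(flags, MTYPE_UC);	/* later forced to UC for uncached BOs */
	printf("flags = 0x%016llx\n", (unsigned long long)flags);
	return 0;
}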
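The invalidation-register addressing likewise changes from fixed "+ eng" offsets to per-hub strides: sem/req/ack become base + eng_distance * eng, and the per-VMID page-table-base writes use ctx0_ptb_addr_lo32/hi32 + ctx_addr_distance * vmid. A tiny sketch of that stride arithmetic, with a stand-in struct and made-up register offsets rather than the real struct amdgpu_vmhub:

/* Hypothetical illustration of base + stride * index register addressing. */
#include <stdint.h>
#include <stdio.h>

struct vmhub_regs {	/* stand-in for the few fields used here */
	uint32_t vm_inv_eng0_sem;
	uint32_t vm_inv_eng0_req;
	uint32_t vm_inv_eng0_ack;
	uint32_t eng_distance;
	uint32_t ctx0_ptb_addr_lo32;
	uint32_t ctx_addr_distance;
};

int main(void)
{
	struct vmhub_regs hub = { 0x100, 0x110, 0x120, 0x4, 0x200, 0x2 };	/* made-up offsets */
	const unsigned int eng = 17;	/* "Use register 17 for GART" */
	unsigned int vmid = 3;

	uint32_t sem = hub.vm_inv_eng0_sem + hub.eng_distance * eng;
	uint32_t req = hub.vm_inv_eng0_req + hub.eng_distance * eng;
	uint32_t ack = hub.vm_inv_eng0_ack + hub.eng_distance * eng;
	uint32_t ptb_lo = hub.ctx0_ptb_addr_lo32 + hub.ctx_addr_distance * vmid;

	printf("sem=0x%x req=0x%x ack=0x%x ptb_lo=0x%x\n", sem, req, ack, ptb_lo);
	return 0;
}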
