Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 895
1 file changed, 386 insertions(+), 509 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index f9a5a2c0573e..8ad7519f7b58 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -56,6 +56,7 @@
 #include "umc_v6_1.h"
 #include "umc_v6_0.h"
 #include "umc_v6_7.h"
+#include "umc_v12_0.h"
 #include "hdp_v4_0.h"
 #include "mca_v3_0.h"
 
@@ -64,8 +65,6 @@
 #include "amdgpu_ras.h"
 #include "amdgpu_xgmi.h"
 
-#include "amdgpu_reset.h"
-
 /* add these here since we already include dce12 headers and these are for DCN */
 #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d
 #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX 2
@@ -79,8 +78,6 @@
 #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2 0x05ea
 #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2_BASE_IDX 2
 
-#define MAX_MEM_RANGES 8
-
 static const char * const gfxhub_client_ids[] = {
 	"CB",
 	"DB",
@@ -497,14 +494,14 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
 				if (j >= AMDGPU_MMHUB0(0))
 					tmp = RREG32_SOC15_IP(MMHUB, reg);
 				else
-					tmp = RREG32_SOC15_IP(GC, reg);
+					tmp = RREG32_XCC(reg, j);
 				tmp &= ~bits;
 				if (j >= AMDGPU_MMHUB0(0))
 					WREG32_SOC15_IP(MMHUB, reg, tmp);
 				else
-					WREG32_SOC15_IP(GC, reg, tmp);
+					WREG32_XCC(reg, tmp, j);
 			}
 		}
 		break;
@@ -525,14 +522,14 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
 				if (j >= AMDGPU_MMHUB0(0))
 					tmp = RREG32_SOC15_IP(MMHUB, reg);
 				else
-					tmp = RREG32_SOC15_IP(GC, reg);
+					tmp = RREG32_XCC(reg, j);
 				tmp |= bits;
 				if (j >= AMDGPU_MMHUB0(0))
 					WREG32_SOC15_IP(MMHUB, reg, tmp);
 				else
-					WREG32_SOC15_IP(GC, reg, tmp);
+					WREG32_XCC(reg, tmp, j);
 			}
 		}
 		break;
@@ -547,13 +544,16 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
 				      struct amdgpu_irq_src *source,
 				      struct amdgpu_iv_entry *entry)
 {
-	bool retry_fault = !!(entry->src_data[1] & 0x80);
-	bool write_fault = !!(entry->src_data[1] & 0x20);
-	uint32_t status = 0, cid = 0, rw = 0;
-	struct amdgpu_task_info task_info;
+	bool retry_fault = !!(entry->src_data[1] &
+			      AMDGPU_GMC9_FAULT_SOURCE_DATA_RETRY);
+	bool write_fault = !!(entry->src_data[1] &
+			      AMDGPU_GMC9_FAULT_SOURCE_DATA_WRITE);
+	uint32_t status = 0, cid = 0, rw = 0, fed = 0;
+	struct amdgpu_task_info *task_info;
 	struct amdgpu_vmhub *hub;
 	const char *mmhub_cid;
 	const char *hub_name;
+	unsigned int vmhub;
 	u64 addr;
 	uint32_t cam_index = 0;
 	int ret, xcc_id = 0;
@@ -566,10 +566,10 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
 
 	if (entry->client_id == SOC15_IH_CLIENTID_VMC) {
 		hub_name = "mmhub0";
-		hub = &adev->vmhub[AMDGPU_MMHUB0(node_id / 4)];
+		vmhub = AMDGPU_MMHUB0(node_id / 4);
 	} else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) {
 		hub_name = "mmhub1";
-		hub = &adev->vmhub[AMDGPU_MMHUB1(0)];
+		vmhub = AMDGPU_MMHUB1(0);
 	} else {
 		hub_name = "gfxhub0";
 		if (adev->gfx.funcs->ih_node_to_logical_xcc) {
@@ -578,8 +578,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
 			if (xcc_id < 0)
 				xcc_id = 0;
 		}
-		hub = &adev->vmhub[xcc_id];
+		vmhub = xcc_id;
 	}
+	hub = &adev->vmhub[vmhub];
 
 	if (retry_fault) {
 		if (adev->irq.retry_cam_enabled) {
@@ -594,7 +595,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
 			cam_index = entry->src_data[2] & 0x3ff;
 
 			ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
-						     addr, write_fault);
+						     addr, entry->timestamp, write_fault);
 			WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index);
 			if (ret)
 				return 1;
@@ -617,29 +618,33 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
 		 * tables
 		 */
 		if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
-					   addr, write_fault))
+					   addr, entry->timestamp, write_fault))
 			return 1;
 		}
 	}
 
+	if (kgd2kfd_vmfault_fast_path(adev, entry, retry_fault))
+		return 1;
+
 	if (!printk_ratelimit())
 		return 0;
-
-	memset(&task_info, 0, sizeof(struct amdgpu_task_info));
-	amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
 
 	dev_err(adev->dev,
-		"[%s] %s page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d)\n",
-		hub_name, retry_fault ? "retry" : "no-retry",
-		entry->src_id, entry->ring_id, entry->vmid,
-		entry->pasid, task_info.process_name, task_info.tgid,
-		task_info.task_name, task_info.pid);
+		"[%s] %s page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", hub_name,
+		retry_fault ? "retry" : "no-retry",
+		entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
+
+	task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+	if (task_info) {
+		amdgpu_vm_print_task_info(adev, task_info);
+		amdgpu_vm_put_task_info(task_info);
+	}
+
 	dev_err(adev->dev, "  in page starting at address 0x%016llx from IH client 0x%x (%s)\n",
 		addr, entry->client_id,
 		soc15_ih_clientid_name[entry->client_id]);
 
-	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
+	if (amdgpu_is_multi_aid(adev))
 		dev_err(adev->dev, "  cookie node_id %d fault from die %s%d%s\n",
 			node_id, node_id % 4 == 3 ? "RSV" : "AID", node_id / 4,
 			node_id % 4 == 1 ? ".XCD0" : node_id % 4 == 2 ? ".XCD1" : "");
@@ -653,13 +658,29 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
 	 * the new fast GRBM interface.
 	 */
 	if ((entry->vmid_src == AMDGPU_GFXHUB(0)) &&
-	    (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2)))
+	    (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2)))
 		RREG32(hub->vm_l2_pro_fault_status);
 
 	status = RREG32(hub->vm_l2_pro_fault_status);
 	cid = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, CID);
 	rw = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, RW);
-	WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+	fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED);
+
+	/* for fed error, kfd will handle it, return directly */
+	if (fed && amdgpu_ras_is_poison_mode_supported(adev) &&
+	    (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2)))
+		return 0;
+
+	/* Only print L2 fault status if the status register could be read and
+	 * contains useful information
+	 */
+	if (!status)
+		return 0;
+
+	if (!amdgpu_sriov_vf(adev))
+		WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+
+	amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status, vmhub);
 
 	dev_err(adev->dev,
 		"VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
@@ -670,7 +691,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
 			gfxhub_client_ids[cid],
 			cid);
 	} else {
-		switch (adev->ip_versions[MMHUB_HWIP][0]) {
+		switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
 		case IP_VERSION(9, 0, 0):
 			mmhub_cid = mmhub_client_ids_vega10[cid][rw];
 			break;
@@ -735,7 +756,8 @@ static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
 	adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs;
 
 	if (!amdgpu_sriov_vf(adev) &&
-	    !adev->gmc.xgmi.connected_to_cpu) {
+	    !adev->gmc.xgmi.connected_to_cpu &&
+	    !adev->gmc.is_app_apu) {
 		adev->gmc.ecc_irq.num_types = 1;
 		adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs;
 	}
@@ -770,8 +792,8 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
 static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
 				       uint32_t vmhub)
 {
-	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
-	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
+	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
+	    amdgpu_is_multi_aid(adev))
 		return false;
 
 	return ((vmhub == AMDGPU_MMHUB0(0) ||
@@ -814,48 +836,37 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 					uint32_t vmhub, uint32_t flush_type)
 {
 	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
+	u32 j, inv_req, tmp, sem, req, ack, inst;
 	const unsigned int eng = 17;
-	u32 j, inv_req, inv_req2, tmp;
 	struct amdgpu_vmhub *hub;
 
 	BUG_ON(vmhub >= AMDGPU_MAX_VMHUBS);
 
 	hub = &adev->vmhub[vmhub];
-	if (adev->gmc.xgmi.num_physical_nodes &&
-	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0)) {
-		/* Vega20+XGMI caches PTEs in TC and TLB. Add a
-		 * heavy-weight TLB flush (type 2), which flushes
-		 * both. Due to a race condition with concurrent
-		 * memory accesses using the same TLB cache line, we
-		 * still need a second TLB flush after this.
-		 */
-		inv_req = gmc_v9_0_get_invalidate_req(vmid, 2);
-		inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type);
-	} else if (flush_type == 2 &&
-		   adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) &&
-		   adev->rev_id == 0) {
-		inv_req = gmc_v9_0_get_invalidate_req(vmid, 0);
-		inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type);
-	} else {
-		inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
-		inv_req2 = 0;
-	}
+	inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
+	sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
+	req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
+	ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
+
+	if (vmhub >= AMDGPU_MMHUB0(0))
+		inst = 0;
+	else
+		inst = vmhub;
 
-	/* This is necessary for a HW workaround under SRIOV as well
-	 * as GFXOFF under bare metal
+	/* This is necessary for SRIOV as well as for GFXOFF to function
+	 * properly under bare metal
 	 */
-	if (adev->gfx.kiq[0].ring.sched.ready &&
-	    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
-	    down_read_trylock(&adev->reset_domain->sem)) {
+	if (adev->gfx.kiq[inst].ring.sched.ready &&
+	    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
 		uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
 		uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
 
-		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
-						   1 << vmid);
-		up_read(&adev->reset_domain->sem);
+		amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req,
+						 1 << vmid, inst);
 		return;
 	}
 
+	/* This path is needed before KIQ/MES/GFXOFF are set up */
 	spin_lock(&adev->gmc.invalidate_lock);
 
 	/*
@@ -870,9 +881,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 		for (j = 0; j < adev->usec_timeout; j++) {
 			/* a read return value of 1 means semaphore acquire */
 			if (vmhub >= AMDGPU_MMHUB0(0))
-				tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng);
+				tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem, GET_INST(GC, inst));
 			else
-				tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng);
+				tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem, GET_INST(GC, inst));
 			if (tmp & 0x1)
 				break;
 			udelay(1);
@@ -882,35 +893,29 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
 	}
 
-	do {
-		if (vmhub >= AMDGPU_MMHUB0(0))
-			WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req);
-		else
-			WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req);
-
-		/*
-		 * Issue a dummy read to wait for the ACK register to
-		 * be cleared to avoid a false ACK due to the new fast
-		 * GRBM interface.
-		 */
-		if ((vmhub == AMDGPU_GFXHUB(0)) &&
-		    (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2)))
-			RREG32_NO_KIQ(hub->vm_inv_eng0_req +
-				      hub->eng_distance * eng);
+	if (vmhub >= AMDGPU_MMHUB0(0))
+		WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req, GET_INST(GC, inst));
+	else
+		WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req, GET_INST(GC, inst));
 
-		for (j = 0; j < adev->usec_timeout; j++) {
-			if (vmhub >= AMDGPU_MMHUB0(0))
-				tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_ack + hub->eng_distance * eng);
-			else
-				tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_ack + hub->eng_distance * eng);
-			if (tmp & (1 << vmid))
-				break;
-			udelay(1);
-		}
+	/*
+	 * Issue a dummy read to wait for the ACK register to
+	 * be cleared to avoid a false ACK due to the new fast
+	 * GRBM interface.
+	 */
+	if ((vmhub == AMDGPU_GFXHUB(0)) &&
+	    (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2)))
+		RREG32_NO_KIQ(req);
 
-		inv_req = inv_req2;
-		inv_req2 = 0;
-	} while (inv_req);
+	for (j = 0; j < adev->usec_timeout; j++) {
+		if (vmhub >= AMDGPU_MMHUB0(0))
+			tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack, GET_INST(GC, inst));
+		else
+			tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack, GET_INST(GC, inst));
+		if (tmp & (1 << vmid))
+			break;
+		udelay(1);
+	}
 
 	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
 	if (use_semaphore) {
@@ -919,9 +924,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 		 * write with 0 means semaphore release
 		 */
 		if (vmhub >= AMDGPU_MMHUB0(0))
-			WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0);
+			WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0, GET_INST(GC, inst));
 		else
-			WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0);
+			WREG32_SOC15_IP_NO_KIQ(GC, sem, 0, GET_INST(GC, inst));
 	}
 
 	spin_unlock(&adev->gmc.invalidate_lock);
@@ -943,91 +948,32 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
  *
  * Flush the TLB for the requested pasid.
  */
-static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
-					uint16_t pasid, uint32_t flush_type,
-					bool all_hub, uint32_t inst)
+static void gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+					 uint16_t pasid, uint32_t flush_type,
+					 bool all_hub, uint32_t inst)
 {
-	int vmid, i;
-	signed long r;
-	uint32_t seq;
-	uint16_t queried_pasid;
-	bool ret;
-	u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
-	struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring;
-	struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
-
-	if (amdgpu_in_reset(adev))
-		return -EIO;
-
-	if (ring->sched.ready && down_read_trylock(&adev->reset_domain->sem)) {
-		/* Vega20+XGMI caches PTEs in TC and TLB. Add a
-		 * heavy-weight TLB flush (type 2), which flushes
-		 * both. Due to a race condition with concurrent
-		 * memory accesses using the same TLB cache line, we
-		 * still need a second TLB flush after this.
-		 */
-		bool vega20_xgmi_wa = (adev->gmc.xgmi.num_physical_nodes &&
-				       adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0));
-		/* 2 dwords flush + 8 dwords fence */
-		unsigned int ndw = kiq->pmf->invalidate_tlbs_size + 8;
-
-		if (vega20_xgmi_wa)
-			ndw += kiq->pmf->invalidate_tlbs_size;
-
-		spin_lock(&adev->gfx.kiq[inst].ring_lock);
-		/* 2 dwords flush + 8 dwords fence */
-		amdgpu_ring_alloc(ring, ndw);
-		if (vega20_xgmi_wa)
-			kiq->pmf->kiq_invalidate_tlbs(ring,
-						      pasid, 2, all_hub);
-
-		if (flush_type == 2 &&
-		    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) &&
-		    adev->rev_id == 0)
-			kiq->pmf->kiq_invalidate_tlbs(ring,
-						      pasid, 0, all_hub);
-
-		kiq->pmf->kiq_invalidate_tlbs(ring,
-					      pasid, flush_type, all_hub);
-		r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
-		if (r) {
-			amdgpu_ring_undo(ring);
-			spin_unlock(&adev->gfx.kiq[inst].ring_lock);
-			up_read(&adev->reset_domain->sem);
-			return -ETIME;
-		}
-
-		amdgpu_ring_commit(ring);
-		spin_unlock(&adev->gfx.kiq[inst].ring_lock);
-		r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
-		if (r < 1) {
-			dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
-			up_read(&adev->reset_domain->sem);
-			return -ETIME;
-		}
-		up_read(&adev->reset_domain->sem);
-		return 0;
-	}
+	uint16_t queried;
+	int i, vmid;
 
 	for (vmid = 1; vmid < 16; vmid++) {
+		bool valid;
 
-		ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
-							       &queried_pasid);
-		if (ret && queried_pasid == pasid) {
-			if (all_hub) {
-				for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
-					gmc_v9_0_flush_gpu_tlb(adev, vmid,
-							i, flush_type);
-			} else {
-				gmc_v9_0_flush_gpu_tlb(adev, vmid,
-						AMDGPU_GFXHUB(0), flush_type);
-			}
-			break;
+		valid = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+								 &queried);
+		if (!valid || queried != pasid)
+			continue;
+
+		if (all_hub) {
+			for_each_set_bit(i, adev->vmhubs_mask,
+					 AMDGPU_MAX_VMHUBS)
+				gmc_v9_0_flush_gpu_tlb(adev, vmid, i,
+						       flush_type);
+		} else {
+			gmc_v9_0_flush_gpu_tlb(adev, vmid,
+					       AMDGPU_GFXHUB(0),
+					       flush_type);
 		}
 	}
-
-	return 0;
-
 }
 
 static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
@@ -1129,27 +1075,6 @@ static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int v
  * 0 valid
  */
 
-static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
-
-{
-	switch (flags) {
-	case AMDGPU_VM_MTYPE_DEFAULT:
-		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
-	case AMDGPU_VM_MTYPE_NC:
-		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
-	case AMDGPU_VM_MTYPE_WC:
-		return AMDGPU_PTE_MTYPE_VG10(MTYPE_WC);
-	case AMDGPU_VM_MTYPE_RW:
-		return AMDGPU_PTE_MTYPE_VG10(MTYPE_RW);
-	case AMDGPU_VM_MTYPE_CC:
-		return AMDGPU_PTE_MTYPE_VG10(MTYPE_CC);
-	case AMDGPU_VM_MTYPE_UC:
-		return AMDGPU_PTE_MTYPE_VG10(MTYPE_UC);
-	default:
-		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
-	}
-}
-
 static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
 				uint64_t *addr, uint64_t *flags)
 {
@@ -1177,20 +1102,26 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
 }
 
 static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
+					 struct amdgpu_vm *vm,
 					 struct amdgpu_bo *bo,
-					 struct amdgpu_bo_va_mapping *mapping,
+					 uint32_t vm_flags,
 					 uint64_t *flags)
 {
 	struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM;
-	bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT;
+	bool is_vram = bo->tbo.resource &&
+		bo->tbo.resource->mem_type == TTM_PL_VRAM;
+	bool coherent = bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+				     AMDGPU_GEM_CREATE_EXT_COHERENT);
+	bool ext_coherent = bo->flags & AMDGPU_GEM_CREATE_EXT_COHERENT;
 	bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
-	struct amdgpu_vm *vm = mapping->bo_va->base.vm;
 	unsigned int mtype_local, mtype;
+	uint32_t gc_ip_version = amdgpu_ip_version(adev, GC_HWIP, 0);
 	bool snoop = false;
 	bool is_local;
 
-	switch (adev->ip_versions[GC_HWIP][0]) {
+	dma_resv_assert_held(bo->tbo.base.resv);
+
+	switch (gc_ip_version) {
 	case IP_VERSION(9, 4, 1):
 	case IP_VERSION(9, 4, 2):
 		if (is_vram) {
@@ -1204,8 +1135,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
 			/* FIXME: is this still needed? Or does
 			 * amdgpu_ttm_tt_pde_flags already handle this?
 			 */
-			if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
-			     adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) &&
+			if (gc_ip_version == IP_VERSION(9, 4, 2) &&
 			    adev->gmc.xgmi.connected_to_cpu)
 				snoop = true;
 		} else {
@@ -1213,7 +1143,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
 				mtype = MTYPE_UC;
 			else
 				mtype = MTYPE_NC;
-			if (mapping->bo_va->is_xgmi)
+			if (amdgpu_xgmi_same_hive(adev, bo_adev))
 				snoop = true;
 		}
 	} else {
@@ -1228,6 +1158,8 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
 		}
 		break;
 	case IP_VERSION(9, 4, 3):
+	case IP_VERSION(9, 4, 4):
+	case IP_VERSION(9, 5, 0):
 		/* Only local VRAM BOs or system memory on non-NUMA APUs
 		 * can be assumed to be local in their entirety. Choose
 		 * MTYPE_NC as safe fallback for all system memory BOs on
@@ -1251,16 +1183,18 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
 			snoop = true;
 		if (uncached) {
 			mtype = MTYPE_UC;
+		} else if (ext_coherent) {
+			mtype = is_local ? MTYPE_CC : MTYPE_UC;
 		} else if (adev->flags & AMD_IS_APU) {
 			mtype = is_local ? mtype_local : MTYPE_NC;
 		} else {
 			/* dGPU */
 			if (is_local)
 				mtype = mtype_local;
-			else if (is_vram)
-				mtype = MTYPE_NC;
-			else
+			else if (gc_ip_version < IP_VERSION(9, 5, 0) && !is_vram)
 				mtype = MTYPE_UC;
+			else
+				mtype = MTYPE_NC;
 		}
 
 		break;
@@ -1278,31 +1212,49 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
 	}
 
 	if (mtype != MTYPE_NC)
-		*flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
-			 AMDGPU_PTE_MTYPE_VG10(mtype);
+		*flags = AMDGPU_PTE_MTYPE_VG10(*flags, mtype);
+
 	*flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
 }
 
 static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
-				struct amdgpu_bo_va_mapping *mapping,
+				struct amdgpu_vm *vm,
+				struct amdgpu_bo *bo,
+				uint32_t vm_flags,
 				uint64_t *flags)
 {
-	struct amdgpu_bo *bo = mapping->bo_va->base.bo;
-
-	*flags &= ~AMDGPU_PTE_EXECUTABLE;
-	*flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+	if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
+		*flags |= AMDGPU_PTE_EXECUTABLE;
+	else
+		*flags &= ~AMDGPU_PTE_EXECUTABLE;
 
-	*flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
-	*flags |= mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK;
+	switch (vm_flags & AMDGPU_VM_MTYPE_MASK) {
+	case AMDGPU_VM_MTYPE_DEFAULT:
+	case AMDGPU_VM_MTYPE_NC:
+	default:
+		*flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_NC);
+		break;
+	case AMDGPU_VM_MTYPE_WC:
+		*flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_WC);
+		break;
+	case AMDGPU_VM_MTYPE_RW:
+		*flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_RW);
+		break;
+	case AMDGPU_VM_MTYPE_CC:
+		*flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_CC);
+		break;
+	case AMDGPU_VM_MTYPE_UC:
+		*flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_UC);
+		break;
+	}
 
-	if (mapping->flags & AMDGPU_PTE_PRT) {
+	if (vm_flags & AMDGPU_VM_PAGE_PRT) {
 		*flags |= AMDGPU_PTE_PRT;
 		*flags &= ~AMDGPU_PTE_VALID;
 	}
 
-	if (bo && bo->tbo.resource)
-		gmc_v9_0_get_coherence_flags(adev, mapping->bo_va->base.bo,
-					     mapping, flags);
+	if ((*flags & AMDGPU_PTE_VALID) && bo)
+		gmc_v9_0_get_coherence_flags(adev, vm, bo, vm_flags, flags);
 }
 
 static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
@@ -1314,23 +1266,27 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
 	/* Only GFX 9.4.3 APUs associate GPUs with NUMA nodes. Local system
 	 * memory can use more efficient MTYPEs.
	 */
-	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3))
+	if (!(adev->flags & AMD_IS_APU) ||
+	    amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 3))
 		return;
 
 	/* Only direct-mapped memory allows us to determine the NUMA node from
 	 * the DMA address.
 	 */
 	if (!adev->ram_is_direct_mapped) {
-		dev_dbg(adev->dev, "RAM is not direct mapped\n");
+		dev_dbg_ratelimited(adev->dev, "RAM is not direct mapped\n");
 		return;
 	}
 
-	/* Only override mappings with MTYPE_NC, which is the safe default for
-	 * cacheable memory.
+	/* MTYPE_NC is the same default and can be overridden.
+	 * MTYPE_UC will be present if the memory is extended-coherent
+	 * and can also be overridden.
 	 */
 	if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) !=
-	    AMDGPU_PTE_MTYPE_VG10(MTYPE_NC)) {
-		dev_dbg(adev->dev, "MTYPE is not NC\n");
+	    AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC) &&
+	    (*flags & AMDGPU_PTE_MTYPE_VG10_MASK) !=
+	    AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_UC)) {
+		dev_dbg_ratelimited(adev->dev, "MTYPE is not NC or UC\n");
 		return;
 	}
 
@@ -1341,7 +1297,7 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
 	if (adev->gmc.is_app_apu && vm->mem_id >= 0) {
 		local_node = adev->gmc.mem_partitions[vm->mem_id].numa.node;
 	} else {
-		dev_dbg(adev->dev, "Only native mode APU is supported.\n");
+		dev_dbg_ratelimited(adev->dev, "Only native mode APU is supported.\n");
 		return;
 	}
 
@@ -1349,25 +1305,31 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
 	 * page or NUMA nodes.
	 */
 	if (!page_is_ram(addr >> PAGE_SHIFT)) {
-		dev_dbg(adev->dev, "Page is not RAM.\n");
+		dev_dbg_ratelimited(adev->dev, "Page is not RAM.\n");
 		return;
 	}
 	nid = pfn_to_nid(addr >> PAGE_SHIFT);
-	dev_dbg(adev->dev, "vm->mem_id=%d, local_node=%d, nid=%d\n",
-		vm->mem_id, local_node, nid);
+	dev_dbg_ratelimited(adev->dev, "vm->mem_id=%d, local_node=%d, nid=%d\n",
+			    vm->mem_id, local_node, nid);
 
 	if (nid == local_node) {
 		uint64_t old_flags = *flags;
-		unsigned int mtype_local = MTYPE_RW;
+		if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) ==
+		    AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC)) {
+			unsigned int mtype_local = MTYPE_RW;
 
-		if (amdgpu_mtype_local == 1)
-			mtype_local = MTYPE_NC;
-		else if (amdgpu_mtype_local == 2)
-			mtype_local = MTYPE_CC;
+			if (amdgpu_mtype_local == 1)
+				mtype_local = MTYPE_NC;
+			else if (amdgpu_mtype_local == 2)
+				mtype_local = MTYPE_CC;
+
+			*flags = AMDGPU_PTE_MTYPE_VG10(*flags, mtype_local);
+		} else {
+			/* MTYPE_UC case */
+			*flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_CC);
+		}
 
-		*flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
-			 AMDGPU_PTE_MTYPE_VG10(mtype_local);
-		dev_dbg(adev->dev, "flags updated from %llx to %llx\n",
-			old_flags, *flags);
+		dev_dbg_ratelimited(adev->dev, "flags updated from %llx to %llx\n",
+				    old_flags, *flags);
 	}
 }
 
@@ -1383,7 +1345,7 @@ static unsigned int gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
 	} else {
 		u32 viewport;
 
-		switch (adev->ip_versions[DCE_HWIP][0]) {
+		switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
 		case IP_VERSION(1, 0, 0):
 		case IP_VERSION(1, 0, 1):
 			viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
@@ -1413,25 +1375,15 @@ static unsigned int gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
 	return size;
 }
 
-static enum amdgpu_memory_partition
-gmc_v9_0_get_memory_partition(struct amdgpu_device *adev, u32 *supp_modes)
+static bool gmc_v9_0_need_reset_on_init(struct amdgpu_device *adev)
 {
-	enum amdgpu_memory_partition mode = UNKNOWN_MEMORY_PARTITION_MODE;
-
-	if (adev->nbio.funcs->get_memory_partition_mode)
-		mode = adev->nbio.funcs->get_memory_partition_mode(adev,
-								   supp_modes);
-
-	return mode;
-}
-
-static enum amdgpu_memory_partition
-gmc_v9_0_query_memory_partition(struct amdgpu_device *adev)
-{
-	if (amdgpu_sriov_vf(adev))
-		return AMDGPU_NPS1_PARTITION_MODE;
+	if (adev->nbio.funcs && adev->nbio.funcs->is_nps_switch_requested &&
+	    adev->nbio.funcs->is_nps_switch_requested(adev)) {
+		adev->gmc.reset_flags |= AMDGPU_GMC_INIT_RESET_NPS;
+		return true;
+	}
 
-	return gmc_v9_0_get_memory_partition(adev, NULL);
+	return false;
 }
 
 static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
@@ -1439,12 +1391,13 @@ static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
 	.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
 	.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
 	.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
-	.map_mtype = gmc_v9_0_map_mtype,
 	.get_vm_pde = gmc_v9_0_get_vm_pde,
 	.get_vm_pte = gmc_v9_0_get_vm_pte,
 	.override_vm_pte_flags = gmc_v9_0_override_vm_pte_flags,
 	.get_vbios_fb_size = gmc_v9_0_get_vbios_fb_size,
-	.query_mem_partition_mode = &gmc_v9_0_query_memory_partition,
+	.query_mem_partition_mode = &amdgpu_gmc_query_memory_partition,
+	.request_mem_partition_mode = &amdgpu_gmc_request_memory_partition,
+	.need_reset_on_init = &gmc_v9_0_need_reset_on_init,
 };
 
 static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
@@ -1454,7 +1407,7 @@ static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
 
 static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
 {
-	switch (adev->ip_versions[UMC_HWIP][0]) {
+	switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
 	case IP_VERSION(6, 0, 0):
 		adev->umc.funcs = &umc_v6_0_funcs;
 		break;
@@ -1490,6 +1443,17 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
 		else
 			adev->umc.channel_idx_tbl = &umc_v6_7_channel_idx_tbl_second[0][0];
 		break;
+	case IP_VERSION(12, 0, 0):
+	case IP_VERSION(12, 5, 0):
+		adev->umc.max_ras_err_cnt_per_query =
+			UMC_V12_0_TOTAL_CHANNEL_NUM(adev) * UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL;
+		adev->umc.channel_inst_num = UMC_V12_0_CHANNEL_INSTANCE_NUM;
+		adev->umc.umc_inst_num = UMC_V12_0_UMC_INSTANCE_NUM;
+		adev->umc.node_inst_num /= UMC_V12_0_UMC_INSTANCE_NUM;
+		adev->umc.channel_offs = UMC_V12_0_PER_CHANNEL_OFFSET;
+		if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu)
+			adev->umc.ras = &umc_v12_0_ras;
+		break;
 	default:
 		break;
 	}
@@ -1497,7 +1461,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
 
 static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
 {
-	switch (adev->ip_versions[MMHUB_HWIP][0]) {
+	switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
 	case IP_VERSION(9, 4, 1):
 		adev->mmhub.funcs = &mmhub_v9_4_funcs;
 		break;
@@ -1505,6 +1469,7 @@ static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
 		adev->mmhub.funcs = &mmhub_v1_7_funcs;
 		break;
 	case IP_VERSION(1, 8, 0):
+	case IP_VERSION(1, 8, 1):
 		adev->mmhub.funcs = &mmhub_v1_8_funcs;
 		break;
 	default:
@@ -1515,7 +1480,7 @@ static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
 
 static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev)
 {
-	switch (adev->ip_versions[MMHUB_HWIP][0]) {
+	switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
 	case IP_VERSION(9, 4, 0):
 		adev->mmhub.ras = &mmhub_v1_0_ras;
 		break;
@@ -1526,6 +1491,7 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev)
 		adev->mmhub.ras = &mmhub_v1_7_ras;
 		break;
 	case IP_VERSION(1, 8, 0):
+	case IP_VERSION(1, 8, 1):
 		adev->mmhub.ras = &mmhub_v1_8_ras;
 		break;
 	default:
@@ -1536,7 +1502,7 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev)
 
 static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev)
 {
-	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
+	if (amdgpu_is_multi_aid(adev))
 		adev->gfxhub.funcs = &gfxhub_v1_2_funcs;
 	else
 		adev->gfxhub.funcs = &gfxhub_v1_0_funcs;
@@ -1552,7 +1518,7 @@ static void gmc_v9_0_set_mca_ras_funcs(struct amdgpu_device *adev)
 	struct amdgpu_mca *mca = &adev->mca;
 
 	/* is UMC the right IP to check for MCA? Maybe DF? */
-	switch (adev->ip_versions[UMC_HWIP][0]) {
+	switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
 	case IP_VERSION(6, 7, 0):
 		if (!adev->gmc.xgmi.connected_to_cpu) {
 			mca->mp0.ras = &mca_v3_0_mp0_ras;
@@ -1571,26 +1537,63 @@ static void gmc_v9_0_set_xgmi_ras_funcs(struct amdgpu_device *adev)
 		adev->gmc.xgmi.ras = &xgmi_ras;
 }
 
-static int gmc_v9_0_early_init(void *handle)
+static void gmc_v9_0_init_nps_details(struct amdgpu_device *adev)
+{
+	enum amdgpu_memory_partition mode;
+	uint32_t supp_modes;
+	int i;
+
+	adev->gmc.supported_nps_modes = 0;
+
+	if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU))
+		return;
+
+	mode = amdgpu_gmc_get_memory_partition(adev, &supp_modes);
+
+	/* Mode detected by hardware and supported modes available */
+	if ((mode != UNKNOWN_MEMORY_PARTITION_MODE) && supp_modes) {
+		while ((i = ffs(supp_modes))) {
+			if (AMDGPU_ALL_NPS_MASK & BIT(i))
+				adev->gmc.supported_nps_modes |= BIT(i);
+			supp_modes &= supp_modes - 1;
+		}
+	} else {
+		/*TODO: Check PSP version also which supports NPS switch. Otherwise keep
+		 * supported modes as 0.
		 */
+		switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+		case IP_VERSION(9, 4, 3):
+		case IP_VERSION(9, 4, 4):
+			adev->gmc.supported_nps_modes =
+				BIT(AMDGPU_NPS1_PARTITION_MODE) |
+				BIT(AMDGPU_NPS4_PARTITION_MODE);
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+static int gmc_v9_0_early_init(struct amdgpu_ip_block *ip_block)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_device *adev = ip_block->adev;
 
 	/*
 	 * 9.4.0, 9.4.1 and 9.4.3 don't have XGMI defined
 	 * in their IP discovery tables
 	 */
-	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0) ||
-	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
-	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
+	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0) ||
+	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
+	    amdgpu_is_multi_aid(adev))
 		adev->gmc.xgmi.supported = true;
 
-	if (adev->ip_versions[XGMI_HWIP][0] == IP_VERSION(6, 1, 0)) {
+	if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == IP_VERSION(6, 1, 0)) {
 		adev->gmc.xgmi.supported = true;
 		adev->gmc.xgmi.connected_to_cpu =
 			adev->smuio.funcs->is_host_gpu_xgmi_supported(adev);
 	}
 
-	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) {
+	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) {
 		enum amdgpu_pkg_type pkg_type =
 			adev->smuio.funcs->get_pkg_type(adev);
 		/* On GFXIP 9.4.3. APU, there is no physical VRAM domain present
@@ -1625,9 +1628,9 @@ static int gmc_v9_0_early_init(void *handle)
 	return 0;
 }
 
-static int gmc_v9_0_late_init(void *handle)
+static int gmc_v9_0_late_init(struct amdgpu_ip_block *ip_block)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_device *adev = ip_block->adev;
 	int r;
 
 	r = amdgpu_gmc_allocate_vm_inv_eng(adev);
@@ -1639,7 +1642,7 @@ static int gmc_v9_0_late_init(void *handle)
 	 * writes, while disables HBM ECC for vega10.
	 */
 	if (!amdgpu_sriov_vf(adev) &&
-	    (adev->ip_versions[UMC_HWIP][0] == IP_VERSION(6, 0, 0))) {
+	    (amdgpu_ip_version(adev, UMC_HWIP, 0) == IP_VERSION(6, 0, 0))) {
 		if (!(adev->ras_enabled & (1 << AMDGPU_RAS_BLOCK__UMC))) {
 			if (adev->df.funcs &&
 			    adev->df.funcs->enable_ecc_force_par_wr_rmw)
@@ -1648,13 +1651,8 @@ static int gmc_v9_0_late_init(void *handle)
 	}
 
 	if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
-		if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
-		    adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
-			adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
-
-		if (adev->hdp.ras && adev->hdp.ras->ras_block.hw_ops &&
-		    adev->hdp.ras->ras_block.hw_ops->reset_ras_error_count)
-			adev->hdp.ras->ras_block.hw_ops->reset_ras_error_count(adev);
+		amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
+		amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__HDP);
 	}
 
 	r = amdgpu_gmc_ras_late_init(adev);
@@ -1669,14 +1667,17 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
 {
 	u64 base = adev->mmhub.funcs->get_fb_location(adev);
 
+	amdgpu_gmc_set_agp_default(adev, mc);
+
 	/* add the xgmi offset of the physical node */
 	base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
-	if (adev->gmc.xgmi.connected_to_cpu) {
+	if (amdgpu_gmc_is_pdb0_enabled(adev)) {
 		amdgpu_gmc_sysvm_location(adev, mc);
 	} else {
 		amdgpu_gmc_vram_location(adev, mc, base);
-		amdgpu_gmc_gart_location(adev, mc);
-		amdgpu_gmc_agp_location(adev, mc);
+		amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
+		if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1))
+			amdgpu_gmc_agp_location(adev, mc);
 	}
 	/* base offset of vram pages */
 	adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
@@ -1747,13 +1748,15 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
 
 	/* set the gart size */
 	if (amdgpu_gart_size == -1) {
-		switch (adev->ip_versions[GC_HWIP][0]) {
+		switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
 		case IP_VERSION(9, 0, 1):  /* all engines support GPUVM */
 		case IP_VERSION(9, 2, 1):  /* all engines support GPUVM */
 		case IP_VERSION(9, 4, 0):
 		case IP_VERSION(9, 4, 1):
 		case IP_VERSION(9, 4, 2):
 		case IP_VERSION(9, 4, 3):
+		case IP_VERSION(9, 4, 4):
+		case IP_VERSION(9, 5, 0):
 		default:
 			adev->gmc.gart_size = 512ULL << 20;
 			break;
@@ -1783,7 +1786,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
 		return 0;
 	}
 
-	if (adev->gmc.xgmi.connected_to_cpu) {
+	if (amdgpu_gmc_is_pdb0_enabled(adev)) {
 		adev->gmc.vmid0_page_table_depth = 1;
 		adev->gmc.vmid0_page_table_block_size = 12;
 	} else {
@@ -1796,7 +1799,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
 	if (r)
 		return r;
 	adev->gart.table_size = adev->gart.num_gpu_pages * 8;
-	adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(MTYPE_UC) |
+	adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_UC) |
 				 AMDGPU_PTE_EXECUTABLE;
 
 	if (!adev->gmc.real_vram_size) {
@@ -1809,7 +1812,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
 	if (r)
 		return r;
 
-	if (adev->gmc.xgmi.connected_to_cpu)
+	if (amdgpu_gmc_is_pdb0_enabled(adev))
 		r = amdgpu_gmc_pdb0_alloc(adev);
 }
 
@@ -1826,195 +1829,36 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
  */
 static void gmc_v9_0_save_registers(struct amdgpu_device *adev)
 {
-	if ((adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 0)) ||
-	    (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 1)))
+	if ((amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 0)) ||
+	    (amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 1)))
 		adev->gmc.sdpif_register = RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0);
 }
 
-static bool gmc_v9_0_validate_partition_info(struct amdgpu_device *adev)
-{
-	enum amdgpu_memory_partition mode;
-	u32 supp_modes;
-	bool valid;
-
-	mode = gmc_v9_0_get_memory_partition(adev, &supp_modes);
-
-	/* Mode detected by hardware not present in supported modes */
-	if ((mode != UNKNOWN_MEMORY_PARTITION_MODE) &&
-	    !(BIT(mode - 1) & supp_modes))
-		return false;
-
-	switch (mode) {
-	case UNKNOWN_MEMORY_PARTITION_MODE:
-	case AMDGPU_NPS1_PARTITION_MODE:
-		valid = (adev->gmc.num_mem_partitions == 1);
-		break;
-	case AMDGPU_NPS2_PARTITION_MODE:
-		valid = (adev->gmc.num_mem_partitions == 2);
-		break;
-	case AMDGPU_NPS4_PARTITION_MODE:
-		valid = (adev->gmc.num_mem_partitions == 3 ||
-			 adev->gmc.num_mem_partitions == 4);
-		break;
-	default:
-		valid = false;
-	}
-
-	return valid;
-}
-
-static bool gmc_v9_0_is_node_present(int *node_ids, int num_ids, int nid)
-{
-	int i;
-
-	/* Check if node with id 'nid' is present in 'node_ids' array */
-	for (i = 0; i < num_ids; ++i)
-		if (node_ids[i] == nid)
-			return true;
-
-	return false;
-}
-
-static void
-gmc_v9_0_init_acpi_mem_ranges(struct amdgpu_device *adev,
-			      struct amdgpu_mem_partition_info *mem_ranges)
-{
-	int num_ranges = 0, ret, mem_groups;
-	struct amdgpu_numa_info numa_info;
-	int node_ids[MAX_MEM_RANGES];
-	int num_xcc, xcc_id;
-	uint32_t xcc_mask;
-
-	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
-	xcc_mask = (1U << num_xcc) - 1;
-	mem_groups = hweight32(adev->aid_mask);
-
-	for_each_inst(xcc_id, xcc_mask)	{
-		ret = amdgpu_acpi_get_mem_info(adev, xcc_id, &numa_info);
-		if (ret)
-			continue;
-
-		if (numa_info.nid == NUMA_NO_NODE) {
-			mem_ranges[0].size = numa_info.size;
-			mem_ranges[0].numa.node = numa_info.nid;
-			num_ranges = 1;
-			break;
-		}
-
-		if (gmc_v9_0_is_node_present(node_ids, num_ranges,
-					     numa_info.nid))
-			continue;
-
-		node_ids[num_ranges] = numa_info.nid;
-		mem_ranges[num_ranges].numa.node = numa_info.nid;
-		mem_ranges[num_ranges].size = numa_info.size;
-		++num_ranges;
-	}
-
-	adev->gmc.num_mem_partitions = num_ranges;
-
-	/* If there is only partition, don't use entire size */
-	if (adev->gmc.num_mem_partitions == 1) {
-		mem_ranges[0].size = mem_ranges[0].size * (mem_groups - 1);
-		do_div(mem_ranges[0].size, mem_groups);
-	}
-}
-
-static void
-gmc_v9_0_init_sw_mem_ranges(struct amdgpu_device *adev,
-			    struct amdgpu_mem_partition_info *mem_ranges)
-{
-	enum amdgpu_memory_partition mode;
-	u32 start_addr = 0, size;
-	int i;
-
-	mode = gmc_v9_0_query_memory_partition(adev);
-
-	switch (mode) {
-	case UNKNOWN_MEMORY_PARTITION_MODE:
-	case AMDGPU_NPS1_PARTITION_MODE:
-		adev->gmc.num_mem_partitions = 1;
-		break;
-	case AMDGPU_NPS2_PARTITION_MODE:
-		adev->gmc.num_mem_partitions = 2;
-		break;
-	case AMDGPU_NPS4_PARTITION_MODE:
-		if (adev->flags & AMD_IS_APU)
-			adev->gmc.num_mem_partitions = 3;
-		else
-			adev->gmc.num_mem_partitions = 4;
-		break;
-	default:
-		adev->gmc.num_mem_partitions = 1;
-		break;
-	}
-
-	size = adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT;
-	size /= adev->gmc.num_mem_partitions;
-
-	for (i = 0; i < adev->gmc.num_mem_partitions; ++i) {
-		mem_ranges[i].range.fpfn = start_addr;
-		mem_ranges[i].size = ((u64)size << AMDGPU_GPU_PAGE_SHIFT);
-		mem_ranges[i].range.lpfn = start_addr + size - 1;
-		start_addr += size;
-	}
-
-	/* Adjust the last one */
-	mem_ranges[adev->gmc.num_mem_partitions - 1].range.lpfn =
-		(adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT) - 1;
-	mem_ranges[adev->gmc.num_mem_partitions - 1].size =
-		adev->gmc.real_vram_size -
-		((u64)mem_ranges[adev->gmc.num_mem_partitions - 1].range.fpfn
-		 << AMDGPU_GPU_PAGE_SHIFT);
-}
-
-static int gmc_v9_0_init_mem_ranges(struct amdgpu_device *adev)
-{
-	bool valid;
-
-	adev->gmc.mem_partitions = kzalloc(
-		MAX_MEM_RANGES * sizeof(struct amdgpu_mem_partition_info),
-		GFP_KERNEL);
-
-	if (!adev->gmc.mem_partitions)
-		return -ENOMEM;
-
-	/* TODO : Get the range from PSP/Discovery for dGPU */
-	if (adev->gmc.is_app_apu)
-		gmc_v9_0_init_acpi_mem_ranges(adev, adev->gmc.mem_partitions);
-	else
-		gmc_v9_0_init_sw_mem_ranges(adev, adev->gmc.mem_partitions);
-
-	if (amdgpu_sriov_vf(adev))
-		valid = true;
-	else
-		valid = gmc_v9_0_validate_partition_info(adev);
-	if (!valid) {
-		/* TODO: handle invalid case */
-		dev_WARN(adev->dev,
-			 "Mem ranges not matching with hardware config");
-	}
-
-	return 0;
-}
-
 static void gmc_v9_4_3_init_vram_info(struct amdgpu_device *adev)
 {
 	static const u32 regBIF_BIOS_SCRATCH_4 = 0x50;
 	u32 vram_info;
 
-	if (!amdgpu_sriov_vf(adev)) {
+	adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM;
+	adev->gmc.vram_width = 128 * 64;
+
+	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
+		adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM3E;
+
+	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) &&
+	    adev->rev_id == 0x3)
+		adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM3E;
+
+	if (!(adev->flags & AMD_IS_APU) && !amdgpu_sriov_vf(adev)) {
 		vram_info = RREG32(regBIF_BIOS_SCRATCH_4);
 		adev->gmc.vram_vendor = vram_info & 0xF;
 	}
-	adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM;
-	adev->gmc.vram_width = 128 * 64;
 }
 
-static int gmc_v9_0_sw_init(void *handle)
+static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
 {
 	int r, vram_width = 0, vram_type = 0, vram_vendor = 0, dma_addr_bits;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_device *adev = ip_block->adev;
 	unsigned long inst_mask = adev->aid_mask;
 
 	adev->gfxhub.funcs->init(adev);
@@ -2023,7 +1867,7 @@ static int gmc_v9_0_sw_init(void *handle)
 
 	spin_lock_init(&adev->gmc.invalidate_lock);
 
-	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) {
+	if (amdgpu_is_multi_aid(adev)) {
 		gmc_v9_4_3_init_vram_info(adev);
 	} else if (!adev->bios) {
 		if (adev->flags & AMD_IS_APU) {
@@ -2063,7 +1907,7 @@ static int gmc_v9_0_sw_init(void *handle)
 		adev->gmc.vram_type = vram_type;
 		adev->gmc.vram_vendor = vram_vendor;
 	}
-	switch (adev->ip_versions[GC_HWIP][0]) {
+	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
 	case IP_VERSION(9, 1, 0):
 	case IP_VERSION(9, 2, 2):
 		set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
@@ -2091,12 +1935,9 @@ static int gmc_v9_0_sw_init(void *handle)
 		 * vm size is 256TB (48bit), maximum size of Vega10,
 		 * block size 512 (9bit)
		 */
-		/* sriov restrict max_pfn below AMDGPU_GMC_HOLE */
-		if (amdgpu_sriov_vf(adev))
-			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47);
-		else
-			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
-		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
+
+		amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+		if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
 			adev->gmc.translate_further = adev->vm_manager.num_level > 1;
 		break;
 	case IP_VERSION(9, 4, 1):
@@ -2109,6 +1950,8 @@ static int gmc_v9_0_sw_init(void *handle)
 		adev->gmc.translate_further = adev->vm_manager.num_level > 1;
 		break;
 	case IP_VERSION(9, 4, 3):
+	case IP_VERSION(9, 4, 4):
+	case IP_VERSION(9, 5, 0):
 		bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0),
 			   NUM_XCC(adev->gfx.xcc_mask));
 
@@ -2128,7 +1971,7 @@ static int gmc_v9_0_sw_init(void *handle)
 	if (r)
 		return r;
 
-	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) {
+	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) {
 		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC1,
 					VMC_1_0__SRCID__VM_FAULT,
 					&adev->gmc.vm_fault);
@@ -2142,7 +1985,8 @@ static int gmc_v9_0_sw_init(void *handle)
 		return r;
 
 	if (!amdgpu_sriov_vf(adev) &&
-	    !adev->gmc.xgmi.connected_to_cpu) {
+	    !adev->gmc.xgmi.connected_to_cpu &&
+	    !adev->gmc.is_app_apu) {
 		/* interrupt sent to DF. */
 		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0,
 				      &adev->gmc.ecc_irq);
@@ -2156,7 +2000,10 @@ static int gmc_v9_0_sw_init(void *handle)
	 */
 	adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
 
-	dma_addr_bits = adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2) ? 48:44;
+	dma_addr_bits = amdgpu_ip_version(adev, GC_HWIP, 0) >=
+				IP_VERSION(9, 4, 2) ?
+			48 :
+			44;
 	r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(dma_addr_bits));
 	if (r) {
 		dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n");
@@ -2170,8 +2017,8 @@ static int gmc_v9_0_sw_init(void *handle)
 
 	amdgpu_gmc_get_vbios_allocations(adev);
 
-	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) {
-		r = gmc_v9_0_init_mem_ranges(adev);
+	if (amdgpu_is_multi_aid(adev)) {
+		r = amdgpu_gmc_init_mem_ranges(adev);
 		if (r)
 			return r;
 	}
@@ -2185,6 +2032,7 @@ static int gmc_v9_0_sw_init(void *handle)
 	if (r)
 		return r;
 
+	gmc_v9_0_init_nps_details(adev);
 	/*
 	 * number of VMs
 	 * VMID 0 is reserved for System
@@ -2196,9 +2044,11 @@ static int gmc_v9_0_sw_init(void *handle)
	 * for video processing.
	 */
 	adev->vm_manager.first_kfd_vmid =
-		(adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
-		 adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
-		 adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) ? 3 : 8;
+		(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
+		 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
+		 amdgpu_is_multi_aid(adev)) ?
+			3 :
+			8;
 
 	amdgpu_vm_manager_init(adev);
 
@@ -2208,20 +2058,18 @@ static int gmc_v9_0_sw_init(void *handle)
 	if (r)
 		return r;
 
-	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
+	if (amdgpu_is_multi_aid(adev))
 		amdgpu_gmc_sysfs_init(adev);
 
 	return 0;
 }
 
-static int gmc_v9_0_sw_fini(void *handle)
+static int gmc_v9_0_sw_fini(struct amdgpu_ip_block *ip_block)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_device *adev = ip_block->adev;
 
-	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3))
+	if (amdgpu_is_multi_aid(adev))
 		amdgpu_gmc_sysfs_fini(adev);
 
-	adev->gmc.num_mem_partitions = 0;
-	kfree(adev->gmc.mem_partitions);
-
 	amdgpu_gmc_ras_fini(adev);
 	amdgpu_gem_force_release(adev);
@@ -2235,13 +2083,15 @@ static int gmc_v9_0_sw_fini(void *handle)
 	amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0);
 	amdgpu_bo_fini(adev);
 
+	adev->gmc.num_mem_partitions = 0;
+	kfree(adev->gmc.mem_partitions);
+
 	return 0;
 }
 
 static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
 {
-
-	switch (adev->ip_versions[MMHUB_HWIP][0]) {
+	switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
 	case IP_VERSION(9, 0, 0):
 		if (amdgpu_sriov_vf(adev))
 			break;
@@ -2275,8 +2125,8 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
  */
 void gmc_v9_0_restore_registers(struct amdgpu_device *adev)
 {
-	if ((adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 0)) ||
-	    (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 1))) {
+	if ((amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 0)) ||
+	    (amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 1))) {
 		WREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0, adev->gmc.sdpif_register);
 		WARN_ON(adev->gmc.sdpif_register !=
 			RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0));
@@ -2292,7 +2142,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
 {
 	int r;
 
-	if (adev->gmc.xgmi.connected_to_cpu)
+	if (amdgpu_gmc_is_pdb0_enabled(adev))
 		amdgpu_gmc_init_pdb0(adev);
 
 	if (adev->gart.bo == NULL) {
@@ -2323,12 +2173,23 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
 	return 0;
 }
 
-static int gmc_v9_0_hw_init(void *handle)
+static int gmc_v9_0_hw_init(struct amdgpu_ip_block *ip_block)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_device *adev = ip_block->adev;
 	bool value;
 	int i, r;
 
+	adev->gmc.flush_pasid_uses_kiq = true;
+
+	/* Vega20+XGMI caches PTEs in TC and TLB. Add a heavy-weight TLB flush
+	 * (type 2), which flushes both. Due to a race condition with
	 * concurrent memory accesses using the same TLB cache line, we still
	 * need a second TLB flush after this.
	 */
+	adev->gmc.flush_tlb_needs_extra_type_2 =
+		amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0) &&
+		adev->gmc.xgmi.num_physical_nodes;
+
 	/* The sequence of these two function calls matters.*/
 	gmc_v9_0_init_golden_registers(adev);
 
@@ -2345,7 +2206,7 @@ static int gmc_v9_0_hw_init(void *handle)
 		adev->hdp.funcs->init_registers(adev);
 
 	/* After HDP is initialized, flush HDP.*/
-	adev->hdp.funcs->flush_hdp(adev, NULL);
+	amdgpu_device_flush_hdp(adev, NULL);
 
 	if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
 		value = false;
@@ -2372,8 +2233,8 @@ static int gmc_v9_0_hw_init(void *handle)
 
 	if (amdgpu_emu_mode == 1)
 		return amdgpu_gmc_vram_checking(adev);
-	else
-		return r;
+
+	return 0;
 }
 
 /**
@@ -2390,9 +2251,9 @@ static void gmc_v9_0_gart_disable(struct amdgpu_device *adev)
 	adev->mmhub.funcs->gart_disable(adev);
 }
 
-static int gmc_v9_0_hw_fini(void *handle)
+static int gmc_v9_0_hw_fini(struct amdgpu_ip_block *ip_block)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_device *adev = ip_block->adev;
 
 	gmc_v9_0_gart_disable(adev);
@@ -2410,54 +2271,70 @@ static int gmc_v9_0_hw_fini(void *handle)
 	if (adev->mmhub.funcs->update_power_gating)
 		adev->mmhub.funcs->update_power_gating(adev, false);
 
-	amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
+	/*
	 * For minimal init, late_init is not called, hence VM fault/RAS irqs
	 * are not enabled.
	 */
+	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
+		amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
+
+		if (adev->gmc.ecc_irq.funcs &&
+		    amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+			amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
+	}
 
 	return 0;
 }
 
-static int gmc_v9_0_suspend(void *handle)
+static int gmc_v9_0_suspend(struct amdgpu_ip_block *ip_block)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	return gmc_v9_0_hw_fini(adev);
+	return gmc_v9_0_hw_fini(ip_block);
 }
 
-static int gmc_v9_0_resume(void *handle)
+static int gmc_v9_0_resume(struct amdgpu_ip_block *ip_block)
 {
+	struct amdgpu_device *adev = ip_block->adev;
 	int r;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	r = gmc_v9_0_hw_init(adev);
+	/* If a reset is done for NPS mode switch, read the memory range
	 * information again.
	 */
+	if (adev->gmc.reset_flags & AMDGPU_GMC_INIT_RESET_NPS) {
+		amdgpu_gmc_init_sw_mem_ranges(adev, adev->gmc.mem_partitions);
+		adev->gmc.reset_flags &= ~AMDGPU_GMC_INIT_RESET_NPS;
+	}
+
+	r = gmc_v9_0_hw_init(ip_block);
 	if (r)
 		return r;
 
-	amdgpu_vmid_reset_all(adev);
+	amdgpu_vmid_reset_all(ip_block->adev);
 
 	return 0;
 }
 
-static bool gmc_v9_0_is_idle(void *handle)
+static bool gmc_v9_0_is_idle(struct amdgpu_ip_block *ip_block)
 {
 	/* MC is always ready in GMC v9.*/
 	return true;
 }
 
-static int gmc_v9_0_wait_for_idle(void *handle)
+static int gmc_v9_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
 {
 	/* There is no need to wait for MC idle in GMC v9.*/
 	return 0;
 }
 
-static int gmc_v9_0_soft_reset(void *handle)
+static int gmc_v9_0_soft_reset(struct amdgpu_ip_block *ip_block)
 {
 	/* XXX for emulation.*/
 	return 0;
 }
 
-static int gmc_v9_0_set_clockgating_state(void *handle,
+static int gmc_v9_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
 					  enum amd_clockgating_state state)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_device *adev = ip_block->adev;
 
 	adev->mmhub.funcs->set_clockgating(adev, state);
 
@@ -2466,16 +2343,16 @@ static int gmc_v9_0_set_clockgating_state(void *handle,
 	return 0;
 }
 
-static void gmc_v9_0_get_clockgating_state(void *handle, u64 *flags)
+static void gmc_v9_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_device *adev = ip_block->adev;
 
 	adev->mmhub.funcs->get_clockgating(adev, flags);
 
 	athub_v1_0_get_clockgating(adev, flags);
 }
 
-static int gmc_v9_0_set_powergating_state(void *handle,
+static int gmc_v9_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
 					   enum amd_powergating_state state)
 {
 	return 0;
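The reworked gmc_v9_0_flush_gpu_tlb() above reduces the manual MMIO path to one fixed handshake per flush: acquire the per-engine invalidation semaphore, post the invalidate request, poll the ack register for the VMID bit, then release the semaphore (the extra type-2 pass for Vega20+XGMI now lives behind gmc.flush_tlb_needs_extra_type_2 instead of the old do/while loop). A minimal user-space model of that sequence follows; the register names, the reg_read()/reg_write() helpers, and the simulated hardware responses are illustrative assumptions, not the driver's actual MMIO interface.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the hub's invalidation-engine registers. */
enum { REG_INV_SEM, REG_INV_REQ, REG_INV_ACK, REG_COUNT };
static uint32_t regs[REG_COUNT];

static uint32_t reg_read(int reg)
{
	/* Simulated hardware: the semaphore always grants immediately and
	 * the ack register mirrors whatever request bits were posted. */
	if (reg == REG_INV_SEM)
		return 1;
	if (reg == REG_INV_ACK)
		return regs[REG_INV_REQ];
	return regs[reg];
}

static void reg_write(int reg, uint32_t val)
{
	regs[reg] = val;
}

/* The four-step handshake modeled on the hunks above:
 * acquire semaphore -> post request -> poll ack -> release semaphore. */
static int flush_gpu_tlb(unsigned int vmid, int usec_timeout)
{
	int j;

	for (j = 0; j < usec_timeout; j++)	/* acquire: reads 1 when granted */
		if (reg_read(REG_INV_SEM) & 0x1)
			break;
	if (j >= usec_timeout)
		return -1;

	reg_write(REG_INV_REQ, 1u << vmid);	/* post the invalidate request */

	for (j = 0; j < usec_timeout; j++)	/* wait for this VMID's ack bit */
		if (reg_read(REG_INV_ACK) & (1u << vmid))
			break;

	reg_write(REG_INV_SEM, 0);		/* release the semaphore */
	return (j < usec_timeout) ? 0 : -1;
}

int main(void)
{
	printf("flush vmid 3: %s\n", flush_gpu_tlb(3, 100) ? "timeout" : "ok");
	return 0;
}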
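The PTE hunks make a related change: gmc_v9_0_get_vm_pte() now derives the memory type from the cached per-VM flags, and the MTYPE macro takes the current flags as its first argument so it can clear the old field before inserting the new value, which keeps repeated overrides (such as the NUMA-local rewrite in gmc_v9_0_override_vm_pte_flags()) idempotent. A compact sketch of that read-modify-write; the field position, width, and MTYPE values here are invented for illustration rather than taken from the real VG10 PTE layout.

#include <stdint.h>
#include <stdio.h>

/* Illustrative field layout only; the real encoding is defined by
 * AMDGPU_PTE_MTYPE_VG10() and AMDGPU_PTE_MTYPE_VG10_MASK. */
#define PTE_MTYPE_SHIFT	57
#define PTE_MTYPE_MASK	(3ULL << PTE_MTYPE_SHIFT)

enum mtype { MTYPE_NC, MTYPE_WC, MTYPE_CC, MTYPE_UC };

/* Mirrors the two-argument macro form: clear the old MTYPE field in the
 * PTE flags, then insert the new type. */
static uint64_t pte_set_mtype(uint64_t flags, enum mtype mt)
{
	return (flags & ~PTE_MTYPE_MASK) | ((uint64_t)mt << PTE_MTYPE_SHIFT);
}

int main(void)
{
	uint64_t flags = 0x1;			/* pretend bit 0 is a valid bit */

	flags = pte_set_mtype(flags, MTYPE_UC);	/* initial mapping */
	flags = pte_set_mtype(flags, MTYPE_CC);	/* override; old field cleared */
	printf("flags = 0x%016llx\n", (unsigned long long)flags);
	return 0;
}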
