Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c')
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 1348 ++++++++++++++++++++++-----------
 1 file changed, 931 insertions(+), 417 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 7eb70d69f760..8ad7519f7b58 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -49,11 +49,16 @@
 #include "mmhub_v1_0.h"
 #include "athub_v1_0.h"
 #include "gfxhub_v1_1.h"
+#include "gfxhub_v1_2.h"
 #include "mmhub_v9_4.h"
 #include "mmhub_v1_7.h"
+#include "mmhub_v1_8.h"
 #include "umc_v6_1.h"
 #include "umc_v6_0.h"
+#include "umc_v6_7.h"
+#include "umc_v12_0.h"
 #include "hdp_v4_0.h"
+#include "mca_v3_0.h"

 #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"

@@ -70,8 +75,10 @@
 #define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0				0x049d
 #define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0_BASE_IDX			2
+#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2		0x05ea
+#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2_BASE_IDX	2

-static const char *gfxhub_client_ids[] = {
+static const char * const gfxhub_client_ids[] = {
 	"CB",
 	"DB",
 	"IA",
@@ -322,14 +329,12 @@ static const char *mmhub_client_ids_aldebaran[][2] = {
 	[384+0][1] = "OSS",
 };

-static const struct soc15_reg_golden golden_settings_mmhub_1_0_0[] =
-{
+static const struct soc15_reg_golden golden_settings_mmhub_1_0_0[] = {
 	SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmDAGB1_WRCLI2, 0x00000007, 0xfe5fe0fa),
 	SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmMMEA1_DRAM_WR_CLI2GRP_MAP0, 0x00000030, 0x55555565)
 };

-static const struct soc15_reg_golden golden_settings_athub_1_0_0[] =
-{
+static const struct soc15_reg_golden golden_settings_athub_1_0_0[] = {
 	SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL, 0x0000ff00, 0x00000800),
 	SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL2, 0x00ff00ff, 0x00080008)
 };

@@ -406,13 +411,14 @@ static const uint32_t ecc_umc_mcumc_ctrl_mask_addrs[] = {

 static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
 		struct amdgpu_irq_src *src,
-		unsigned type,
+		unsigned int type,
 		enum amdgpu_interrupt_state state)
 {
 	u32 bits, i, tmp, reg;

 	/* Devices newer then VEGA10/12 shall have these programming
-	   sequences performed by PSP BL */
+	 * sequences performed by PSP BL
+	 */
 	if (adev->asic_type >= CHIP_VEGA20)
 		return 0;

@@ -456,7 +462,7 @@ static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,

 static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
 		struct amdgpu_irq_src *src,
-		unsigned type,
+		unsigned int type,
 		enum amdgpu_interrupt_state state)
 {
 	struct amdgpu_vmhub *hub;
@@ -472,24 +478,58 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,

 	switch (state) {
 	case AMDGPU_IRQ_STATE_DISABLE:
-		for (j = 0; j < adev->num_vmhubs; j++) {
+		for_each_set_bit(j, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
 			hub = &adev->vmhub[j];
 			for (i = 0; i < 16; i++) {
 				reg = hub->vm_context0_cntl + i;
-				tmp = RREG32(reg);
+
+				/* This works because this interrupt is only
+				 * enabled at init/resume and disabled in
+				 * fini/suspend, so the overall state doesn't
+				 * change over the course of suspend/resume.
+				 */
+				if (adev->in_s0ix && (j == AMDGPU_GFXHUB(0)))
+					continue;
+
+				if (j >= AMDGPU_MMHUB0(0))
+					tmp = RREG32_SOC15_IP(MMHUB, reg);
+				else
+					tmp = RREG32_XCC(reg, j);
+
 				tmp &= ~bits;
-				WREG32(reg, tmp);
+
+				if (j >= AMDGPU_MMHUB0(0))
+					WREG32_SOC15_IP(MMHUB, reg, tmp);
+				else
+					WREG32_XCC(reg, tmp, j);
 			}
 		}
 		break;
 	case AMDGPU_IRQ_STATE_ENABLE:
-		for (j = 0; j < adev->num_vmhubs; j++) {
+		for_each_set_bit(j, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
 			hub = &adev->vmhub[j];
 			for (i = 0; i < 16; i++) {
 				reg = hub->vm_context0_cntl + i;
-				tmp = RREG32(reg);
+
+				/* This works because this interrupt is only
+				 * enabled at init/resume and disabled in
+				 * fini/suspend, so the overall state doesn't
+				 * change over the course of suspend/resume.
+				 */
+				if (adev->in_s0ix && (j == AMDGPU_GFXHUB(0)))
+					continue;
+
+				if (j >= AMDGPU_MMHUB0(0))
+					tmp = RREG32_SOC15_IP(MMHUB, reg);
+				else
+					tmp = RREG32_XCC(reg, j);
+
 				tmp |= bits;
-				WREG32(reg, tmp);
+
+				if (j >= AMDGPU_MMHUB0(0))
+					WREG32_SOC15_IP(MMHUB, reg, tmp);
+				else
+					WREG32_XCC(reg, tmp, j);
 			}
 		}
 		break;
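The interrupt-state handlers above stop assuming hub IDs are dense (0..num_vmhubs) and instead walk adev->vmhubs_mask, since multi-AID parts leave holes in the hub numbering. A minimal standalone sketch of that iteration pattern (the mask value and hub layout are invented; the driver uses the real for_each_set_bit() from <linux/bitops.h>):

	#include <stdint.h>
	#include <stdio.h>

	#define MAX_VMHUBS 32

	int main(void)
	{
		/* e.g. one GFX hub (bit 0) and two MMHUB instances (bits 8, 9) */
		uint32_t vmhubs_mask = (1u << 0) | (1u << 8) | (1u << 9);

		for (unsigned int j = 0; j < MAX_VMHUBS; j++) {
			if (!(vmhubs_mask & (1u << j)))
				continue;	/* hub not present on this ASIC */
			printf("program VM_CONTEXT*_CNTL on hub %u\n", j);
		}
		return 0;
	}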
@@ -504,69 +544,111 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
 				      struct amdgpu_irq_src *source,
 				      struct amdgpu_iv_entry *entry)
 {
-	bool retry_fault = !!(entry->src_data[1] & 0x80);
-	uint32_t status = 0, cid = 0, rw = 0;
-	struct amdgpu_task_info task_info;
+	bool retry_fault = !!(entry->src_data[1] &
+			      AMDGPU_GMC9_FAULT_SOURCE_DATA_RETRY);
+	bool write_fault = !!(entry->src_data[1] &
+			      AMDGPU_GMC9_FAULT_SOURCE_DATA_WRITE);
+	uint32_t status = 0, cid = 0, rw = 0, fed = 0;
+	struct amdgpu_task_info *task_info;
 	struct amdgpu_vmhub *hub;
 	const char *mmhub_cid;
 	const char *hub_name;
+	unsigned int vmhub;
 	u64 addr;
+	uint32_t cam_index = 0;
+	int ret, xcc_id = 0;
+	uint32_t node_id;
+
+	node_id = entry->node_id;

 	addr = (u64)entry->src_data[0] << 12;
 	addr |= ((u64)entry->src_data[1] & 0xf) << 44;

+	if (entry->client_id == SOC15_IH_CLIENTID_VMC) {
+		hub_name = "mmhub0";
+		vmhub = AMDGPU_MMHUB0(node_id / 4);
+	} else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) {
+		hub_name = "mmhub1";
+		vmhub = AMDGPU_MMHUB1(0);
+	} else {
+		hub_name = "gfxhub0";
+		if (adev->gfx.funcs->ih_node_to_logical_xcc) {
+			xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev,
+				node_id);
+			if (xcc_id < 0)
+				xcc_id = 0;
+		}
+		vmhub = xcc_id;
+	}
+	hub = &adev->vmhub[vmhub];
+
 	if (retry_fault) {
-		/* Returning 1 here also prevents sending the IV to the KFD */
+		if (adev->irq.retry_cam_enabled) {
+			/* Delegate it to a different ring if the hardware hasn't
+			 * already done it.
+			 */
+			if (entry->ih == &adev->irq.ih) {
+				amdgpu_irq_delegate(adev, entry, 8);
+				return 1;
+			}
+
+			cam_index = entry->src_data[2] & 0x3ff;

-		/* Process it onyl if it's the first fault for this address */
-		if (entry->ih != &adev->irq.ih_soft &&
-		    amdgpu_gmc_filter_faults(adev, addr, entry->pasid,
+			ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
+						     addr, entry->timestamp, write_fault);
+			WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index);
+			if (ret)
+				return 1;
+		} else {
+			/* Process it only if it's the first fault for this address */
+			if (entry->ih != &adev->irq.ih_soft &&
+			    amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
 						 entry->timestamp))
-			return 1;
+				return 1;
+
+			/* Delegate it to a different ring if the hardware hasn't
+			 * already done it.
+			 */
+			if (entry->ih == &adev->irq.ih) {
+				amdgpu_irq_delegate(adev, entry, 8);
+				return 1;
+			}

-		/* Delegate it to a different ring if the hardware hasn't
-		 * already done it.
-		 */
-		if (entry->ih == &adev->irq.ih) {
-			amdgpu_irq_delegate(adev, entry, 8);
-			return 1;
+			/* Try to handle the recoverable page faults by filling page
+			 * tables
+			 */
+			if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
+						   addr, entry->timestamp, write_fault))
+				return 1;
 		}
-
-		/* Try to handle the recoverable page faults by filling page
-		 * tables
-		 */
-		if (amdgpu_vm_handle_fault(adev, entry->pasid, addr))
-			return 1;
 	}
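The faulting page address is reassembled from the two IV source-data dwords exactly as in the hunk above (page frame in src_data[0], top four address bits in src_data[1]). A self-contained sketch; the sample inputs are made up:

	#include <stdint.h>
	#include <stdio.h>

	static uint64_t gmc9_fault_addr(uint32_t src_data0, uint32_t src_data1)
	{
		uint64_t addr = (uint64_t)src_data0 << 12;	/* 4K page frame */

		addr |= ((uint64_t)src_data1 & 0xf) << 44;	/* top 4 address bits */
		return addr;
	}

	int main(void)
	{
		printf("fault at 0x%016llx\n",
		       (unsigned long long)gmc9_fault_addr(0x12345678, 0x8f));
		return 0;
	}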
+	if (kgd2kfd_vmfault_fast_path(adev, entry, retry_fault))
+		return 1;
+
 	if (!printk_ratelimit())
 		return 0;

-	if (entry->client_id == SOC15_IH_CLIENTID_VMC) {
-		hub_name = "mmhub0";
-		hub = &adev->vmhub[AMDGPU_MMHUB_0];
-	} else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) {
-		hub_name = "mmhub1";
-		hub = &adev->vmhub[AMDGPU_MMHUB_1];
-	} else {
-		hub_name = "gfxhub0";
-		hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+	dev_err(adev->dev,
+		"[%s] %s page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", hub_name,
+		retry_fault ? "retry" : "no-retry",
+		entry->src_id, entry->ring_id, entry->vmid, entry->pasid);
+
+	task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
+	if (task_info) {
+		amdgpu_vm_print_task_info(adev, task_info);
+		amdgpu_vm_put_task_info(task_info);
 	}

-	memset(&task_info, 0, sizeof(struct amdgpu_task_info));
-	amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
-
-	dev_err(adev->dev,
-		"[%s] %s page fault (src_id:%u ring:%u vmid:%u "
-		"pasid:%u, for process %s pid %d thread %s pid %d)\n",
-		hub_name, retry_fault ? "retry" : "no-retry",
-		entry->src_id, entry->ring_id, entry->vmid,
-		entry->pasid, task_info.process_name, task_info.tgid,
-		task_info.task_name, task_info.pid);
 	dev_err(adev->dev, "  in page starting at address 0x%016llx from IH client 0x%x (%s)\n",
 		addr, entry->client_id,
 		soc15_ih_clientid_name[entry->client_id]);

+	if (amdgpu_is_multi_aid(adev))
+		dev_err(adev->dev, "  cookie node_id %d fault from die %s%d%s\n",
+			node_id, node_id % 4 == 3 ? "RSV" : "AID", node_id / 4,
+			node_id % 4 == 1 ? ".XCD0" : node_id % 4 == 2 ? ".XCD1" : "");
+
 	if (amdgpu_sriov_vf(adev))
 		return 0;

@@ -575,45 +657,63 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
 	 * be updated to avoid reading an incorrect value due to
 	 * the new fast GRBM interface.
 	 */
-	if ((entry->vmid_src == AMDGPU_GFXHUB_0) &&
-	    (adev->asic_type < CHIP_ALDEBARAN))
+	if ((entry->vmid_src == AMDGPU_GFXHUB(0)) &&
+	    (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2)))
 		RREG32(hub->vm_l2_pro_fault_status);

 	status = RREG32(hub->vm_l2_pro_fault_status);
 	cid = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, CID);
 	rw = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, RW);
-	WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+	fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED);
+
+	/* for fed error, kfd will handle it, return directly */
+	if (fed && amdgpu_ras_is_poison_mode_supported(adev) &&
+	    (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2)))
+		return 0;
+
+	/* Only print L2 fault status if the status register could be read and
+	 * contains useful information
+	 */
+	if (!status)
+		return 0;
+
+	if (!amdgpu_sriov_vf(adev))
+		WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+
+	amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status, vmhub);

 	dev_err(adev->dev,
 		"VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
 		status);
-	if (hub == &adev->vmhub[AMDGPU_GFXHUB_0]) {
+	if (entry->vmid_src == AMDGPU_GFXHUB(0)) {
 		dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
 			cid >= ARRAY_SIZE(gfxhub_client_ids) ?
 			"unknown" : gfxhub_client_ids[cid], cid);
 	} else {
-		switch (adev->asic_type) {
-		case CHIP_VEGA10:
+		switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+		case IP_VERSION(9, 0, 0):
 			mmhub_cid = mmhub_client_ids_vega10[cid][rw];
 			break;
-		case CHIP_VEGA12:
+		case IP_VERSION(9, 3, 0):
 			mmhub_cid = mmhub_client_ids_vega12[cid][rw];
 			break;
-		case CHIP_VEGA20:
+		case IP_VERSION(9, 4, 0):
 			mmhub_cid = mmhub_client_ids_vega20[cid][rw];
 			break;
-		case CHIP_ARCTURUS:
+		case IP_VERSION(9, 4, 1):
 			mmhub_cid = mmhub_client_ids_arcturus[cid][rw];
 			break;
-		case CHIP_RAVEN:
+		case IP_VERSION(9, 1, 0):
+		case IP_VERSION(9, 2, 0):
 			mmhub_cid = mmhub_client_ids_raven[cid][rw];
 			break;
-		case CHIP_RENOIR:
+		case IP_VERSION(1, 5, 0):
+		case IP_VERSION(2, 4, 0):
 			mmhub_cid = mmhub_client_ids_renoir[cid][rw];
 			break;
-		case CHIP_ALDEBARAN:
+		case IP_VERSION(1, 8, 0):
+		case IP_VERSION(9, 4, 2):
			mmhub_cid = mmhub_client_ids_aldebaran[cid][rw];
			break;
 		default:
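Decoding VM_L2_PROTECTION_FAULT_STATUS is plain mask-and-shift work that REG_GET_FIELD() hides behind the register headers. A hedged illustration with placeholder bit positions (not the real register layout):

	#include <stdint.h>
	#include <stdio.h>

	#define FIELD_GET32(val, shift, width) \
		(((val) >> (shift)) & ((1u << (width)) - 1u))

	int main(void)
	{
		uint32_t status = 0x000C0123u;	/* fabricated status value */
		/* hypothetical offsets; the real ones come from the ASIC headers */
		uint32_t cid = FIELD_GET32(status, 0, 9);	/* faulting client */
		uint32_t rw  = FIELD_GET32(status, 18, 1);	/* 0=read, 1=write */
		uint32_t fed = FIELD_GET32(status, 19, 1);	/* poison indicator */

		printf("cid=%u rw=%u fed=%u\n", cid, rw, fed);
		return 0;
	}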
"unknown" : gfxhub_client_ids[cid], cid); } else { - switch (adev->asic_type) { - case CHIP_VEGA10: + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { + case IP_VERSION(9, 0, 0): mmhub_cid = mmhub_client_ids_vega10[cid][rw]; break; - case CHIP_VEGA12: + case IP_VERSION(9, 3, 0): mmhub_cid = mmhub_client_ids_vega12[cid][rw]; break; - case CHIP_VEGA20: + case IP_VERSION(9, 4, 0): mmhub_cid = mmhub_client_ids_vega20[cid][rw]; break; - case CHIP_ARCTURUS: + case IP_VERSION(9, 4, 1): mmhub_cid = mmhub_client_ids_arcturus[cid][rw]; break; - case CHIP_RAVEN: + case IP_VERSION(9, 1, 0): + case IP_VERSION(9, 2, 0): mmhub_cid = mmhub_client_ids_raven[cid][rw]; break; - case CHIP_RENOIR: + case IP_VERSION(1, 5, 0): + case IP_VERSION(2, 4, 0): mmhub_cid = mmhub_client_ids_renoir[cid][rw]; break; - case CHIP_ALDEBARAN: + case IP_VERSION(1, 8, 0): + case IP_VERSION(9, 4, 2): mmhub_cid = mmhub_client_ids_aldebaran[cid][rw]; break; default: @@ -656,7 +756,8 @@ static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev) adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs; if (!amdgpu_sriov_vf(adev) && - !adev->gmc.xgmi.connected_to_cpu) { + !adev->gmc.xgmi.connected_to_cpu && + !adev->gmc.is_app_apu) { adev->gmc.ecc_irq.num_types = 1; adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs; } @@ -691,11 +792,12 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid, static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev, uint32_t vmhub) { - if (adev->asic_type == CHIP_ALDEBARAN) + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || + amdgpu_is_multi_aid(adev)) return false; - return ((vmhub == AMDGPU_MMHUB_0 || - vmhub == AMDGPU_MMHUB_1) && + return ((vmhub == AMDGPU_MMHUB0(0) || + vmhub == AMDGPU_MMHUB1(0)) && (!amdgpu_sriov_vf(adev)) && (!(!(adev->apu_flags & AMD_APU_IS_RAVEN2) && (adev->apu_flags & AMD_APU_IS_PICASSO)))); @@ -734,43 +836,37 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, uint32_t vmhub, uint32_t flush_type) { bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub); - const unsigned eng = 17; - u32 j, inv_req, inv_req2, tmp; + u32 j, inv_req, tmp, sem, req, ack, inst; + const unsigned int eng = 17; struct amdgpu_vmhub *hub; - BUG_ON(vmhub >= adev->num_vmhubs); + BUG_ON(vmhub >= AMDGPU_MAX_VMHUBS); hub = &adev->vmhub[vmhub]; - if (adev->gmc.xgmi.num_physical_nodes && - adev->asic_type == CHIP_VEGA20) { - /* Vega20+XGMI caches PTEs in TC and TLB. Add a - * heavy-weight TLB flush (type 2), which flushes - * both. Due to a race condition with concurrent - * memory accesses using the same TLB cache line, we - * still need a second TLB flush after this. 
@@ -734,43 +836,37 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 					uint32_t vmhub, uint32_t flush_type)
 {
 	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
-	const unsigned eng = 17;
-	u32 j, inv_req, inv_req2, tmp;
+	u32 j, inv_req, tmp, sem, req, ack, inst;
+	const unsigned int eng = 17;
 	struct amdgpu_vmhub *hub;

-	BUG_ON(vmhub >= adev->num_vmhubs);
+	BUG_ON(vmhub >= AMDGPU_MAX_VMHUBS);

 	hub = &adev->vmhub[vmhub];
-	if (adev->gmc.xgmi.num_physical_nodes &&
-	    adev->asic_type == CHIP_VEGA20) {
-		/* Vega20+XGMI caches PTEs in TC and TLB. Add a
-		 * heavy-weight TLB flush (type 2), which flushes
-		 * both. Due to a race condition with concurrent
-		 * memory accesses using the same TLB cache line, we
-		 * still need a second TLB flush after this.
-		 */
-		inv_req = gmc_v9_0_get_invalidate_req(vmid, 2);
-		inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type);
-	} else {
-		inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
-		inv_req2 = 0;
-	}
+	inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
+	sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
+	req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
+	ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;

-	/* This is necessary for a HW workaround under SRIOV as well
-	 * as GFXOFF under bare metal
+	if (vmhub >= AMDGPU_MMHUB0(0))
+		inst = 0;
+	else
+		inst = vmhub;
+
+	/* This is necessary for SRIOV as well as for GFXOFF to function
+	 * properly under bare metal
 	 */
-	if (adev->gfx.kiq.ring.sched.ready &&
-	    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
-	    down_read_trylock(&adev->reset_sem)) {
+	if (adev->gfx.kiq[inst].ring.sched.ready &&
+	    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
 		uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
 		uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;

-		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
-						   1 << vmid);
-		up_read(&adev->reset_sem);
+		amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req,
+						 1 << vmid, inst);
 		return;
 	}

+	/* This path is needed before KIQ/MES/GFXOFF are set up */
 	spin_lock(&adev->gmc.invalidate_lock);

 	/*
@@ -783,9 +879,11 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
 	if (use_semaphore) {
 		for (j = 0; j < adev->usec_timeout; j++) {
-			/* a read return value of 1 means semaphore acuqire */
-			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem +
-					    hub->eng_distance * eng);
+			/* a read return value of 1 means semaphore acquire */
+			if (vmhub >= AMDGPU_MMHUB0(0))
+				tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem, GET_INST(GC, inst));
+			else
+				tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem, GET_INST(GC, inst));
 			if (tmp & 0x1)
 				break;
 			udelay(1);
@@ -795,40 +893,41 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
 	}

-	do {
-		WREG32_NO_KIQ(hub->vm_inv_eng0_req +
-			      hub->eng_distance * eng, inv_req);
-
-		/*
-		 * Issue a dummy read to wait for the ACK register to
-		 * be cleared to avoid a false ACK due to the new fast
-		 * GRBM interface.
-		 */
-		if ((vmhub == AMDGPU_GFXHUB_0) &&
-		    (adev->asic_type < CHIP_ALDEBARAN))
-			RREG32_NO_KIQ(hub->vm_inv_eng0_req +
-				      hub->eng_distance * eng);
+	if (vmhub >= AMDGPU_MMHUB0(0))
+		WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req, GET_INST(GC, inst));
+	else
+		WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req, GET_INST(GC, inst));

-		for (j = 0; j < adev->usec_timeout; j++) {
-			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack +
-					    hub->eng_distance * eng);
-			if (tmp & (1 << vmid))
-				break;
-			udelay(1);
-		}
+	/*
+	 * Issue a dummy read to wait for the ACK register to
+	 * be cleared to avoid a false ACK due to the new fast
+	 * GRBM interface.
+	 */
+	if ((vmhub == AMDGPU_GFXHUB(0)) &&
+	    (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2)))
+		RREG32_NO_KIQ(req);

-		inv_req = inv_req2;
-		inv_req2 = 0;
-	} while (inv_req);
+	for (j = 0; j < adev->usec_timeout; j++) {
+		if (vmhub >= AMDGPU_MMHUB0(0))
+			tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack, GET_INST(GC, inst));
+		else
+			tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack, GET_INST(GC, inst));
+		if (tmp & (1 << vmid))
+			break;
+		udelay(1);
+	}
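The MMIO fallback path above writes the invalidate request and then polls the ACK register until the bit for this VMID flips. Sketched with the same stand-in accessors as before (not the SOC15 macros):

	#include <stdbool.h>
	#include <stdint.h>

	extern uint32_t rreg(uint32_t reg);
	extern void wreg(uint32_t reg, uint32_t val);
	extern void udelay_us(unsigned int us);

	static bool flush_and_wait(uint32_t req_reg, uint32_t ack_reg,
				   uint32_t inv_req, unsigned int vmid,
				   unsigned int timeout_us)
	{
		wreg(req_reg, inv_req);	/* kick off the invalidation */

		for (unsigned int i = 0; i < timeout_us; i++) {
			if (rreg(ack_reg) & (1u << vmid))
				return true;	/* hardware acknowledged */
			udelay_us(1);
		}
		return false;	/* timed out */
	}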
 	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well.
 	 */
-	if (use_semaphore)
+	if (use_semaphore) {
 		/*
 		 * add semaphore release after invalidation,
 		 * write with 0 means semaphore release
 		 */
-		WREG32_NO_KIQ(hub->vm_inv_eng0_sem +
-			      hub->eng_distance * eng, 0);
+		if (vmhub >= AMDGPU_MMHUB0(0))
+			WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0, GET_INST(GC, inst));
+		else
+			WREG32_SOC15_IP_NO_KIQ(GC, sem, 0, GET_INST(GC, inst));
+	}

 	spin_unlock(&adev->gmc.invalidate_lock);

@@ -845,96 +944,46 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
  * @pasid: pasid to be flush
  * @flush_type: the flush type
  * @all_hub: flush all hubs
+ * @inst: is used to select which instance of KIQ to use for the invalidation
  *
  * Flush the TLB for the requested pasid.
  */
-static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
-					uint16_t pasid, uint32_t flush_type,
-					bool all_hub)
+static void gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+					 uint16_t pasid, uint32_t flush_type,
+					 bool all_hub, uint32_t inst)
 {
-	int vmid, i;
-	signed long r;
-	uint32_t seq;
-	uint16_t queried_pasid;
-	bool ret;
-	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
-
-	if (amdgpu_in_reset(adev))
-		return -EIO;
-
-	if (ring->sched.ready && down_read_trylock(&adev->reset_sem)) {
-		/* Vega20+XGMI caches PTEs in TC and TLB. Add a
-		 * heavy-weight TLB flush (type 2), which flushes
-		 * both. Due to a race condition with concurrent
-		 * memory accesses using the same TLB cache line, we
-		 * still need a second TLB flush after this.
-		 */
-		bool vega20_xgmi_wa = (adev->gmc.xgmi.num_physical_nodes &&
-				       adev->asic_type == CHIP_VEGA20);
-		/* 2 dwords flush + 8 dwords fence */
-		unsigned int ndw = kiq->pmf->invalidate_tlbs_size + 8;
-
-		if (vega20_xgmi_wa)
-			ndw += kiq->pmf->invalidate_tlbs_size;
-
-		spin_lock(&adev->gfx.kiq.ring_lock);
-		/* 2 dwords flush + 8 dwords fence */
-		amdgpu_ring_alloc(ring, ndw);
-		if (vega20_xgmi_wa)
-			kiq->pmf->kiq_invalidate_tlbs(ring,
-						      pasid, 2, all_hub);
-		kiq->pmf->kiq_invalidate_tlbs(ring,
-					pasid, flush_type, all_hub);
-		r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
-		if (r) {
-			amdgpu_ring_undo(ring);
-			spin_unlock(&adev->gfx.kiq.ring_lock);
-			up_read(&adev->reset_sem);
-			return -ETIME;
-		}
-
-		amdgpu_ring_commit(ring);
-		spin_unlock(&adev->gfx.kiq.ring_lock);
-		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
-		if (r < 1) {
-			dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
-			up_read(&adev->reset_sem);
-			return -ETIME;
-		}
-		up_read(&adev->reset_sem);
-		return 0;
-	}
+	uint16_t queried;
+	int i, vmid;

 	for (vmid = 1; vmid < 16; vmid++) {
-
-		ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
-				&queried_pasid);
-		if (ret && queried_pasid == pasid) {
-			if (all_hub) {
-				for (i = 0; i < adev->num_vmhubs; i++)
-					gmc_v9_0_flush_gpu_tlb(adev, vmid,
-							i, flush_type);
-			} else {
-				gmc_v9_0_flush_gpu_tlb(adev, vmid,
-						AMDGPU_GFXHUB_0, flush_type);
-			}
-			break;
+		bool valid;
+
+		valid = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+								 &queried);
+		if (!valid || queried != pasid)
+			continue;
+
+		if (all_hub) {
+			for_each_set_bit(i, adev->vmhubs_mask,
+					 AMDGPU_MAX_VMHUBS)
+				gmc_v9_0_flush_gpu_tlb(adev, vmid, i,
+						       flush_type);
+		} else {
+			gmc_v9_0_flush_gpu_tlb(adev, vmid,
+					       AMDGPU_GFXHUB(0),
+					       flush_type);
 		}
 	}
-
-	return 0;
-
 }
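The reworked PASID flush above simply scans all 16 VMID slots for the PASID and flushes each matching hub. A standalone model of that loop; the mapping table is fabricated:

	#include <stdint.h>
	#include <stdio.h>

	#define NUM_VMIDS 16

	static uint16_t vmid_to_pasid[NUM_VMIDS] = { 0, 0x11, 0x22, 0x11 };

	static void flush_tlb(int vmid)
	{
		printf("flush TLB for vmid %d\n", vmid);
	}

	int main(void)
	{
		uint16_t target = 0x11;

		for (int vmid = 1; vmid < NUM_VMIDS; vmid++) {
			if (vmid_to_pasid[vmid] != target)
				continue;
			flush_tlb(vmid);	/* one PASID may own several VMIDs */
		}
		return 0;
	}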
 static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
-					    unsigned vmid, uint64_t pd_addr)
+					    unsigned int vmid, uint64_t pd_addr)
 {
-	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
+	bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->vm_hub);
 	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
+	struct amdgpu_vmhub *hub = &adev->vmhub[ring->vm_hub];
 	uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0);
-	unsigned eng = ring->vm_inv_eng;
+	unsigned int eng = ring->vm_inv_eng;

 	/*
 	 * It may lose gpuvm invalidate acknowldege state across power-gating
@@ -976,17 +1025,17 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 	return pd_addr;
 }

-static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
-					unsigned pasid)
+static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int vmid,
+					unsigned int pasid)
 {
 	struct amdgpu_device *adev = ring->adev;
 	uint32_t reg;

 	/* Do nothing because there's no lut register for mmhub1. */
-	if (ring->funcs->vmhub == AMDGPU_MMHUB_1)
+	if (ring->vm_hub == AMDGPU_MMHUB1(0))
 		return;

-	if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)
+	if (ring->vm_hub == AMDGPU_GFXHUB(0))
 		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
 	else
 		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;
@@ -1026,27 +1075,6 @@ static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
  * 0 valid
  */

-static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
-
-{
-	switch (flags) {
-	case AMDGPU_VM_MTYPE_DEFAULT:
-		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
-	case AMDGPU_VM_MTYPE_NC:
-		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
-	case AMDGPU_VM_MTYPE_WC:
-		return AMDGPU_PTE_MTYPE_VG10(MTYPE_WC);
-	case AMDGPU_VM_MTYPE_RW:
-		return AMDGPU_PTE_MTYPE_VG10(MTYPE_RW);
-	case AMDGPU_VM_MTYPE_CC:
-		return AMDGPU_PTE_MTYPE_VG10(MTYPE_CC);
-	case AMDGPU_VM_MTYPE_UC:
-		return AMDGPU_PTE_MTYPE_VG10(MTYPE_UC);
-	default:
-		return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
-	}
-}
-
 static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
 				uint64_t *addr, uint64_t *flags)
 {
@@ -1063,51 +1091,263 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
 		*flags |= AMDGPU_PDE_BFS(0x9);

 	} else if (level == AMDGPU_VM_PDB0) {
-		if (*flags & AMDGPU_PDE_PTE)
+		if (*flags & AMDGPU_PDE_PTE) {
 			*flags &= ~AMDGPU_PDE_PTE;
-		else
+			if (!(*flags & AMDGPU_PTE_VALID))
+				*addr |= 1 << PAGE_SHIFT;
+		} else {
 			*flags |= AMDGPU_PTE_TF;
+		}
 	}
 }

+static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
+					 struct amdgpu_vm *vm,
+					 struct amdgpu_bo *bo,
+					 uint32_t vm_flags,
+					 uint64_t *flags)
+{
+	struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	bool is_vram = bo->tbo.resource &&
+		bo->tbo.resource->mem_type == TTM_PL_VRAM;
+	bool coherent = bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+				     AMDGPU_GEM_CREATE_EXT_COHERENT);
+	bool ext_coherent = bo->flags & AMDGPU_GEM_CREATE_EXT_COHERENT;
+	bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
+	unsigned int mtype_local, mtype;
+	uint32_t gc_ip_version = amdgpu_ip_version(adev, GC_HWIP, 0);
+	bool snoop = false;
+	bool is_local;
+
+	dma_resv_assert_held(bo->tbo.base.resv);
+
+	switch (gc_ip_version) {
+	case IP_VERSION(9, 4, 1):
+	case IP_VERSION(9, 4, 2):
+		if (is_vram) {
+			if (bo_adev == adev) {
+				if (uncached)
+					mtype = MTYPE_UC;
+				else if (coherent)
+					mtype = MTYPE_CC;
+				else
+					mtype = MTYPE_RW;
+				/* FIXME: is this still needed? Or does
+				 * amdgpu_ttm_tt_pde_flags already handle this?
+				 */
+				if (gc_ip_version == IP_VERSION(9, 4, 2) &&
+				    adev->gmc.xgmi.connected_to_cpu)
+					snoop = true;
+			} else {
+				if (uncached || coherent)
+					mtype = MTYPE_UC;
+				else
+					mtype = MTYPE_NC;
+				if (amdgpu_xgmi_same_hive(adev, bo_adev))
+					snoop = true;
+			}
+		} else {
+			if (uncached || coherent)
+				mtype = MTYPE_UC;
+			else
+				mtype = MTYPE_NC;
+			/* FIXME: is this still needed? Or does
+			 * amdgpu_ttm_tt_pde_flags already handle this?
+			 */
+			snoop = true;
+		}
+		break;
+	case IP_VERSION(9, 4, 3):
+	case IP_VERSION(9, 4, 4):
+	case IP_VERSION(9, 5, 0):
+		/* Only local VRAM BOs or system memory on non-NUMA APUs
+		 * can be assumed to be local in their entirety. Choose
+		 * MTYPE_NC as safe fallback for all system memory BOs on
+		 * NUMA systems. Their MTYPE can be overridden per-page in
+		 * gmc_v9_0_override_vm_pte_flags.
+		 */
+		mtype_local = MTYPE_RW;
+		if (amdgpu_mtype_local == 1) {
+			DRM_INFO_ONCE("Using MTYPE_NC for local memory\n");
+			mtype_local = MTYPE_NC;
+		} else if (amdgpu_mtype_local == 2) {
+			DRM_INFO_ONCE("Using MTYPE_CC for local memory\n");
+			mtype_local = MTYPE_CC;
+		} else {
+			DRM_INFO_ONCE("Using MTYPE_RW for local memory\n");
+		}
+		is_local = (!is_vram && (adev->flags & AMD_IS_APU) &&
+			    num_possible_nodes() <= 1) ||
+			   (is_vram && adev == bo_adev &&
+			    KFD_XCP_MEM_ID(adev, bo->xcp_id) == vm->mem_id);
+		snoop = true;
+		if (uncached) {
+			mtype = MTYPE_UC;
+		} else if (ext_coherent) {
+			mtype = is_local ? MTYPE_CC : MTYPE_UC;
+		} else if (adev->flags & AMD_IS_APU) {
+			mtype = is_local ? mtype_local : MTYPE_NC;
+		} else {
+			/* dGPU */
+			if (is_local)
+				mtype = mtype_local;
+			else if (gc_ip_version < IP_VERSION(9, 5, 0) && !is_vram)
+				mtype = MTYPE_UC;
+			else
+				mtype = MTYPE_NC;
+		}
+
+		break;
+	default:
+		if (uncached || coherent)
+			mtype = MTYPE_UC;
+		else
+			mtype = MTYPE_NC;
+
+		/* FIXME: is this still needed? Or does
+		 * amdgpu_ttm_tt_pde_flags already handle this?
+		 */
+		if (!is_vram)
+			snoop = true;
+	}
+
+	if (mtype != MTYPE_NC)
+		*flags = AMDGPU_PTE_MTYPE_VG10(*flags, mtype);
+
+	*flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
+}
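The 9.4.3+ branch above boils down to a priority order: uncached beats extended-coherent, which beats the APU/dGPU locality rules. A simplified model for illustration (it deliberately omits the is_vram and GC-version refinements the driver applies):

	enum mtype { MTYPE_NC, MTYPE_UC, MTYPE_CC, MTYPE_RW };

	static enum mtype pick_mtype(int uncached, int ext_coherent, int is_apu,
				     int is_local, enum mtype mtype_local)
	{
		if (uncached)
			return MTYPE_UC;	/* explicit request wins */
		if (ext_coherent)
			return is_local ? MTYPE_CC : MTYPE_UC;
		if (is_apu)
			return is_local ? mtype_local : MTYPE_NC;
		return is_local ? mtype_local : MTYPE_NC;	/* dGPU default */
	}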
+
 static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
-				struct amdgpu_bo_va_mapping *mapping,
+				struct amdgpu_vm *vm,
+				struct amdgpu_bo *bo,
+				uint32_t vm_flags,
 				uint64_t *flags)
 {
-	*flags &= ~AMDGPU_PTE_EXECUTABLE;
-	*flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
+	if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
+		*flags |= AMDGPU_PTE_EXECUTABLE;
+	else
+		*flags &= ~AMDGPU_PTE_EXECUTABLE;

-	*flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
-	*flags |= mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK;
+	switch (vm_flags & AMDGPU_VM_MTYPE_MASK) {
+	case AMDGPU_VM_MTYPE_DEFAULT:
+	case AMDGPU_VM_MTYPE_NC:
+	default:
+		*flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_NC);
+		break;
+	case AMDGPU_VM_MTYPE_WC:
+		*flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_WC);
+		break;
+	case AMDGPU_VM_MTYPE_RW:
+		*flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_RW);
+		break;
+	case AMDGPU_VM_MTYPE_CC:
+		*flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_CC);
+		break;
+	case AMDGPU_VM_MTYPE_UC:
+		*flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_UC);
+		break;
+	}

-	if (mapping->flags & AMDGPU_PTE_PRT) {
+	if (vm_flags & AMDGPU_VM_PAGE_PRT) {
 		*flags |= AMDGPU_PTE_PRT;
 		*flags &= ~AMDGPU_PTE_VALID;
 	}

-	if ((adev->asic_type == CHIP_ARCTURUS ||
-	    adev->asic_type == CHIP_ALDEBARAN) &&
-	    !(*flags & AMDGPU_PTE_SYSTEM) &&
-	    mapping->bo_va->is_xgmi)
-		*flags |= AMDGPU_PTE_SNOOPED;
+	if ((*flags & AMDGPU_PTE_VALID) && bo)
+		gmc_v9_0_get_coherence_flags(adev, vm, bo, vm_flags, flags);
+}
+
+static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
+					   struct amdgpu_vm *vm,
+					   uint64_t addr, uint64_t *flags)
+{
+	int local_node, nid;
+
+	/* Only GFX 9.4.3 APUs associate GPUs with NUMA nodes. Local system
+	 * memory can use more efficient MTYPEs.
+	 */
+	if (!(adev->flags & AMD_IS_APU) ||
+	    amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 3))
+		return;
+
+	/* Only direct-mapped memory allows us to determine the NUMA node from
+	 * the DMA address.
+	 */
+	if (!adev->ram_is_direct_mapped) {
+		dev_dbg_ratelimited(adev->dev, "RAM is not direct mapped\n");
+		return;
+	}
+
+	/* MTYPE_NC is the same default and can be overridden.
+	 * MTYPE_UC will be present if the memory is extended-coherent
+	 * and can also be overridden.
+	 */
+	if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) !=
+	    AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC) &&
+	    (*flags & AMDGPU_PTE_MTYPE_VG10_MASK) !=
+	    AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_UC)) {
+		dev_dbg_ratelimited(adev->dev, "MTYPE is not NC or UC\n");
+		return;
+	}
+
+	/* FIXME: Only supported on native mode for now. For carve-out, the
+	 * NUMA affinity of the GPU/VM needs to come from the PCI info because
+	 * memory partitions are not associated with different NUMA nodes.
+	 */
+	if (adev->gmc.is_app_apu && vm->mem_id >= 0) {
+		local_node = adev->gmc.mem_partitions[vm->mem_id].numa.node;
+	} else {
+		dev_dbg_ratelimited(adev->dev, "Only native mode APU is supported.\n");
+		return;
+	}
+
+	/* Only handle real RAM. Mappings of PCIe resources don't have struct
+	 * page or NUMA nodes.
+	 */
+	if (!page_is_ram(addr >> PAGE_SHIFT)) {
+		dev_dbg_ratelimited(adev->dev, "Page is not RAM.\n");
+		return;
+	}
+	nid = pfn_to_nid(addr >> PAGE_SHIFT);
+	dev_dbg_ratelimited(adev->dev, "vm->mem_id=%d, local_node=%d, nid=%d\n",
+			    vm->mem_id, local_node, nid);
+	if (nid == local_node) {
+		uint64_t old_flags = *flags;
+
+		if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) ==
+		    AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC)) {
+			unsigned int mtype_local = MTYPE_RW;
+
+			if (amdgpu_mtype_local == 1)
+				mtype_local = MTYPE_NC;
+			else if (amdgpu_mtype_local == 2)
+				mtype_local = MTYPE_CC;
+
+			*flags = AMDGPU_PTE_MTYPE_VG10(*flags, mtype_local);
+		} else {
+			/* MTYPE_UC case */
+			*flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_CC);
+		}

-	if (adev->asic_type == CHIP_ALDEBARAN)
-		*flags |= mapping->flags & AMDGPU_PTE_SNOOPED;
+		dev_dbg_ratelimited(adev->dev, "flags updated from %llx to %llx\n",
+				    old_flags, *flags);
+	}
 }
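The override above only upgrades the MTYPE when the page is real RAM sitting on the GPU's own NUMA node, using the kernel helpers page_is_ram() and pfn_to_nid(). A sketch of that check, assuming kernel context (PAGE_SHIFT and both helpers available); local_node stands for the partition's node as in the function above:

	static bool addr_is_gpu_local(uint64_t addr, int local_node)
	{
		unsigned long pfn = addr >> PAGE_SHIFT;

		if (!page_is_ram(pfn))
			return false;		/* e.g. a PCIe BAR mapping */

		return pfn_to_nid(pfn) == local_node;
	}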
-static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
+static unsigned int gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
 {
 	u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
-	unsigned size;
+	unsigned int size;
+
+	/* TODO move to DC so GMC doesn't need to hard-code DCN registers */

 	if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
 		size = AMDGPU_VBIOS_VGA_ALLOCATION;
 	} else {
 		u32 viewport;

-		switch (adev->asic_type) {
-		case CHIP_RAVEN:
-		case CHIP_RENOIR:
+		switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) {
+		case IP_VERSION(1, 0, 0):
+		case IP_VERSION(1, 0, 1):
 			viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
 			size = (REG_GET_FIELD(viewport,
 					      HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
@@ -1115,9 +1355,14 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
 				REG_GET_FIELD(viewport,
 					      HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
 				4);
 			break;
-		case CHIP_VEGA10:
-		case CHIP_VEGA12:
-		case CHIP_VEGA20:
+		case IP_VERSION(2, 1, 0):
+			viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2);
+			size = (REG_GET_FIELD(viewport,
+					      HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
+				REG_GET_FIELD(viewport,
+					      HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
+				4);
+			break;
 		default:
 			viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE);
 			size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
@@ -1130,15 +1375,29 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
 	return size;
 }

+static bool gmc_v9_0_need_reset_on_init(struct amdgpu_device *adev)
+{
+	if (adev->nbio.funcs && adev->nbio.funcs->is_nps_switch_requested &&
+	    adev->nbio.funcs->is_nps_switch_requested(adev)) {
+		adev->gmc.reset_flags |= AMDGPU_GMC_INIT_RESET_NPS;
+		return true;
+	}
+
+	return false;
+}
+
 static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
 	.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
 	.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
 	.emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
 	.emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
-	.map_mtype = gmc_v9_0_map_mtype,
 	.get_vm_pde = gmc_v9_0_get_vm_pde,
 	.get_vm_pte = gmc_v9_0_get_vm_pte,
+	.override_vm_pte_flags = gmc_v9_0_override_vm_pte_flags,
 	.get_vbios_fb_size = gmc_v9_0_get_vbios_fb_size,
+	.query_mem_partition_mode = &amdgpu_gmc_query_memory_partition,
+	.request_mem_partition_mode = &amdgpu_gmc_request_memory_partition,
+	.need_reset_on_init = &gmc_v9_0_need_reset_on_init,
 };

 static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
@@ -1148,25 +1407,52 @@ static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)

 static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
 {
-	switch (adev->asic_type) {
-	case CHIP_VEGA10:
+	switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
+	case IP_VERSION(6, 0, 0):
 		adev->umc.funcs = &umc_v6_0_funcs;
 		break;
-	case CHIP_VEGA20:
+	case IP_VERSION(6, 1, 1):
 		adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
 		adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
 		adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
 		adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
+		adev->umc.retire_unit = 1;
 		adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
-		adev->umc.ras_funcs = &umc_v6_1_ras_funcs;
+		adev->umc.ras = &umc_v6_1_ras;
 		break;
-	case CHIP_ARCTURUS:
+	case IP_VERSION(6, 1, 2):
 		adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
 		adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
 		adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
 		adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
+		adev->umc.retire_unit = 1;
 		adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
-		adev->umc.ras_funcs = &umc_v6_1_ras_funcs;
+		adev->umc.ras = &umc_v6_1_ras;
+		break;
+	case IP_VERSION(6, 7, 0):
+		adev->umc.max_ras_err_cnt_per_query =
+			UMC_V6_7_TOTAL_CHANNEL_NUM * UMC_V6_7_BAD_PAGE_NUM_PER_CHANNEL;
+		adev->umc.channel_inst_num = UMC_V6_7_CHANNEL_INSTANCE_NUM;
+		adev->umc.umc_inst_num = UMC_V6_7_UMC_INSTANCE_NUM;
+		adev->umc.channel_offs = UMC_V6_7_PER_CHANNEL_OFFSET;
+		adev->umc.retire_unit = (UMC_V6_7_NA_MAP_PA_NUM * 2);
+		if (!adev->gmc.xgmi.connected_to_cpu)
+			adev->umc.ras = &umc_v6_7_ras;
+		if (1 & adev->smuio.funcs->get_die_id(adev))
+			adev->umc.channel_idx_tbl = &umc_v6_7_channel_idx_tbl_first[0][0];
+		else
+			adev->umc.channel_idx_tbl = &umc_v6_7_channel_idx_tbl_second[0][0];
+		break;
+	case IP_VERSION(12, 0, 0):
+	case IP_VERSION(12, 5, 0):
+		adev->umc.max_ras_err_cnt_per_query =
+			UMC_V12_0_TOTAL_CHANNEL_NUM(adev) * UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL;
+		adev->umc.channel_inst_num = UMC_V12_0_CHANNEL_INSTANCE_NUM;
+		adev->umc.umc_inst_num = UMC_V12_0_UMC_INSTANCE_NUM;
+		adev->umc.node_inst_num /= UMC_V12_0_UMC_INSTANCE_NUM;
+		adev->umc.channel_offs = UMC_V12_0_PER_CHANNEL_OFFSET;
+		if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu)
+			adev->umc.ras = &umc_v12_0_ras;
 		break;
 	default:
 		break;
@@ -1175,13 +1461,17 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)

 static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
 {
-	switch (adev->asic_type) {
-	case CHIP_ARCTURUS:
+	switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+	case IP_VERSION(9, 4, 1):
 		adev->mmhub.funcs = &mmhub_v9_4_funcs;
 		break;
-	case CHIP_ALDEBARAN:
+	case IP_VERSION(9, 4, 2):
 		adev->mmhub.funcs = &mmhub_v1_7_funcs;
 		break;
+	case IP_VERSION(1, 8, 0):
+	case IP_VERSION(1, 8, 1):
+		adev->mmhub.funcs = &mmhub_v1_8_funcs;
+		break;
 	default:
 		adev->mmhub.funcs = &mmhub_v1_0_funcs;
 		break;
@@ -1190,15 +1480,19 @@ static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)

 static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev)
 {
-	switch (adev->asic_type) {
-	case CHIP_VEGA20:
-		adev->mmhub.ras_funcs = &mmhub_v1_0_ras_funcs;
+	switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+	case IP_VERSION(9, 4, 0):
+		adev->mmhub.ras = &mmhub_v1_0_ras;
+		break;
+	case IP_VERSION(9, 4, 1):
+		adev->mmhub.ras = &mmhub_v9_4_ras;
 		break;
-	case CHIP_ARCTURUS:
-		adev->mmhub.ras_funcs = &mmhub_v9_4_ras_funcs;
+	case IP_VERSION(9, 4, 2):
+		adev->mmhub.ras = &mmhub_v1_7_ras;
 		break;
-	case CHIP_ALDEBARAN:
-		adev->mmhub.ras_funcs = &mmhub_v1_7_ras_funcs;
+	case IP_VERSION(1, 8, 0):
+	case IP_VERSION(1, 8, 1):
+		adev->mmhub.ras = &mmhub_v1_8_ras;
 		break;
 	default:
 		/* mmhub ras is not available */
@@ -1208,28 +1502,111 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev)

 static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev)
 {
-	adev->gfxhub.funcs = &gfxhub_v1_0_funcs;
+	if (amdgpu_is_multi_aid(adev))
+		adev->gfxhub.funcs = &gfxhub_v1_2_funcs;
+	else
+		adev->gfxhub.funcs = &gfxhub_v1_0_funcs;
 }

 static void gmc_v9_0_set_hdp_ras_funcs(struct amdgpu_device *adev)
 {
-	adev->hdp.ras_funcs = &hdp_v4_0_ras_funcs;
+	adev->hdp.ras = &hdp_v4_0_ras;
+}
+
+static void gmc_v9_0_set_mca_ras_funcs(struct amdgpu_device *adev)
+{
+	struct amdgpu_mca *mca = &adev->mca;
+
+	/* is UMC the right IP to check for MCA? Maybe DF? */
+	switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
+	case IP_VERSION(6, 7, 0):
+		if (!adev->gmc.xgmi.connected_to_cpu) {
+			mca->mp0.ras = &mca_v3_0_mp0_ras;
+			mca->mp1.ras = &mca_v3_0_mp1_ras;
+			mca->mpio.ras = &mca_v3_0_mpio_ras;
+		}
+		break;
+	default:
+		break;
+	}
+}
+
+static void gmc_v9_0_set_xgmi_ras_funcs(struct amdgpu_device *adev)
+{
+	if (!adev->gmc.xgmi.connected_to_cpu)
+		adev->gmc.xgmi.ras = &xgmi_ras;
 }

-static int gmc_v9_0_early_init(void *handle)
+static void gmc_v9_0_init_nps_details(struct amdgpu_device *adev)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	enum amdgpu_memory_partition mode;
+	uint32_t supp_modes;
+	int i;

-	if (adev->asic_type == CHIP_VEGA20 ||
-	    adev->asic_type == CHIP_ARCTURUS)
+	adev->gmc.supported_nps_modes = 0;
+
+	if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU))
+		return;
+
+	mode = amdgpu_gmc_get_memory_partition(adev, &supp_modes);
+
+	/* Mode detected by hardware and supported modes available */
+	if ((mode != UNKNOWN_MEMORY_PARTITION_MODE) && supp_modes) {
+		while ((i = ffs(supp_modes))) {
+			if (AMDGPU_ALL_NPS_MASK & BIT(i))
+				adev->gmc.supported_nps_modes |= BIT(i);
+			supp_modes &= supp_modes - 1;
+		}
+	} else {
+		/*TODO: Check PSP version also which supports NPS switch. Otherwise keep
+		 * supported modes as 0.
+		 */
+		switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+		case IP_VERSION(9, 4, 3):
+		case IP_VERSION(9, 4, 4):
+			adev->gmc.supported_nps_modes =
+				BIT(AMDGPU_NPS1_PARTITION_MODE) |
+				BIT(AMDGPU_NPS4_PARTITION_MODE);
+			break;
+		default:
+			break;
+		}
+	}
+}
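The supported-NPS-mode extraction above peels set bits off the hardware-reported mask with ffs(), clearing the lowest set bit each iteration. A userspace model with invented mask values (note it mirrors the kernel code's use of the 1-based ffs() result as the bit to test):

	#include <stdio.h>
	#include <strings.h>	/* ffs() */

	#define KNOWN_MODES_MASK 0x1Au	/* hypothetical NPS mode bit positions */

	int main(void)
	{
		unsigned int supp_modes = 0x09;	/* as if reported by hardware */
		unsigned int supported = 0;
		int i;

		while ((i = ffs(supp_modes))) {
			if (KNOWN_MODES_MASK & (1u << i))
				supported |= 1u << i;
			supp_modes &= supp_modes - 1;	/* clear lowest set bit */
		}
		printf("supported NPS mask: 0x%x\n", supported);
		return 0;
	}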
+
+static int gmc_v9_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+
+	/*
+	 * 9.4.0, 9.4.1 and 9.4.3 don't have XGMI defined
+	 * in their IP discovery tables
+	 */
+	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0) ||
+	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
+	    amdgpu_is_multi_aid(adev))
 		adev->gmc.xgmi.supported = true;

-	if (adev->asic_type == CHIP_ALDEBARAN) {
+	if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == IP_VERSION(6, 1, 0)) {
 		adev->gmc.xgmi.supported = true;
 		adev->gmc.xgmi.connected_to_cpu =
 			adev->smuio.funcs->is_host_gpu_xgmi_supported(adev);
 	}

+	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) {
+		enum amdgpu_pkg_type pkg_type =
+			adev->smuio.funcs->get_pkg_type(adev);
+		/* On GFXIP 9.4.3. APU, there is no physical VRAM domain present
+		 * and the APU, can be in used two possible modes:
+		 * - carveout mode
+		 * - native APU mode
+		 * "is_app_apu" can be used to identify the APU in the native
+		 * mode.
+		 */
+		adev->gmc.is_app_apu = (pkg_type == AMDGPU_PKG_TYPE_APU &&
+					!pci_resource_len(adev->pdev, 0));
+	}
+
 	gmc_v9_0_set_gmc_funcs(adev);
 	gmc_v9_0_set_irq_funcs(adev);
 	gmc_v9_0_set_umc_funcs(adev);
@@ -1237,6 +1614,8 @@ static int gmc_v9_0_early_init(void *handle)
 	gmc_v9_0_set_mmhub_ras_funcs(adev);
 	gmc_v9_0_set_gfxhub_funcs(adev);
 	gmc_v9_0_set_hdp_ras_funcs(adev);
+	gmc_v9_0_set_mca_ras_funcs(adev);
+	gmc_v9_0_set_xgmi_ras_funcs(adev);

 	adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
 	adev->gmc.shared_aperture_end =
@@ -1244,13 +1623,14 @@ static int gmc_v9_0_early_init(void *handle)
 	adev->gmc.private_aperture_start = 0x1000000000000000ULL;
 	adev->gmc.private_aperture_end =
 		adev->gmc.private_aperture_start + (4ULL << 30) - 1;
+	adev->gmc.noretry_flags = AMDGPU_VM_NORETRY_FLAGS_TF;

 	return 0;
 }

-static int gmc_v9_0_late_init(void *handle)
+static int gmc_v9_0_late_init(struct amdgpu_ip_block *ip_block)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_device *adev = ip_block->adev;
 	int r;

 	r = amdgpu_gmc_allocate_vm_inv_eng(adev);
@@ -1261,21 +1641,18 @@ static int gmc_v9_0_late_init(void *handle)
 	 * Workaround performance drop issue with VBIOS enables partial
 	 * writes, while disables HBM ECC for vega10.
 	 */
-	if (!amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_VEGA10)) {
+	if (!amdgpu_sriov_vf(adev) &&
+	    (amdgpu_ip_version(adev, UMC_HWIP, 0) == IP_VERSION(6, 0, 0))) {
 		if (!(adev->ras_enabled & (1 << AMDGPU_RAS_BLOCK__UMC))) {
-			if (adev->df.funcs->enable_ecc_force_par_wr_rmw)
+			if (adev->df.funcs &&
+			    adev->df.funcs->enable_ecc_force_par_wr_rmw)
 				adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
 		}
 	}

 	if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
-		if (adev->mmhub.ras_funcs &&
-		    adev->mmhub.ras_funcs->reset_ras_error_count)
-			adev->mmhub.ras_funcs->reset_ras_error_count(adev);
-
-		if (adev->hdp.ras_funcs &&
-		    adev->hdp.ras_funcs->reset_ras_error_count)
-			adev->hdp.ras_funcs->reset_ras_error_count(adev);
+		amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
+		amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__HDP);
 	}

 	r = amdgpu_gmc_ras_late_init(adev);
@@ -1290,14 +1667,17 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
 {
 	u64 base = adev->mmhub.funcs->get_fb_location(adev);

+	amdgpu_gmc_set_agp_default(adev, mc);
+
 	/* add the xgmi offset of the physical node */
 	base += adev->gmc.xgmi.physical_node_id *
 		adev->gmc.xgmi.node_segment_size;
-	if (adev->gmc.xgmi.connected_to_cpu) {
+	if (amdgpu_gmc_is_pdb0_enabled(adev)) {
 		amdgpu_gmc_sysvm_location(adev, mc);
 	} else {
 		amdgpu_gmc_vram_location(adev, mc, base);
-		amdgpu_gmc_gart_location(adev, mc);
-		amdgpu_gmc_agp_location(adev, mc);
+		amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
+		if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1))
+			amdgpu_gmc_agp_location(adev, mc);
 	}
 	/* base offset of vram pages */
 	adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
@@ -1321,8 +1701,13 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
 	int r;

 	/* size in MB on si */
-	adev->gmc.mc_vram_size =
-		adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
+	if (!adev->gmc.is_app_apu) {
+		adev->gmc.mc_vram_size =
+			adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
+	} else {
+		DRM_DEBUG("Set mc_vram_size = 0 for APP APU\n");
+		adev->gmc.mc_vram_size = 0;
+	}
 	adev->gmc.real_vram_size = adev->gmc.mc_vram_size;

 	if (!(adev->flags & AMD_IS_APU) &&
@@ -1347,7 +1732,8 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
 	 */

 	/* check whether both host-gpu and gpu-gpu xgmi links exist */
-	if ((adev->flags & AMD_IS_APU) ||
+	if ((!amdgpu_sriov_vf(adev) &&
+	     (adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) ||
 	    (adev->gmc.xgmi.supported &&
 	     adev->gmc.xgmi.connected_to_cpu)) {
 		adev->gmc.aper_base =
@@ -1358,24 +1744,25 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
 	}
 #endif

-	/* In case the PCI BAR is larger than the actual amount of vram */
 	adev->gmc.visible_vram_size = adev->gmc.aper_size;
-	if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
-		adev->gmc.visible_vram_size = adev->gmc.real_vram_size;

 	/* set the gart size */
 	if (amdgpu_gart_size == -1) {
-		switch (adev->asic_type) {
-		case CHIP_VEGA10:  /* all engines support GPUVM */
-		case CHIP_VEGA12:  /* all engines support GPUVM */
-		case CHIP_VEGA20:
-		case CHIP_ARCTURUS:
-		case CHIP_ALDEBARAN:
+		switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+		case IP_VERSION(9, 0, 1):  /* all engines support GPUVM */
+		case IP_VERSION(9, 2, 1):  /* all engines support GPUVM */
+		case IP_VERSION(9, 4, 0):
+		case IP_VERSION(9, 4, 1):
+		case IP_VERSION(9, 4, 2):
+		case IP_VERSION(9, 4, 3):
+		case IP_VERSION(9, 4, 4):
+		case IP_VERSION(9, 5, 0):
 		default:
 			adev->gmc.gart_size = 512ULL << 20;
 			break;
-		case CHIP_RAVEN:   /* DCE SG support */
-		case CHIP_RENOIR:
+		case IP_VERSION(9, 1, 0):   /* DCE SG support */
+		case IP_VERSION(9, 2, 2):   /* DCE SG support */
+		case IP_VERSION(9, 3, 0):
 			adev->gmc.gart_size = 1024ULL << 20;
 			break;
 		}
@@ -1399,7 +1786,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
 		return 0;
 	}

-	if (adev->gmc.xgmi.connected_to_cpu) {
+	if (amdgpu_gmc_is_pdb0_enabled(adev)) {
 		adev->gmc.vmid0_page_table_depth = 1;
 		adev->gmc.vmid0_page_table_block_size = 12;
 	} else {
@@ -1412,15 +1799,21 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
 	if (r)
 		return r;
 	adev->gart.table_size = adev->gart.num_gpu_pages * 8;
-	adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(MTYPE_UC) |
+	adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_UC) |
 				 AMDGPU_PTE_EXECUTABLE;

-	r = amdgpu_gart_table_vram_alloc(adev);
-	if (r)
-		return r;
+	if (!adev->gmc.real_vram_size) {
+		dev_info(adev->dev, "Put GART in system memory for APU\n");
+		r = amdgpu_gart_table_ram_alloc(adev);
+		if (r)
+			dev_err(adev->dev, "Failed to allocate GART in system memory\n");
+	} else {
+		r = amdgpu_gart_table_vram_alloc(adev);
+		if (r)
+			return r;

-	if (adev->gmc.xgmi.connected_to_cpu) {
-		r = amdgpu_gmc_pdb0_alloc(adev);
+		if (amdgpu_gmc_is_pdb0_enabled(adev))
+			r = amdgpu_gmc_pdb0_alloc(adev);
 	}

 	return r;
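The allocation branch above in miniature: an APP APU reports no real VRAM, so the GART table falls back to system memory. Stand-in allocators, for illustration only (the real ones are amdgpu_gart_table_ram_alloc()/amdgpu_gart_table_vram_alloc()):

	extern int gart_table_alloc_ram(void *adev);	/* hypothetical stubs */
	extern int gart_table_alloc_vram(void *adev);

	static int gart_table_alloc(void *adev, unsigned long long real_vram_size)
	{
		if (!real_vram_size)
			return gart_table_alloc_ram(adev);	/* APP APU case */
		return gart_table_alloc_vram(adev);
	}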
@@ -1436,14 +1829,37 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
  */
 static void gmc_v9_0_save_registers(struct amdgpu_device *adev)
 {
-	if (adev->asic_type == CHIP_RAVEN)
+	if ((amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 0)) ||
+	    (amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 1)))
 		adev->gmc.sdpif_register = RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0);
 }

-static int gmc_v9_0_sw_init(void *handle)
+static void gmc_v9_4_3_init_vram_info(struct amdgpu_device *adev)
 {
-	int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	static const u32 regBIF_BIOS_SCRATCH_4 = 0x50;
+	u32 vram_info;
+
+	adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM;
+	adev->gmc.vram_width = 128 * 64;
+
+	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
+		adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM3E;
+
+	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) &&
+	    adev->rev_id == 0x3)
+		adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM3E;
+
+	if (!(adev->flags & AMD_IS_APU) && !amdgpu_sriov_vf(adev)) {
+		vram_info = RREG32(regBIF_BIOS_SCRATCH_4);
+		adev->gmc.vram_vendor = vram_info & 0xF;
+	}
+}
+
+static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
+{
+	int r, vram_width = 0, vram_type = 0, vram_vendor = 0, dma_addr_bits;
+	struct amdgpu_device *adev = ip_block->adev;
+	unsigned long inst_mask = adev->aid_mask;

 	adev->gfxhub.funcs->init(adev);

@@ -1451,35 +1867,51 @@ static int gmc_v9_0_sw_init(void *handle)

 	spin_lock_init(&adev->gmc.invalidate_lock);

-	r = amdgpu_atomfirmware_get_vram_info(adev,
-		&vram_width, &vram_type, &vram_vendor);
-	if (amdgpu_sriov_vf(adev))
-		/* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN,
-		 * and DF related registers is not readable, seems hardcord is the
-		 * only way to set the correct vram_width
-		 */
-		adev->gmc.vram_width = 2048;
-	else if (amdgpu_emu_mode != 1)
-		adev->gmc.vram_width = vram_width;
-
-	if (!adev->gmc.vram_width) {
-		int chansize, numchan;
-
-		/* hbm memory channel size */
-		if (adev->flags & AMD_IS_APU)
-			chansize = 64;
-		else
-			chansize = 128;
+	if (amdgpu_is_multi_aid(adev)) {
+		gmc_v9_4_3_init_vram_info(adev);
+	} else if (!adev->bios) {
+		if (adev->flags & AMD_IS_APU) {
+			adev->gmc.vram_type = AMDGPU_VRAM_TYPE_DDR4;
+			adev->gmc.vram_width = 64 * 64;
+		} else {
+			adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM;
+			adev->gmc.vram_width = 128 * 64;
+		}
+	} else {
+		r = amdgpu_atomfirmware_get_vram_info(adev,
+						      &vram_width, &vram_type, &vram_vendor);
+		if (amdgpu_sriov_vf(adev))
+			/* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN,
+			 * and DF related registers is not readable, seems hardcord is the
+			 * only way to set the correct vram_width
+			 */
+			adev->gmc.vram_width = 2048;
+		else if (amdgpu_emu_mode != 1)
+			adev->gmc.vram_width = vram_width;

-		numchan = adev->df.funcs->get_hbm_channel_number(adev);
-		adev->gmc.vram_width = numchan * chansize;
+		if (!adev->gmc.vram_width) {
+			int chansize, numchan;
+
+			/* hbm memory channel size */
+			if (adev->flags & AMD_IS_APU)
+				chansize = 64;
+			else
+				chansize = 128;
+			if (adev->df.funcs &&
+			    adev->df.funcs->get_hbm_channel_number) {
+				numchan = adev->df.funcs->get_hbm_channel_number(adev);
+				adev->gmc.vram_width = numchan * chansize;
+			}
+		}
+
+		adev->gmc.vram_type = vram_type;
+		adev->gmc.vram_vendor = vram_vendor;
 	}
-
-	adev->gmc.vram_type = vram_type;
-	adev->gmc.vram_vendor = vram_vendor;
-	switch (adev->asic_type) {
-	case CHIP_RAVEN:
-		adev->num_vmhubs = 2;
+	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+	case IP_VERSION(9, 1, 0):
+	case IP_VERSION(9, 2, 2):
+		set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+		set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);

 		if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
 			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
@@ -1490,30 +1922,44 @@ static int gmc_v9_0_sw_init(void *handle)
 				adev->vm_manager.num_level > 1;
 		}
 		break;
-	case CHIP_VEGA10:
-	case CHIP_VEGA12:
-	case CHIP_VEGA20:
-	case CHIP_RENOIR:
-	case CHIP_ALDEBARAN:
-		adev->num_vmhubs = 2;
-
+	case IP_VERSION(9, 0, 1):
+	case IP_VERSION(9, 2, 1):
+	case IP_VERSION(9, 4, 0):
+	case IP_VERSION(9, 3, 0):
+	case IP_VERSION(9, 4, 2):
+		set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+		set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
 		/*
 		 * To fulfill 4-level page support,
 		 * vm size is 256TB (48bit), maximum size of Vega10,
 		 * block size 512 (9bit)
 		 */
-		/* sriov restrict max_pfn below AMDGPU_GMC_HOLE */
-		if (amdgpu_sriov_vf(adev))
-			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47);
-		else
-			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+
+		amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+		if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
+			adev->gmc.translate_further = adev->vm_manager.num_level > 1;
 		break;
-	case CHIP_ARCTURUS:
-		adev->num_vmhubs = 3;
+	case IP_VERSION(9, 4, 1):
+		set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
+		set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
+		set_bit(AMDGPU_MMHUB1(0), adev->vmhubs_mask);

 		/* Keep the vm size same with Vega20 */
 		amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+		adev->gmc.translate_further = adev->vm_manager.num_level > 1;
+		break;
+	case IP_VERSION(9, 4, 3):
+	case IP_VERSION(9, 4, 4):
+	case IP_VERSION(9, 5, 0):
+		bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0),
+			   NUM_XCC(adev->gfx.xcc_mask));
+
+		inst_mask <<= AMDGPU_MMHUB0(0);
+		bitmap_or(adev->vmhubs_mask, adev->vmhubs_mask, &inst_mask, 32);
+
+		amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+		adev->gmc.translate_further = adev->vm_manager.num_level > 1;
 		break;
 	default:
 		break;
@@ -1525,7 +1971,7 @@ static int gmc_v9_0_sw_init(void *handle)
 	if (r)
 		return r;

-	if (adev->asic_type == CHIP_ARCTURUS) {
+	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) {
 		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC1, VMC_1_0__SRCID__VM_FAULT,
 					&adev->gmc.vm_fault);
 		if (r)
@@ -1539,7 +1985,8 @@ static int gmc_v9_0_sw_init(void *handle)
 		return r;

 	if (!amdgpu_sriov_vf(adev) &&
-	    !adev->gmc.xgmi.connected_to_cpu) {
+	    !adev->gmc.xgmi.connected_to_cpu &&
+	    !adev->gmc.is_app_apu) {
 		/* interrupt sent to DF. */
 		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0,
 				      &adev->gmc.ecc_irq);
@@ -1553,18 +2000,16 @@ static int gmc_v9_0_sw_init(void *handle)
 	 */
 	adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */

-	r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
+	dma_addr_bits = amdgpu_ip_version(adev, GC_HWIP, 0) >=
+				IP_VERSION(9, 4, 2) ?
+			48 :
+			44;
+	r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(dma_addr_bits));
 	if (r) {
-		printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
+		dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n");
 		return r;
 	}
-	adev->need_swiotlb = drm_need_swiotlb(44);
-
-	if (adev->gmc.xgmi.supported) {
-		r = adev->gfxhub.funcs->get_xgmi_info(adev);
-		if (r)
-			return r;
-	}
+	adev->need_swiotlb = drm_need_swiotlb(dma_addr_bits);

 	r = gmc_v9_0_mc_init(adev);
 	if (r)
@@ -1572,6 +2017,12 @@ static int gmc_v9_0_sw_init(void *handle)

 	amdgpu_gmc_get_vbios_allocations(adev);

+	if (amdgpu_is_multi_aid(adev)) {
+		r = amdgpu_gmc_init_mem_ranges(adev);
+		if (r)
+			return r;
+	}
+
 	/* Memory manager */
 	r = amdgpu_bo_init(adev);
 	if (r)
 		return r;
@@ -1581,6 +2032,7 @@ static int gmc_v9_0_sw_init(void *handle)
 	if (r)
 		return r;

+	gmc_v9_0_init_nps_details(adev);
 	/*
 	 * number of VMs
 	 * VMID 0 is reserved for System
@@ -1592,39 +2044,59 @@ static int gmc_v9_0_sw_init(void *handle)
 	 * for video processing.
 	 */
 	adev->vm_manager.first_kfd_vmid =
-		(adev->asic_type == CHIP_ARCTURUS ||
-		 adev->asic_type == CHIP_ALDEBARAN) ? 3 : 8;
+		(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
+		 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
+		 amdgpu_is_multi_aid(adev)) ?
+			3 :
+			8;

 	amdgpu_vm_manager_init(adev);

 	gmc_v9_0_save_registers(adev);

+	r = amdgpu_gmc_ras_sw_init(adev);
+	if (r)
+		return r;
+
+	if (amdgpu_is_multi_aid(adev))
+		amdgpu_gmc_sysfs_init(adev);
+
 	return 0;
 }

-static int gmc_v9_0_sw_fini(void *handle)
+static int gmc_v9_0_sw_fini(struct amdgpu_ip_block *ip_block)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_device *adev = ip_block->adev;
+
+	if (amdgpu_is_multi_aid(adev))
+		amdgpu_gmc_sysfs_fini(adev);

 	amdgpu_gmc_ras_fini(adev);
 	amdgpu_gem_force_release(adev);
 	amdgpu_vm_manager_fini(adev);
-	amdgpu_gart_table_vram_free(adev);
-	amdgpu_bo_unref(&adev->gmc.pdb0_bo);
+
+	if (!adev->gmc.real_vram_size) {
+		dev_info(adev->dev, "Put GART in system memory for APU free\n");
+		amdgpu_gart_table_ram_free(adev);
+	} else {
+		amdgpu_gart_table_vram_free(adev);
+	}
+
+	amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0);
 	amdgpu_bo_fini(adev);

+	adev->gmc.num_mem_partitions = 0;
+	kfree(adev->gmc.mem_partitions);
+
 	return 0;
 }

 static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
 {
-
-	switch (adev->asic_type) {
-	case CHIP_VEGA10:
+	switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
+	case IP_VERSION(9, 0, 0):
 		if (amdgpu_sriov_vf(adev))
 			break;
 		fallthrough;
-	case CHIP_VEGA20:
+	case IP_VERSION(9, 4, 0):
 		soc15_program_register_sequence(adev,
 						golden_settings_mmhub_1_0_0,
 						ARRAY_SIZE(golden_settings_mmhub_1_0_0));
@@ -1632,9 +2104,8 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
 						golden_settings_athub_1_0_0,
 						ARRAY_SIZE(golden_settings_athub_1_0_0));
 		break;
-	case CHIP_VEGA12:
-		break;
-	case CHIP_RAVEN:
+	case IP_VERSION(9, 1, 0):
+	case IP_VERSION(9, 2, 0):
 		/* TODO for renoir */
 		soc15_program_register_sequence(adev,
 						golden_settings_athub_1_0_0,
@@ -1654,7 +2125,8 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
  */
 void gmc_v9_0_restore_registers(struct amdgpu_device *adev)
 {
-	if (adev->asic_type == CHIP_RAVEN) {
+	if ((amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 0)) ||
+	    (amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 1))) {
 		WREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0, adev->gmc.sdpif_register);
 		WARN_ON(adev->gmc.sdpif_register !=
 			RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0));
@@ -1670,7 +2142,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
 {
 	int r;

-	if (adev->gmc.xgmi.connected_to_cpu)
+	if (amdgpu_gmc_is_pdb0_enabled(adev))
 		amdgpu_gmc_init_pdb0(adev);

 	if (adev->gart.bo == NULL) {
@@ -1678,35 +2150,45 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
 		return -EINVAL;
 	}

-	r = amdgpu_gart_table_vram_pin(adev);
-	if (r)
-		return r;
+	amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr);

-	r = adev->gfxhub.funcs->gart_enable(adev);
-	if (r)
-		return r;
+	if (!adev->in_s0ix) {
+		r = adev->gfxhub.funcs->gart_enable(adev);
+		if (r)
+			return r;
+	}

 	r = adev->mmhub.funcs->gart_enable(adev);
 	if (r)
 		return r;

 	DRM_INFO("PCIE GART of %uM enabled.\n",
-		 (unsigned)(adev->gmc.gart_size >> 20));
+		 (unsigned int)(adev->gmc.gart_size >> 20));
 	if (adev->gmc.pdb0_bo)
 		DRM_INFO("PDB0 located at 0x%016llX\n",
 				(unsigned long long)amdgpu_bo_gpu_offset(adev->gmc.pdb0_bo));
 	DRM_INFO("PTB located at 0x%016llX\n",
 			(unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));

-	adev->gart.ready = true;
 	return 0;
 }
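gart_enable() above (and hw_init() below) skip the GFX hub when resuming from S0ix, because GFX state is preserved across it. The shape of that special case, with hypothetical hub ops standing in for adev->gfxhub.funcs/adev->mmhub.funcs:

	#include <stdbool.h>

	struct hub_ops {
		int (*gart_enable)(void *adev);
	};

	static int enable_hubs(void *adev, bool in_s0ix,
			       const struct hub_ops *gfxhub,
			       const struct hub_ops *mmhub)
	{
		int r;

		if (!in_s0ix) {			/* GFX hub state survives S0ix */
			r = gfxhub->gart_enable(adev);
			if (r)
				return r;
		}
		return mmhub->gart_enable(adev);	/* MMHUB always reprogrammed */
	}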

-static int gmc_v9_0_hw_init(void *handle)
+static int gmc_v9_0_hw_init(struct amdgpu_ip_block *ip_block)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_device *adev = ip_block->adev;
 	bool value;
-	int r, i;
+	int i, r;
+
+	adev->gmc.flush_pasid_uses_kiq = true;
+
+	/* Vega20+XGMI caches PTEs in TC and TLB. Add a heavy-weight TLB flush
+	 * (type 2), which flushes both. Due to a race condition with
+	 * concurrent memory accesses using the same TLB cache line, we still
+	 * need a second TLB flush after this.
+	 */
+	adev->gmc.flush_tlb_needs_extra_type_2 =
+		amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0) &&
+		adev->gmc.xgmi.num_physical_nodes;

 	/* The sequence of these two function calls matters.*/
 	gmc_v9_0_init_golden_registers(adev);
@@ -1724,7 +2206,7 @@ static int gmc_v9_0_hw_init(struct amdgpu_ip_block *ip_block)
 	adev->hdp.funcs->init_registers(adev);

 	/* After HDP is initialized, flush HDP.*/
-	adev->hdp.funcs->flush_hdp(adev, NULL);
+	amdgpu_device_flush_hdp(adev, NULL);

 	if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
 		value = false;
@@ -1732,18 +2214,27 @@ static int gmc_v9_0_hw_init(struct amdgpu_ip_block *ip_block)
 		value = true;

 	if (!amdgpu_sriov_vf(adev)) {
-		adev->gfxhub.funcs->set_fault_enable_default(adev, value);
+		if (!adev->in_s0ix)
+			adev->gfxhub.funcs->set_fault_enable_default(adev, value);
 		adev->mmhub.funcs->set_fault_enable_default(adev, value);
 	}
-	for (i = 0; i < adev->num_vmhubs; ++i)
+	for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
+		if (adev->in_s0ix && (i == AMDGPU_GFXHUB(0)))
+			continue;
 		gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0);
+	}

 	if (adev->umc.funcs && adev->umc.funcs->init_registers)
 		adev->umc.funcs->init_registers(adev);

 	r = gmc_v9_0_gart_enable(adev);
+	if (r)
+		return r;

-	return r;
+	if (amdgpu_emu_mode == 1)
+		return amdgpu_gmc_vram_checking(adev);
+
+	return 0;
 }

 /**
@@ -1755,14 +2246,16 @@ static int gmc_v9_0_hw_init(struct amdgpu_ip_block *ip_block)
  */
 static void gmc_v9_0_gart_disable(struct amdgpu_device *adev)
 {
-	adev->gfxhub.funcs->gart_disable(adev);
+	if (!adev->in_s0ix)
+		adev->gfxhub.funcs->gart_disable(adev);
 	adev->mmhub.funcs->gart_disable(adev);
-	amdgpu_gart_table_vram_unpin(adev);
 }
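hw_init() above sets flush_tlb_needs_extra_type_2 for Vega20 XGMI hives; callers are then expected to issue a heavy-weight (type 2) flush before the requested one, closing the TLB-cache-line race the comment describes. A sketch of that calling pattern (the flush callback is hypothetical):

	static void flush_with_workaround(void (*flush)(unsigned int type),
					  unsigned int flush_type,
					  int needs_extra_type_2)
	{
		if (needs_extra_type_2 && flush_type != 2)
			flush(2);	/* heavy-weight flush of TC and TLB */
		flush(flush_type);	/* then the originally requested flush */
	}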

-static int gmc_v9_0_hw_fini(void *handle)
+static int gmc_v9_0_hw_fini(struct amdgpu_ip_block *ip_block)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_device *adev = ip_block->adev;
+
+	gmc_v9_0_gart_disable(adev);

 	if (amdgpu_sriov_vf(adev)) {
 		/* full access mode, so don't touch any GMC register */
 		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
 		return 0;
 	}

-	amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
-	amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
-	gmc_v9_0_gart_disable(adev);
+	/*
+	 * Pair the operations did in gmc_v9_0_hw_init and thus maintain
+	 * a correct cached state for GMC. Otherwise, the "gate" again
+	 * operation on S3 resuming will fail due to wrong cached state.
+	 */
+	if (adev->mmhub.funcs->update_power_gating)
+		adev->mmhub.funcs->update_power_gating(adev, false);
+
+	/*
+	 * For minimal init, late_init is not called, hence VM fault/RAS irqs
+	 * are not enabled.
+	 */
+	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
+		amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
+
+		if (adev->gmc.ecc_irq.funcs &&
+		    amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+			amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
+	}

 	return 0;
 }

-static int gmc_v9_0_suspend(void *handle)
+static int gmc_v9_0_suspend(struct amdgpu_ip_block *ip_block)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	return gmc_v9_0_hw_fini(adev);
+	return gmc_v9_0_hw_fini(ip_block);
 }

-static int gmc_v9_0_resume(void *handle)
+static int gmc_v9_0_resume(struct amdgpu_ip_block *ip_block)
 {
+	struct amdgpu_device *adev = ip_block->adev;
 	int r;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

-	r = gmc_v9_0_hw_init(adev);
+	/* If a reset is done for NPS mode switch, read the memory range
+	 * information again.
+	 */
+	if (adev->gmc.reset_flags & AMDGPU_GMC_INIT_RESET_NPS) {
+		amdgpu_gmc_init_sw_mem_ranges(adev, adev->gmc.mem_partitions);
+		adev->gmc.reset_flags &= ~AMDGPU_GMC_INIT_RESET_NPS;
+	}
+
+	r = gmc_v9_0_hw_init(ip_block);
 	if (r)
 		return r;

-	amdgpu_vmid_reset_all(adev);
+	amdgpu_vmid_reset_all(ip_block->adev);

 	return 0;
 }

-static bool gmc_v9_0_is_idle(void *handle)
+static bool gmc_v9_0_is_idle(struct amdgpu_ip_block *ip_block)
 {
 	/* MC is always ready in GMC v9.*/
 	return true;
 }

-static int gmc_v9_0_wait_for_idle(void *handle)
+static int gmc_v9_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
 {
 	/* There is no need to wait for MC idle in GMC v9.*/
 	return 0;
 }

-static int gmc_v9_0_soft_reset(void *handle)
+static int gmc_v9_0_soft_reset(struct amdgpu_ip_block *ip_block)
 {
 	/* XXX for emulation.*/
 	return 0;
 }

-static int gmc_v9_0_set_clockgating_state(void *handle,
+static int gmc_v9_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
 					enum amd_clockgating_state state)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_device *adev = ip_block->adev;

 	adev->mmhub.funcs->set_clockgating(adev, state);

@@ -1828,16 +2343,16 @@ static int gmc_v9_0_set_clockgating_state(void *handle,
 	return 0;
 }

-static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags)
+static void gmc_v9_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_device *adev = ip_block->adev;

 	adev->mmhub.funcs->get_clockgating(adev, flags);

 	athub_v1_0_get_clockgating(adev, flags);
 }

-static int gmc_v9_0_set_powergating_state(void *handle,
+static int gmc_v9_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
 					enum amd_powergating_state state)
 {
 	return 0;
@@ -1861,8 +2376,7 @@ const struct amd_ip_funcs gmc_v9_0_ip_funcs = {
 	.get_clockgating_state = gmc_v9_0_get_clockgating_state,
 };

-const struct amdgpu_ip_block_version gmc_v9_0_ip_block =
-{
+const struct amdgpu_ip_block_version gmc_v9_0_ip_block = {
 	.type = AMD_IP_BLOCK_TYPE_GMC,
 	.major = 9,
 	.minor = 0,
