path: root/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  270
1 file changed, 85 insertions(+), 185 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index fa87a85e1017..d8a4fddab9c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -51,8 +51,6 @@
#include "athub_v2_0.h"
#include "athub_v2_1.h"
-#include "amdgpu_reset.h"
-
static int gmc_v10_0_ecc_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned int type,
@@ -145,11 +143,15 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
* the new fast GRBM interface.
*/
if ((entry->vmid_src == AMDGPU_GFXHUB(0)) &&
- (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 3, 0)))
+ (amdgpu_ip_version(adev, GC_HWIP, 0) <
+ IP_VERSION(10, 3, 0)))
RREG32(hub->vm_l2_pro_fault_status);
status = RREG32(hub->vm_l2_pro_fault_status);
WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+
+ amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status,
+ entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0));
}
if (!printk_ratelimit())
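The dummy read above works around the fast GRBM interface on GC IP versions
before 10.3.0, where the first read of the fault-status register can return
stale data. A minimal sketch of the pattern for the GFX hub case (helper name
is illustrative, not part of the patch):

static u32 gmc_v10_0_read_fault_status(struct amdgpu_device *adev,
				       struct amdgpu_vmhub *hub)
{
	/* Discard the first read on ASICs with the fast GRBM interface. */
	if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 3, 0))
		RREG32(hub->vm_l2_pro_fault_status);
	return RREG32(hub->vm_l2_pro_fault_status);
}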
@@ -230,20 +232,47 @@ static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
* by the amdgpu vm/hsa code.
*/
-static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
- unsigned int vmhub, uint32_t flush_type)
+/**
+ * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
+ *
+ * @adev: amdgpu_device pointer
+ * @vmid: vm instance to flush
+ * @vmhub: vmhub type
+ * @flush_type: the flush type
+ *
+ * Flush the TLB for the requested page table.
+ */
+static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type)
{
bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
- u32 tmp;
/* Use register 17 for GART */
const unsigned int eng = 17;
- unsigned int i;
unsigned char hub_ip = 0;
+ u32 sem, req, ack;
+ unsigned int i;
+ u32 tmp;
+
+ sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
+ req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
+ ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
+
+ /* flush hdp cache */
+ adev->hdp.funcs->flush_hdp(adev, NULL);
+
+ /* At SRIOV runtime the driver shouldn't access these registers
+ * through MMIO; directly use the KIQ to do the VM invalidation instead.
+ */
+ if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
+ (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
+ amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
+ 1 << vmid);
+ return;
+ }
- hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ?
- GC_HWIP : MMHUB_HWIP;
+ hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP;
spin_lock(&adev->gmc.invalidate_lock);
/*
@@ -257,9 +286,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
if (use_semaphore) {
for (i = 0; i < adev->usec_timeout; i++) {
/* a read return value of 1 means semaphore acquire */
- tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
- hub->eng_distance * eng, hub_ip);
-
+ tmp = RREG32_RLC_NO_KIQ(sem, hub_ip);
if (tmp & 0x1)
break;
udelay(1);
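As a sketch, the acquire loop in the hunk above could be factored like this
(helper name illustrative; a read value of 1 means the semaphore was
acquired):

static bool gmc_v10_0_acquire_inv_sem(struct amdgpu_device *adev, u32 sem,
				      unsigned char hub_ip)
{
	unsigned int i;

	for (i = 0; i < adev->usec_timeout; i++) {
		/* a read return value of 1 means semaphore acquire */
		if (RREG32_RLC_NO_KIQ(sem, hub_ip) & 0x1)
			return true;
		udelay(1);
	}
	return false;	/* caller reports the acquire timeout */
}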
@@ -269,24 +296,19 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
}
- WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req +
- hub->eng_distance * eng,
- inv_req, hub_ip);
+ WREG32_RLC_NO_KIQ(req, inv_req, hub_ip);
/*
* Issue a dummy read to wait for the ACK register to be cleared
* to avoid a false ACK due to the new fast GRBM interface.
*/
if ((vmhub == AMDGPU_GFXHUB(0)) &&
- (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 3, 0)))
- RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req +
- hub->eng_distance * eng, hub_ip);
+ (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 3, 0)))
+ RREG32_RLC_NO_KIQ(req, hub_ip);
/* Wait for ACK with a delay. */
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack +
- hub->eng_distance * eng, hub_ip);
-
+ tmp = RREG32_RLC_NO_KIQ(ack, hub_ip);
tmp &= 1 << vmid;
if (tmp)
break;
@@ -296,109 +318,13 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
/* TODO: Debugging with the semaphore still needs to continue for GFXHUB as well. */
if (use_semaphore)
- /*
- * add semaphore release after invalidation,
- * write with 0 means semaphore release
- */
- WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
- hub->eng_distance * eng, 0, hub_ip);
+ WREG32_RLC_NO_KIQ(sem, 0, hub_ip);
spin_unlock(&adev->gmc.invalidate_lock);
- if (i < adev->usec_timeout)
- return;
-
- DRM_ERROR("Timeout waiting for VM flush hub: %d!\n", vmhub);
-}
-
-/**
- * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
- *
- * @adev: amdgpu_device pointer
- * @vmid: vm instance to flush
- * @vmhub: vmhub type
- * @flush_type: the flush type
- *
- * Flush the TLB for the requested page table.
- */
-static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
- uint32_t vmhub, uint32_t flush_type)
-{
- struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
- struct dma_fence *fence;
- struct amdgpu_job *job;
-
- int r;
-
- /* flush hdp cache */
- adev->hdp.funcs->flush_hdp(adev, NULL);
-
- /* For SRIOV run time, driver shouldn't access the register through MMIO
- * Directly use kiq to do the vm invalidation instead
- */
- if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
- (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
- down_read_trylock(&adev->reset_domain->sem)) {
- struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
- const unsigned int eng = 17;
- u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
- u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
- u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
-
- amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
- 1 << vmid);
-
- up_read(&adev->reset_domain->sem);
- return;
- }
-
- mutex_lock(&adev->mman.gtt_window_lock);
-
- if (vmhub == AMDGPU_MMHUB0(0)) {
- gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB0(0), 0);
- mutex_unlock(&adev->mman.gtt_window_lock);
- return;
- }
-
- BUG_ON(vmhub != AMDGPU_GFXHUB(0));
-
- if (!adev->mman.buffer_funcs_enabled ||
- !adev->ib_pool_ready ||
- amdgpu_in_reset(adev) ||
- ring->sched.ready == false) {
- gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB(0), 0);
- mutex_unlock(&adev->mman.gtt_window_lock);
- return;
- }
-
- /* The SDMA on Navi has a bug which can theoretically result in memory
- * corruption if an invalidation happens at the same time as an VA
- * translation. Avoid this by doing the invalidation from the SDMA
- * itself.
- */
- r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
- AMDGPU_FENCE_OWNER_UNDEFINED,
- 16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
- &job);
- if (r)
- goto error_alloc;
-
- job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
- job->vm_needs_flush = true;
- job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
- amdgpu_ring_pad_ib(ring, &job->ibs[0]);
- fence = amdgpu_job_submit(job);
-
- mutex_unlock(&adev->mman.gtt_window_lock);
-
- dma_fence_wait(fence, false);
- dma_fence_put(fence);
-
- return;
-
-error_alloc:
- mutex_unlock(&adev->mman.gtt_window_lock);
- DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
+ if (i >= adev->usec_timeout)
+ dev_err(adev->dev, "Timeout waiting for VM flush hub: %d!\n",
+ vmhub);
}
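The ACK wait that precedes the semaphore release follows the same polling
shape; sketched as a hypothetical helper, where bit vmid of the ACK register
signals completion of the invalidation:

static bool gmc_v10_0_wait_flush_ack(struct amdgpu_device *adev, u32 ack,
				     uint32_t vmid, unsigned char hub_ip)
{
	unsigned int i;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (RREG32_RLC_NO_KIQ(ack, hub_ip) & (1 << vmid))
			return true;
		udelay(1);
	}
	return false;	/* flush timed out for this vmhub */
}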
/**
@@ -412,62 +338,31 @@ error_alloc:
*
* Flush the TLB for the requested pasid.
*/
-static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
- uint16_t pasid, uint32_t flush_type,
- bool all_hub, uint32_t inst)
+static void gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
+ uint16_t pasid, uint32_t flush_type,
+ bool all_hub, uint32_t inst)
{
+ uint16_t queried;
int vmid, i;
- signed long r;
- uint32_t seq;
- uint16_t queried_pasid;
- bool ret;
- u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
- struct amdgpu_ring *ring = &adev->gfx.kiq[0].ring;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
-
- if (amdgpu_emu_mode == 0 && ring->sched.ready) {
- spin_lock(&adev->gfx.kiq[0].ring_lock);
- /* 2 dwords flush + 8 dwords fence */
- amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
- kiq->pmf->kiq_invalidate_tlbs(ring,
- pasid, flush_type, all_hub);
- r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
- if (r) {
- amdgpu_ring_undo(ring);
- spin_unlock(&adev->gfx.kiq[0].ring_lock);
- return -ETIME;
- }
-
- amdgpu_ring_commit(ring);
- spin_unlock(&adev->gfx.kiq[0].ring_lock);
- r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
- if (r < 1) {
- dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
- return -ETIME;
- }
-
- return 0;
- }
for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
-
- ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
- &queried_pasid);
- if (ret && queried_pasid == pasid) {
- if (all_hub) {
- for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
- gmc_v10_0_flush_gpu_tlb(adev, vmid,
- i, flush_type);
- } else {
- gmc_v10_0_flush_gpu_tlb(adev, vmid,
- AMDGPU_GFXHUB(0), flush_type);
- }
- if (!adev->enable_mes)
- break;
+ bool valid;
+
+ valid = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
+ &queried);
+ if (!valid || queried != pasid)
+ continue;
+
+ if (all_hub) {
+ for_each_set_bit(i, adev->vmhubs_mask,
+ AMDGPU_MAX_VMHUBS)
+ gmc_v10_0_flush_gpu_tlb(adev, vmid, i,
+ flush_type);
+ } else {
+ gmc_v10_0_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0),
+ flush_type);
}
}
-
- return 0;
}
static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
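With the KIQ path dropped from this callback and flush_pasid_uses_kiq set in
hw_init below, the per-ASIC hook now only handles the direct-MMIO case and
returns void. A minimal usage sketch (wrapper name is hypothetical):

/* Sketch: flush every hub's TLB entries that map the given PASID. */
static void example_flush_pasid_all_hubs(struct amdgpu_device *adev,
					 uint16_t pasid)
{
	gmc_v10_0_flush_gpu_tlb_pasid(adev, pasid, 0 /* flush_type */,
				      true /* all_hub */, 0 /* inst */);
}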
@@ -634,6 +529,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
}
if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+ AMDGPU_GEM_CREATE_EXT_COHERENT |
AMDGPU_GEM_CREATE_UNCACHED))
*flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) |
AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
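The hunk adds AMDGPU_GEM_CREATE_EXT_COHERENT to the set of BO creation flags
that force the uncached MTYPE; the test reads as this sketch (helper name
illustrative):

/* Any of these creation flags makes the mapping use MTYPE_UC. */
static bool gmc_v10_0_bo_wants_uc(struct amdgpu_bo *bo)
{
	return bo && (bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
				   AMDGPU_GEM_CREATE_EXT_COHERENT |
				   AMDGPU_GEM_CREATE_UNCACHED));
}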
@@ -680,7 +576,7 @@ static void gmc_v10_0_set_gmc_funcs(struct amdgpu_device *adev)
static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[UMC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
case IP_VERSION(8, 7, 0):
adev->umc.max_ras_err_cnt_per_query = UMC_V8_7_TOTAL_CHANNEL_NUM;
adev->umc.channel_inst_num = UMC_V8_7_CHANNEL_INSTANCE_NUM;
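This switch, like the other hunks below, moves from indexing the raw
ip_versions array to the accessor. Assuming the inline helper from amdgpu.h
at the time of this series, the two forms are equivalent:

/* Assumed definition of the accessor adopted throughout this patch. */
static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
					 uint8_t hwip, int inst)
{
	return adev->ip_versions[hwip][inst];
}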
@@ -697,7 +593,7 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev)
static void gmc_v10_0_set_mmhub_funcs(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(2, 3, 0):
case IP_VERSION(2, 4, 0):
case IP_VERSION(2, 4, 1):
@@ -711,7 +607,7 @@ static void gmc_v10_0_set_mmhub_funcs(struct amdgpu_device *adev)
static void gmc_v10_0_set_gfxhub_funcs(struct amdgpu_device *adev)
{
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 1):
@@ -777,8 +673,9 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev,
base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
amdgpu_gmc_vram_location(adev, &adev->gmc, base);
- amdgpu_gmc_gart_location(adev, mc);
- amdgpu_gmc_agp_location(adev, mc);
+ amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
+ if (!amdgpu_sriov_vf(adev))
+ amdgpu_gmc_agp_location(adev, mc);
/* base offset of vram pages */
adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
@@ -825,7 +722,7 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev)
/* set the gart size */
if (amdgpu_gart_size == -1) {
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
default:
adev->gmc.gart_size = 512ULL << 20;
break;
@@ -892,7 +789,7 @@ static int gmc_v10_0_sw_init(void *handle)
adev->gmc.vram_vendor = vram_vendor;
}
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
adev->gmc.mall_size = 128 * 1024 * 1024;
break;
@@ -910,7 +807,7 @@ static int gmc_v10_0_sw_init(void *handle)
break;
}
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 1):
case IP_VERSION(10, 1, 2):
@@ -1084,8 +981,10 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
static int gmc_v10_0_hw_init(void *handle)
{
- int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ adev->gmc.flush_pasid_uses_kiq = !amdgpu_emu_mode;
/* The sequence of these two function calls matters. */
gmc_v10_0_init_golden_registers(adev);
@@ -1195,7 +1094,8 @@ static int gmc_v10_0_set_clockgating_state(void *handle,
is a new problem observed at DF 3.0.3; with the same suspend sequence, no
issue has been seen on the DF 3.0.2 series platform.
*/
- if (adev->in_s0ix && adev->ip_versions[DF_HWIP][0] > IP_VERSION(3, 0, 2)) {
+ if (adev->in_s0ix &&
+ amdgpu_ip_version(adev, DF_HWIP, 0) > IP_VERSION(3, 0, 2)) {
dev_dbg(adev->dev, "keep mmhub clock gating being enabled for s0ix\n");
return 0;
}
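The > comparison is valid because IP_VERSION packs major/minor/revision into
one integer; the assumed macro definition from the driver headers:

/* Major in bits 23:16, minor in 15:8, revision in 7:0, so e.g.
 * IP_VERSION(3, 0, 3) > IP_VERSION(3, 0, 2) holds numerically.
 */
#define IP_VERSION(mj, mn, rv) (((mj) << 16) | ((mn) << 8) | (rv))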
@@ -1204,7 +1104,7 @@ static int gmc_v10_0_set_clockgating_state(void *handle,
if (r)
return r;
- if (adev->ip_versions[ATHUB_HWIP][0] >= IP_VERSION(2, 1, 0))
+ if (amdgpu_ip_version(adev, ATHUB_HWIP, 0) >= IP_VERSION(2, 1, 0))
return athub_v2_1_set_clockgating(adev, state);
else
return athub_v2_0_set_clockgating(adev, state);
@@ -1214,13 +1114,13 @@ static void gmc_v10_0_get_clockgating_state(void *handle, u64 *flags)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 3) ||
- adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 4))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 3) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 4))
return;
adev->mmhub.funcs->get_clockgating(adev, flags);
- if (adev->ip_versions[ATHUB_HWIP][0] >= IP_VERSION(2, 1, 0))
+ if (amdgpu_ip_version(adev, ATHUB_HWIP, 0) >= IP_VERSION(2, 1, 0))
athub_v2_1_get_clockgating(adev, flags);
else
athub_v2_0_get_clockgating(adev, flags);