Diffstat (limited to 'drivers/gpu/drm/msm/adreno/a6xx_gpu.c')
 -rw-r--r--  drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 860
 1 file changed, 574 insertions, 286 deletions
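A note before the diff: the first hunk below adds read_gmu_ao_counter(), which samples the GMU's 64-bit always-on counter through a pair of 32-bit registers (REG_A6XX_GMU_ALWAYS_ON_COUNTER_H/_L) using a high/low/high read sequence, retrying until the two high-word samples agree so a low-word rollover between the reads cannot produce a torn value; later hunks then use it for submit tracing and a6xx_gmu_get_timestamp() instead of waking the GPU with an OOB vote just to read REG_A6XX_CP_ALWAYS_ON_COUNTER. The following is a minimal user-space sketch of that read pattern only, assuming invented stand-ins (fake_counter, read_counter_hi/lo) for the real registers and gmu_read():

/* Build: cc -o ao_counter ao_counter.c && ./ao_counter */
#include <stdint.h>
#include <stdio.h>

/*
 * Simulated free-running 64-bit counter exposed as two 32-bit halves,
 * standing in for REG_A6XX_GMU_ALWAYS_ON_COUNTER_L/_H. It starts near a
 * low-word rollover and advances a little on every low-word read so the
 * retry path below is actually exercised.
 */
static uint64_t fake_counter = 0xfffffffeull;

static uint32_t read_counter_lo(void)
{
        fake_counter += 3;
        return (uint32_t)fake_counter;
}

static uint32_t read_counter_hi(void)
{
        return (uint32_t)(fake_counter >> 32);
}

/* high/low/high read, retried until both high-word samples match */
static uint64_t read_counter64(void)
{
        uint64_t count_hi, count_lo, check;

        do {
                count_hi = read_counter_hi();
                count_lo = read_counter_lo();
                check = read_counter_hi();
        } while (count_hi != check);

        return (count_hi << 32) | count_lo;
}

int main(void)
{
        printf("counter = 0x%llx\n", (unsigned long long)read_counter64());
        return 0;
}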
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 019610341df1..0200a7e71cdf 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -16,6 +16,97 @@ #define GPU_PAS_ID 13 +static u64 read_gmu_ao_counter(struct a6xx_gpu *a6xx_gpu) +{ + u64 count_hi, count_lo, temp; + + do { + count_hi = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_H); + count_lo = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_L); + temp = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_H); + } while (unlikely(count_hi != temp)); + + return (count_hi << 32) | count_lo; +} + +static bool fence_status_check(struct msm_gpu *gpu, u32 offset, u32 value, u32 status, u32 mask) +{ + /* Success if !writedropped0/1 */ + if (!(status & mask)) + return true; + + udelay(10); + + /* Try to update fenced register again */ + gpu_write(gpu, offset, value); + + /* We can't do a posted write here because the power domain could be + * in collapse state. So use the heaviest barrier instead + */ + mb(); + return false; +} + +static int fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u32 value, u32 mask) +{ + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; + struct msm_gpu *gpu = &adreno_gpu->base; + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + u32 status; + + gpu_write(gpu, offset, value); + + /* Nothing else to be done in the case of no-GMU */ + if (adreno_has_gmu_wrapper(adreno_gpu)) + return 0; + + /* We can't do a posted write here because the power domain could be + * in collapse state. So use the heaviest barrier instead + */ + mb(); + + if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status, + fence_status_check(gpu, offset, value, status, mask), 0, 1000)) + return 0; + + /* Try again for another 1ms before failing */ + gpu_write(gpu, offset, value); + mb(); + + if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status, + fence_status_check(gpu, offset, value, status, mask), 0, 1000)) { + /* + * The 'delay' warning is here because the pause to print this + * warning will allow gpu to move to power collapse which + * defeats the purpose of continuous polling for 2 ms + */ + dev_err_ratelimited(gmu->dev, "delay in fenced register write (0x%x)\n", + offset); + return 0; + } + + dev_err_ratelimited(gmu->dev, "fenced register write (0x%x) fail\n", + offset); + + return -ETIMEDOUT; +} + +int a6xx_fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u64 value, u32 mask, bool is_64b) +{ + int ret; + + ret = fenced_write(a6xx_gpu, offset, lower_32_bits(value), mask); + if (ret) + return ret; + + if (!is_64b) + return 0; + + ret = fenced_write(a6xx_gpu, offset + 1, upper_32_bits(value), mask); + + return ret; +} + static inline bool _a6xx_check_idle(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); @@ -66,7 +157,7 @@ static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) } } -static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); @@ -86,7 +177,7 @@ static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) /* Update HW if this is the current ring and we are not in preempt*/ if (!a6xx_in_preempt(a6xx_gpu)) { if (a6xx_gpu->cur_ring == ring) - gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr); + a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false); else 
ring->restore_wptr = true; } else { @@ -111,7 +202,8 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, struct msm_ringbuffer *ring, struct msm_gem_submit *submit) { bool sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1; - struct msm_file_private *ctx = submit->queue->ctx; + struct msm_context *ctx = submit->queue->ctx; + struct drm_gpuvm *vm = msm_context_vm(submit->dev, ctx); struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; phys_addr_t ttbr; u32 asid; @@ -120,7 +212,7 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, if (ctx->seqno == ring->cur_ctx_seqno) return; - if (msm_iommu_pagetable_params(ctx->aspace->mmu, &ttbr, &asid)) + if (msm_iommu_pagetable_params(to_msm_vm(vm)->mmu, &ttbr, &asid)) return; if (adreno_gpu->info->family >= ADRENO_7XX_GEN1) { @@ -130,17 +222,44 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence))); OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence))); OUT_RING(ring, submit->seqno - 1); + + OUT_PKT7(ring, CP_THREAD_CONTROL, 1); + OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BOTH); + + /* Reset state used to synchronize BR and BV */ + OUT_PKT7(ring, CP_RESET_CONTEXT_STATE, 1); + OUT_RING(ring, + CP_RESET_CONTEXT_STATE_0_CLEAR_ON_CHIP_TS | + CP_RESET_CONTEXT_STATE_0_CLEAR_RESOURCE_TABLE | + CP_RESET_CONTEXT_STATE_0_CLEAR_BV_BR_COUNTER | + CP_RESET_CONTEXT_STATE_0_RESET_GLOBAL_LOCAL_TS); + + OUT_PKT7(ring, CP_THREAD_CONTROL, 1); + OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BOTH); + + OUT_PKT7(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, LRZ_FLUSH_INVALIDATE); + + OUT_PKT7(ring, CP_THREAD_CONTROL, 1); + OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR); } if (!sysprof) { - if (!adreno_is_a7xx(adreno_gpu)) { + if (!(adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu))) { /* Turn off protected mode to write to special registers */ OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); OUT_RING(ring, 0); } - OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1); - OUT_RING(ring, 1); + if (adreno_is_a8xx(adreno_gpu)) { + OUT_PKT4(ring, REG_A8XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1); + OUT_RING(ring, 1); + OUT_PKT4(ring, REG_A8XX_RBBM_SLICE_PERFCTR_SRAM_INIT_CMD, 1); + OUT_RING(ring, 1); + } else { + OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1); + OUT_RING(ring, 1); + } } /* Execute the table update */ @@ -158,8 +277,8 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, * Needed for preemption */ OUT_PKT7(ring, CP_MEM_WRITE, 5); - OUT_RING(ring, CP_MEM_WRITE_0_ADDR_LO(lower_32_bits(memptr))); - OUT_RING(ring, CP_MEM_WRITE_1_ADDR_HI(upper_32_bits(memptr))); + OUT_RING(ring, A5XX_CP_MEM_WRITE_ADDR_LO(lower_32_bits(memptr))); + OUT_RING(ring, A5XX_CP_MEM_WRITE_ADDR_HI(upper_32_bits(memptr))); OUT_RING(ring, lower_32_bits(ttbr)); OUT_RING(ring, upper_32_bits(ttbr)); OUT_RING(ring, ctx->seqno); @@ -169,7 +288,7 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, * to make sure BV doesn't race ahead while BR is still switching * pagetables. */ - if (adreno_is_a7xx(&a6xx_gpu->base)) { + if (adreno_is_a7xx(&a6xx_gpu->base) || adreno_is_a8xx(&a6xx_gpu->base)) { OUT_PKT7(ring, CP_THREAD_CONTROL, 1); OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR); } @@ -183,20 +302,22 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, OUT_RING(ring, CACHE_INVALIDATE); if (!sysprof) { + u32 reg_status = adreno_is_a8xx(adreno_gpu) ? 
+ REG_A8XX_RBBM_PERFCTR_SRAM_INIT_STATUS : + REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS; /* * Wait for SRAM clear after the pgtable update, so the * two can happen in parallel: */ OUT_PKT7(ring, CP_WAIT_REG_MEM, 6); OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ)); - OUT_RING(ring, CP_WAIT_REG_MEM_1_POLL_ADDR_LO( - REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS)); - OUT_RING(ring, CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0)); + OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_LO(reg_status)); + OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_HI(0)); OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(0x1)); OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(0x1)); OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0)); - if (!adreno_is_a7xx(adreno_gpu)) { + if (!(adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu))) { /* Re-enable protected mode: */ OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); OUT_RING(ring, 1); @@ -212,6 +333,8 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) struct msm_ringbuffer *ring = submit->ring; unsigned int i, ibs = 0; + adreno_check_and_reenable_stall(adreno_gpu); + a6xx_set_pagetable(a6xx_gpu, ring, submit); get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0), @@ -242,10 +365,10 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) break; fallthrough; case MSM_SUBMIT_CMD_BUF: - OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3); + OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3); OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); OUT_RING(ring, upper_32_bits(submit->cmd[i].iova)); - OUT_RING(ring, submit->cmd[i].size); + OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size)); ibs++; break; } @@ -267,7 +390,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) rbmemptr_stats(ring, index, alwayson_end)); /* Write the fence to the scratch register */ - OUT_PKT4(ring, REG_A6XX_CP_SCRATCH_REG(2), 1); + OUT_PKT4(ring, REG_A6XX_CP_SCRATCH(2), 1); OUT_RING(ring, submit->seqno); /* @@ -281,8 +404,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence))); OUT_RING(ring, submit->seqno); - trace_msm_gpu_submit_flush(submit, - gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER)); + trace_msm_gpu_submit_flush(submit, read_gmu_ao_counter(a6xx_gpu)); a6xx_flush(gpu, ring); } @@ -333,8 +455,11 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); struct msm_ringbuffer *ring = submit->ring; + u32 rbbm_perfctr_cp0, cp_always_on_counter; unsigned int i, ibs = 0; + adreno_check_and_reenable_stall(adreno_gpu); + /* * Toggle concurrent binning for pagetable switch and set the thread to * BR since only it can execute the pagetable switch packets. 
@@ -351,10 +476,16 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) if (gpu->nr_rings > 1) a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, submit->queue); - get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0), - rbmemptr_stats(ring, index, cpcycles_start)); - get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER, - rbmemptr_stats(ring, index, alwayson_start)); + if (adreno_is_a8xx(adreno_gpu)) { + rbbm_perfctr_cp0 = REG_A8XX_RBBM_PERFCTR_CP(0); + cp_always_on_counter = REG_A8XX_CP_ALWAYS_ON_COUNTER; + } else { + rbbm_perfctr_cp0 = REG_A7XX_RBBM_PERFCTR_CP(0); + cp_always_on_counter = REG_A6XX_CP_ALWAYS_ON_COUNTER; + } + + get_stats_counter(ring, rbbm_perfctr_cp0, rbmemptr_stats(ring, index, cpcycles_start)); + get_stats_counter(ring, cp_always_on_counter, rbmemptr_stats(ring, index, alwayson_start)); OUT_PKT7(ring, CP_THREAD_CONTROL, 1); OUT_RING(ring, CP_SET_THREAD_BOTH); @@ -377,10 +508,10 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) break; fallthrough; case MSM_SUBMIT_CMD_BUF: - OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3); + OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3); OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); OUT_RING(ring, upper_32_bits(submit->cmd[i].iova)); - OUT_RING(ring, submit->cmd[i].size); + OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size)); ibs++; break; } @@ -401,14 +532,17 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) OUT_RING(ring, 0x00e); /* IB1LIST end */ } - get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0), - rbmemptr_stats(ring, index, cpcycles_end)); - get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER, - rbmemptr_stats(ring, index, alwayson_end)); + get_stats_counter(ring, rbbm_perfctr_cp0, rbmemptr_stats(ring, index, cpcycles_end)); + get_stats_counter(ring, cp_always_on_counter, rbmemptr_stats(ring, index, alwayson_end)); /* Write the fence to the scratch register */ - OUT_PKT4(ring, REG_A6XX_CP_SCRATCH_REG(2), 1); - OUT_RING(ring, submit->seqno); + if (adreno_is_a8xx(adreno_gpu)) { + OUT_PKT4(ring, REG_A8XX_CP_SCRATCH_GLOBAL(2), 1); + OUT_RING(ring, submit->seqno); + } else { + OUT_PKT4(ring, REG_A6XX_CP_SCRATCH(2), 1); + OUT_RING(ring, submit->seqno); + } OUT_PKT7(ring, CP_THREAD_CONTROL, 1); OUT_RING(ring, CP_SET_THREAD_BR); @@ -480,8 +614,7 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) } - trace_msm_gpu_submit_flush(submit, - gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER)); + trace_msm_gpu_submit_flush(submit, read_gmu_ao_counter(a6xx_gpu)); a6xx_flush(gpu, ring); @@ -504,15 +637,26 @@ static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state) if (adreno_is_a630(adreno_gpu)) clock_cntl_on = 0x8aa8aa02; - else if (adreno_is_a610(adreno_gpu)) + else if (adreno_is_a610(adreno_gpu) || adreno_is_a612(adreno_gpu)) clock_cntl_on = 0xaaa8aa82; else if (adreno_is_a702(adreno_gpu)) clock_cntl_on = 0xaaaaaa82; else clock_cntl_on = 0x8aa8aa82; - cgc_delay = adreno_is_a615_family(adreno_gpu) ? 0x111 : 0x10111; - cgc_hyst = adreno_is_a615_family(adreno_gpu) ? 0x555 : 0x5555; + if (adreno_is_a612(adreno_gpu)) + cgc_delay = 0x11; + else if (adreno_is_a615_family(adreno_gpu)) + cgc_delay = 0x111; + else + cgc_delay = 0x10111; + + if (adreno_is_a612(adreno_gpu)) + cgc_hyst = 0x55; + else if (adreno_is_a615_family(adreno_gpu)) + cgc_hyst = 0x555; + else + cgc_hyst = 0x5555; gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL, state ? 
adreno_gpu->info->a6xx->gmu_cgc_mode : 0); @@ -585,110 +729,124 @@ static void a6xx_set_cp_protect(struct msm_gpu *gpu) gpu_write(gpu, REG_A6XX_CP_PROTECT(protect->count_max - 1), protect->regs[i]); } -static void a6xx_calc_ubwc_config(struct adreno_gpu *gpu) +static int a6xx_calc_ubwc_config(struct adreno_gpu *gpu) { - gpu->ubwc_config.rgb565_predicator = 0; - gpu->ubwc_config.uavflagprd_inv = 0; - gpu->ubwc_config.min_acc_len = 0; - gpu->ubwc_config.ubwc_swizzle = 0x6; - gpu->ubwc_config.macrotile_mode = 0; - gpu->ubwc_config.highest_bank_bit = 15; + const struct qcom_ubwc_cfg_data *common_cfg; + struct qcom_ubwc_cfg_data *cfg = &gpu->_ubwc_config; + + /* Inherit the common config and make some necessary fixups */ + common_cfg = qcom_ubwc_config_get_data(); + if (IS_ERR(common_cfg)) + return PTR_ERR(common_cfg); + + /* Copy the data into the internal struct to drop the const qualifier (temporarily) */ + *cfg = *common_cfg; + + /* Use common config as is for A8x */ + if (!adreno_is_a8xx(gpu)) { + cfg->ubwc_swizzle = 0x6; + cfg->highest_bank_bit = 15; + } if (adreno_is_a610(gpu)) { - gpu->ubwc_config.highest_bank_bit = 13; - gpu->ubwc_config.min_acc_len = 1; - gpu->ubwc_config.ubwc_swizzle = 0x7; + cfg->highest_bank_bit = 13; + cfg->ubwc_swizzle = 0x7; } + if (adreno_is_a612(gpu)) + cfg->highest_bank_bit = 14; + if (adreno_is_a618(gpu)) - gpu->ubwc_config.highest_bank_bit = 14; + cfg->highest_bank_bit = 14; if (adreno_is_a619(gpu)) /* TODO: Should be 14 but causes corruption at e.g. 1920x1200 on DP */ - gpu->ubwc_config.highest_bank_bit = 13; + cfg->highest_bank_bit = 13; if (adreno_is_a619_holi(gpu)) - gpu->ubwc_config.highest_bank_bit = 13; + cfg->highest_bank_bit = 13; - if (adreno_is_a621(gpu)) { - gpu->ubwc_config.highest_bank_bit = 13; - gpu->ubwc_config.amsbc = 1; - gpu->ubwc_config.uavflagprd_inv = 2; - } - - if (adreno_is_a640_family(gpu)) - gpu->ubwc_config.amsbc = 1; + if (adreno_is_a621(gpu)) + cfg->highest_bank_bit = 13; - if (adreno_is_a680(gpu)) - gpu->ubwc_config.macrotile_mode = 1; + if (adreno_is_a623(gpu)) + cfg->highest_bank_bit = 16; if (adreno_is_a650(gpu) || adreno_is_a660(gpu) || adreno_is_a690(gpu) || adreno_is_a730(gpu) || adreno_is_a740_family(gpu)) { - /* TODO: get ddr type from bootloader and use 2 for LPDDR4 */ - gpu->ubwc_config.highest_bank_bit = 16; - gpu->ubwc_config.amsbc = 1; - gpu->ubwc_config.rgb565_predicator = 1; - gpu->ubwc_config.uavflagprd_inv = 2; - gpu->ubwc_config.macrotile_mode = 1; + /* TODO: get ddr type from bootloader and use 15 for LPDDR4 */ + cfg->highest_bank_bit = 16; } if (adreno_is_a663(gpu)) { - gpu->ubwc_config.highest_bank_bit = 13; - gpu->ubwc_config.amsbc = 1; - gpu->ubwc_config.rgb565_predicator = 1; - gpu->ubwc_config.uavflagprd_inv = 2; - gpu->ubwc_config.macrotile_mode = 1; - gpu->ubwc_config.ubwc_swizzle = 0x4; + cfg->highest_bank_bit = 13; + cfg->ubwc_swizzle = 0x4; } - if (adreno_is_7c3(gpu)) { - gpu->ubwc_config.highest_bank_bit = 14; - gpu->ubwc_config.amsbc = 1; - gpu->ubwc_config.rgb565_predicator = 1; - gpu->ubwc_config.uavflagprd_inv = 2; - gpu->ubwc_config.macrotile_mode = 1; - } + if (adreno_is_7c3(gpu)) + cfg->highest_bank_bit = 14; - if (adreno_is_a702(gpu)) { - gpu->ubwc_config.highest_bank_bit = 14; - gpu->ubwc_config.min_acc_len = 1; - } + if (adreno_is_a702(gpu)) + cfg->highest_bank_bit = 14; + + if (cfg->highest_bank_bit != common_cfg->highest_bank_bit) + DRM_WARN_ONCE("Inconclusive highest_bank_bit value: %u (GPU) vs %u (UBWC_CFG)\n", + cfg->highest_bank_bit, common_cfg->highest_bank_bit); + + if 
(cfg->ubwc_swizzle != common_cfg->ubwc_swizzle) + DRM_WARN_ONCE("Inconclusive ubwc_swizzle value: %u (GPU) vs %u (UBWC_CFG)\n", + cfg->ubwc_swizzle, common_cfg->ubwc_swizzle); + + gpu->ubwc_config = &gpu->_ubwc_config; + + return 0; } static void a6xx_set_ubwc_config(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + const struct qcom_ubwc_cfg_data *cfg = adreno_gpu->ubwc_config; /* * We subtract 13 from the highest bank bit (13 is the minimum value * allowed by hw) and write the lowest two bits of the remaining value * as hbb_lo and the one above it as hbb_hi to the hardware. */ - BUG_ON(adreno_gpu->ubwc_config.highest_bank_bit < 13); - u32 hbb = adreno_gpu->ubwc_config.highest_bank_bit - 13; + BUG_ON(cfg->highest_bank_bit < 13); + u32 hbb = cfg->highest_bank_bit - 13; + bool rgb565_predicator = cfg->ubwc_enc_version >= UBWC_4_0; + u32 level2_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL2); + bool ubwc_mode = qcom_ubwc_get_ubwc_mode(cfg); + bool amsbc = cfg->ubwc_enc_version >= UBWC_3_0; + bool min_acc_len_64b = false; + u8 uavflagprd_inv = 0; u32 hbb_hi = hbb >> 2; u32 hbb_lo = hbb & 3; - u32 ubwc_mode = adreno_gpu->ubwc_config.ubwc_swizzle & 1; - u32 level2_swizzling_dis = !(adreno_gpu->ubwc_config.ubwc_swizzle & 2); + + if (adreno_is_a650_family(adreno_gpu) || adreno_is_a7xx(adreno_gpu)) + uavflagprd_inv = 2; + + if (adreno_is_a610(adreno_gpu) || adreno_is_a702(adreno_gpu)) + min_acc_len_64b = true; gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL, level2_swizzling_dis << 12 | - adreno_gpu->ubwc_config.rgb565_predicator << 11 | - hbb_hi << 10 | adreno_gpu->ubwc_config.amsbc << 4 | - adreno_gpu->ubwc_config.min_acc_len << 3 | + rgb565_predicator << 11 | + hbb_hi << 10 | amsbc << 4 | + min_acc_len_64b << 3 | hbb_lo << 1 | ubwc_mode); gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, level2_swizzling_dis << 6 | hbb_hi << 4 | - adreno_gpu->ubwc_config.min_acc_len << 3 | + min_acc_len_64b << 3 | hbb_lo << 1 | ubwc_mode); gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL, level2_swizzling_dis << 12 | hbb_hi << 10 | - adreno_gpu->ubwc_config.uavflagprd_inv << 4 | - adreno_gpu->ubwc_config.min_acc_len << 3 | + uavflagprd_inv << 4 | + min_acc_len_64b << 3 | hbb_lo << 1 | ubwc_mode); if (adreno_is_a7xx(adreno_gpu)) @@ -696,10 +854,10 @@ static void a6xx_set_ubwc_config(struct msm_gpu *gpu) FIELD_PREP(GENMASK(8, 5), hbb_lo)); gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, - adreno_gpu->ubwc_config.min_acc_len << 23 | hbb_lo << 21); + min_acc_len_64b << 23 | hbb_lo << 21); gpu_write(gpu, REG_A6XX_RBBM_NC_MODE_CNTL, - adreno_gpu->ubwc_config.macrotile_mode); + cfg->macrotile_mode); } static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu) @@ -712,11 +870,10 @@ static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu) u32 *dest = (u32 *)&lock->regs[0]; int i; - reglist = adreno_gpu->info->a6xx->pwrup_reglist; - lock->gpu_req = lock->cpu_req = lock->turn = 0; - lock->ifpc_list_len = 0; - lock->preemption_list_len = reglist->count; + + reglist = adreno_gpu->info->a6xx->ifpc_reglist; + lock->ifpc_list_len = reglist->count; /* * For each entry in each of the lists, write the offset and the current @@ -727,6 +884,14 @@ static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu) *dest++ = gpu_read(gpu, reglist->regs[i]); } + reglist = adreno_gpu->info->a6xx->pwrup_reglist; + lock->preemption_list_len = reglist->count; + + for (i = 0; i < reglist->count; i++) { + *dest++ = reglist->regs[i]; + *dest++ = gpu_read(gpu, reglist->regs[i]); + } + /* * The overall register list is composed of * 1. 
Static IFPC-only registers @@ -870,7 +1035,7 @@ static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu, return false; /* A7xx is safe! */ - if (adreno_is_a7xx(adreno_gpu) || adreno_is_a702(adreno_gpu)) + if (adreno_is_a7xx(adreno_gpu) || adreno_is_a702(adreno_gpu) || adreno_is_a8xx(adreno_gpu)) return true; /* @@ -945,7 +1110,7 @@ static int a6xx_ucode_load(struct msm_gpu *gpu) msm_gem_object_set_name(a6xx_gpu->sqe_bo, "sqefw"); if (!a6xx_ucode_check_version(a6xx_gpu, a6xx_gpu->sqe_bo)) { - msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->aspace); + msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->vm); drm_gem_object_put(a6xx_gpu->sqe_bo); a6xx_gpu->sqe_bo = NULL; @@ -953,6 +1118,23 @@ static int a6xx_ucode_load(struct msm_gpu *gpu) } } + if (!a6xx_gpu->aqe_bo && adreno_gpu->fw[ADRENO_FW_AQE]) { + a6xx_gpu->aqe_bo = adreno_fw_create_bo(gpu, + adreno_gpu->fw[ADRENO_FW_AQE], &a6xx_gpu->aqe_iova); + + if (IS_ERR(a6xx_gpu->aqe_bo)) { + int ret = PTR_ERR(a6xx_gpu->aqe_bo); + + a6xx_gpu->aqe_bo = NULL; + DRM_DEV_ERROR(&gpu->pdev->dev, + "Could not allocate AQE ucode: %d\n", ret); + + return ret; + } + + msm_gem_object_set_name(a6xx_gpu->aqe_bo, "aqefw"); + } + /* * Expanded APRIV and targets that support WHERE_AM_I both need a * privileged buffer to store the RPTR shadow @@ -962,7 +1144,7 @@ static int a6xx_ucode_load(struct msm_gpu *gpu) a6xx_gpu->shadow = msm_gem_kernel_new(gpu->dev, sizeof(u32) * gpu->nr_rings, MSM_BO_WC | MSM_BO_MAP_PRIV, - gpu->aspace, &a6xx_gpu->shadow_bo, + gpu->vm, &a6xx_gpu->shadow_bo, &a6xx_gpu->shadow_iova); if (IS_ERR(a6xx_gpu->shadow)) @@ -973,7 +1155,7 @@ static int a6xx_ucode_load(struct msm_gpu *gpu) a6xx_gpu->pwrup_reglist_ptr = msm_gem_kernel_new(gpu->dev, PAGE_SIZE, MSM_BO_WC | MSM_BO_MAP_PRIV, - gpu->aspace, &a6xx_gpu->pwrup_reglist_bo, + gpu->vm, &a6xx_gpu->pwrup_reglist_bo, &a6xx_gpu->pwrup_reglist_iova); if (IS_ERR(a6xx_gpu->pwrup_reglist_ptr)) @@ -984,7 +1166,7 @@ static int a6xx_ucode_load(struct msm_gpu *gpu) return 0; } -static int a6xx_zap_shader_init(struct msm_gpu *gpu) +int a6xx_zap_shader_init(struct msm_gpu *gpu) { static bool loaded; int ret; @@ -1097,17 +1279,20 @@ static int hw_init(struct msm_gpu *gpu) /* enable hardware clockgating */ a6xx_set_hwcg(gpu, true); - /* VBIF/GBIF start*/ - if (adreno_is_a610_family(adreno_gpu) || - adreno_is_a640_family(adreno_gpu) || - adreno_is_a650_family(adreno_gpu) || - adreno_is_a7xx(adreno_gpu)) { + /* For gmuwrapper implementations, do the VBIF/GBIF CX configuration here */ + if (adreno_is_a610_family(adreno_gpu)) { gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE0, 0x00071620); gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE1, 0x00071620); gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE2, 0x00071620); gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE3, 0x00071620); - gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, - adreno_is_a7xx(adreno_gpu) ? 
0x2120212 : 0x3); + } + + if (adreno_is_a610_family(adreno_gpu) || + adreno_is_a640_family(adreno_gpu) || + adreno_is_a650_family(adreno_gpu)) { + gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x3); + } else if (adreno_is_a7xx(adreno_gpu)) { + gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x2120212); } else { gpu_write(gpu, REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3); } @@ -1123,12 +1308,12 @@ static int hw_init(struct msm_gpu *gpu) /* Disable L2 bypass in the UCHE */ if (adreno_is_a7xx(adreno_gpu)) { - gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, 0x0001fffffffff000llu); - gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, 0x0001fffffffff000llu); + gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base); + gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base); } else { - gpu_write64(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX, 0x0001ffffffffffc0llu); - gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, 0x0001fffffffff000llu); - gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, 0x0001fffffffff000llu); + gpu_write64(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX, adreno_gpu->uche_trap_base + 0xfc0); + gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base); + gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base); } if (!(adreno_is_a650_family(adreno_gpu) || @@ -1162,10 +1347,10 @@ static int hw_init(struct msm_gpu *gpu) } if (adreno_is_a660_family(adreno_gpu)) - gpu_write(gpu, REG_A6XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020); + gpu_write(gpu, REG_A7XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020); /* Setting the mem pool size */ - if (adreno_is_a610(adreno_gpu)) { + if (adreno_is_a610(adreno_gpu) || adreno_is_a612(adreno_gpu)) { gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 48); gpu_write(gpu, REG_A6XX_CP_MEM_POOL_DBG_ADDR, 47); } else if (adreno_is_a702(adreno_gpu)) { @@ -1198,7 +1383,8 @@ static int hw_init(struct msm_gpu *gpu) a6xx_set_ubwc_config(gpu); /* Enable fault detection */ - if (adreno_is_a730(adreno_gpu) || + if (adreno_is_a612(adreno_gpu) || + adreno_is_a730(adreno_gpu) || adreno_is_a740_family(adreno_gpu)) gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0xcfffff); else if (adreno_is_a690(adreno_gpu)) @@ -1214,14 +1400,14 @@ static int hw_init(struct msm_gpu *gpu) /* Set weights for bicubic filtering */ if (adreno_is_a650_family(adreno_gpu) || adreno_is_x185(adreno_gpu)) { - gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0, 0); - gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1, + gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(0), 0); + gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(1), 0x3fe05ff4); - gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2, + gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(2), 0x3fa0ebee); - gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_3, + gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(3), 0x3f5193ed); - gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4, + gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(4), 0x3f0243f0); } @@ -1417,25 +1603,25 @@ static void a6xx_recover(struct msm_gpu *gpu) struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); struct a6xx_gmu *gmu = &a6xx_gpu->gmu; - int i, active_submits; + int active_submits; adreno_dump_info(gpu); - for (i = 0; i < 8; i++) - DRM_DEV_INFO(&gpu->pdev->dev, "CP_SCRATCH_REG%d: %u\n", i, - gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(i))); + if (a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) { + /* Sometimes crashstate capture is skipped, so SQE should be halted here again */ + gpu_write(gpu, 
REG_A6XX_CP_SQE_CNTL, 3); + + if (hang_debug) + a6xx_dump(gpu); - if (hang_debug) - a6xx_dump(gpu); + } /* * To handle recovery specific sequences during the rpm suspend we are * about to trigger */ - a6xx_gpu->hung = true; - /* Halt SQE first */ - gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3); + a6xx_gpu->hung = true; pm_runtime_dont_use_autosuspend(&gpu->pdev->dev); @@ -1449,9 +1635,9 @@ static void a6xx_recover(struct msm_gpu *gpu) */ gpu->active_submits = 0; - if (adreno_has_gmu_wrapper(adreno_gpu)) { + if (adreno_has_gmu_wrapper(adreno_gpu) || adreno_has_rgmu(adreno_gpu)) { /* Drain the outstanding traffic on memory buses */ - a6xx_bus_clear_pending_transactions(adreno_gpu, true); + adreno_gpu->funcs->bus_halt(adreno_gpu, true); /* Reset the GPU to a clean state */ a6xx_gpu_sw_reset(gpu, true); @@ -1610,10 +1796,10 @@ static int a6xx_fault_handler(void *arg, unsigned long iova, int flags, void *da const char *block = "unknown"; u32 scratch[] = { - gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(4)), - gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(5)), - gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(6)), - gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(7)), + gpu_read(gpu, REG_A6XX_CP_SCRATCH(4)), + gpu_read(gpu, REG_A6XX_CP_SCRATCH(5)), + gpu_read(gpu, REG_A6XX_CP_SCRATCH(6)), + gpu_read(gpu, REG_A6XX_CP_SCRATCH(7)), }; if (info) @@ -1666,8 +1852,6 @@ static void a6xx_cp_hw_err_irq(struct msm_gpu *gpu) static void a6xx_fault_detect_irq(struct msm_gpu *gpu) { - struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); /* @@ -1679,13 +1863,6 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu) if (gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT) return; - /* - * Force the GPU to stay on until after we finish - * collecting information - */ - if (!adreno_has_gmu_wrapper(adreno_gpu)) - gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1); - DRM_DEV_ERROR(&gpu->pdev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", ring ? ring->id : -1, ring ? 
ring->fctx->last_fence : 0, @@ -1698,7 +1875,10 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu) gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE)); /* Turn off the hangcheck timer to keep it from bothering us */ - del_timer(&gpu->hangcheck_timer); + timer_delete(&gpu->hangcheck_timer); + + /* Turn off interrupts to avoid triggering recovery again */ + gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK, 0); kthread_queue_work(gpu->worker, &gpu->recover_work); } @@ -1718,15 +1898,55 @@ static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu) */ if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) { - del_timer(&gpu->hangcheck_timer); + timer_delete(&gpu->hangcheck_timer); kthread_queue_work(gpu->worker, &gpu->recover_work); } } +static void a6xx_gpu_keepalive_vote(struct msm_gpu *gpu, bool on) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + + if (adreno_has_gmu_wrapper(adreno_gpu)) + return; + + gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, on); +} + +static int irq_poll_fence(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + u32 status; + + if (adreno_has_gmu_wrapper(adreno_gpu)) + return 0; + + if (gmu_poll_timeout_atomic(gmu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, status, !status, 1, 100)) { + u32 rbbm_unmasked = gmu_read(gmu, REG_A6XX_GMU_RBBM_INT_UNMASKED_STATUS); + + dev_err_ratelimited(&gpu->pdev->dev, + "irq fence poll timeout, fence_ctrl=0x%x, unmasked_status=0x%x\n", + status, rbbm_unmasked); + return -ETIMEDOUT; + } + + return 0; +} + static irqreturn_t a6xx_irq(struct msm_gpu *gpu) { struct msm_drm_private *priv = gpu->dev->dev_private; + + /* Set keepalive vote to avoid power collapse after RBBM_INT_0_STATUS is read */ + a6xx_gpu_keepalive_vote(gpu, true); + + if (irq_poll_fence(gpu)) + goto done; + u32 status = gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS); gpu_write(gpu, REG_A6XX_RBBM_INT_CLEAR_CMD, status); @@ -1763,6 +1983,9 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu) if (status & A6XX_RBBM_INT_0_MASK_CP_SW) a6xx_preempt_irq(gpu); +done: + a6xx_gpu_keepalive_vote(gpu, false); + return IRQ_HANDLED; } @@ -1908,7 +2131,7 @@ static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu) u32 fuse_val; int ret; - if (adreno_is_a750(adreno_gpu)) { + if (adreno_is_a750(adreno_gpu) || adreno_is_a8xx(adreno_gpu)) { /* * Assume that if qcom scm isn't available, that whatever * replacement allows writing the fuse register ourselves. @@ -1934,9 +2157,9 @@ static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu) return ret; /* - * On a750 raytracing may be disabled by the firmware, find out - * whether that's the case. The scm call above sets the fuse - * register. + * On A7XX_GEN3 and newer, raytracing may be disabled by the + * firmware, find out whether that's the case. The scm call + * above sets the fuse register. */ fuse_val = a6xx_llc_read(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE); @@ -1997,7 +2220,7 @@ void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_ void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert) { /* 11nm chips (e.g. ones with A610) have hw issues with the reset line! 
*/ - if (adreno_is_a610(to_adreno_gpu(gpu))) + if (adreno_is_a610(to_adreno_gpu(gpu)) || adreno_is_a8xx(to_adreno_gpu(gpu))) return; gpu_write(gpu, REG_A6XX_RBBM_SW_RESET_CMD, assert); @@ -2028,7 +2251,12 @@ static int a6xx_gmu_pm_resume(struct msm_gpu *gpu) msm_devfreq_resume(gpu); - adreno_is_a7xx(adreno_gpu) ? a7xx_llc_activate(a6xx_gpu) : a6xx_llc_activate(a6xx_gpu); + if (adreno_is_a8xx(adreno_gpu)) + a8xx_llc_activate(a6xx_gpu); + else if (adreno_is_a7xx(adreno_gpu)) + a7xx_llc_activate(a6xx_gpu); + else + a6xx_llc_activate(a6xx_gpu); return ret; } @@ -2065,6 +2293,12 @@ static int a6xx_pm_resume(struct msm_gpu *gpu) if (ret) goto err_bulk_clk; + ret = clk_bulk_prepare_enable(gmu->nr_clocks, gmu->clocks); + if (ret) { + clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks); + goto err_bulk_clk; + } + if (adreno_is_a619_holi(adreno_gpu)) a6xx_sptprac_enable(gmu); @@ -2078,8 +2312,10 @@ err_bulk_clk: err_set_opp: mutex_unlock(&a6xx_gpu->gmu.lock); - if (!ret) + if (!ret) { msm_devfreq_resume(gpu); + a6xx_llc_activate(a6xx_gpu); + } return ret; } @@ -2120,17 +2356,20 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu) trace_msm_gpu_suspend(0); + a6xx_llc_deactivate(a6xx_gpu); + msm_devfreq_suspend(gpu); mutex_lock(&a6xx_gpu->gmu.lock); /* Drain the outstanding traffic on memory buses */ - a6xx_bus_clear_pending_transactions(adreno_gpu, true); + adreno_gpu->funcs->bus_halt(adreno_gpu, true); if (adreno_is_a619_holi(adreno_gpu)) a6xx_sptprac_disable(gmu); clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks); + clk_bulk_disable_unprepare(gmu->nr_clocks, gmu->clocks); pm_runtime_put_sync(gmu->gxpd); dev_pm_opp_set_opp(&gpu->pdev->dev, NULL); @@ -2152,16 +2391,7 @@ static int a6xx_gmu_get_timestamp(struct msm_gpu *gpu, uint64_t *value) struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); - mutex_lock(&a6xx_gpu->gmu.lock); - - /* Force the GPU power on so we can read this register */ - a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); - - *value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER); - - a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); - - mutex_unlock(&a6xx_gpu->gmu.lock); + *value = read_gmu_ao_counter(a6xx_gpu); return 0; } @@ -2186,12 +2416,17 @@ static void a6xx_destroy(struct msm_gpu *gpu) struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); if (a6xx_gpu->sqe_bo) { - msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->aspace); + msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->vm); drm_gem_object_put(a6xx_gpu->sqe_bo); } + if (a6xx_gpu->aqe_bo) { + msm_gem_unpin_iova(a6xx_gpu->aqe_bo, gpu->vm); + drm_gem_object_put(a6xx_gpu->aqe_bo); + } + if (a6xx_gpu->shadow_bo) { - msm_gem_unpin_iova(a6xx_gpu->shadow_bo, gpu->aspace); + msm_gem_unpin_iova(a6xx_gpu->shadow_bo, gpu->vm); drm_gem_object_put(a6xx_gpu->shadow_bo); } @@ -2231,8 +2466,8 @@ static void a6xx_gpu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp, mutex_unlock(&a6xx_gpu->gmu.lock); } -static struct msm_gem_address_space * -a6xx_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev) +static struct drm_gpuvm * +a6xx_create_vm(struct msm_gpu *gpu, struct platform_device *pdev) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); @@ -2246,22 +2481,21 @@ a6xx_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev) !device_iommu_capable(&pdev->dev, IOMMU_CAP_CACHE_COHERENCY)) quirks |= IO_PGTABLE_QUIRK_ARM_OUTER_WBWA; - return adreno_iommu_create_address_space(gpu, pdev, 
quirks); + return adreno_iommu_create_vm(gpu, pdev, quirks); } -static struct msm_gem_address_space * -a6xx_create_private_address_space(struct msm_gpu *gpu) +static struct drm_gpuvm * +a6xx_create_private_vm(struct msm_gpu *gpu, bool kernel_managed) { struct msm_mmu *mmu; - mmu = msm_iommu_pagetable_create(gpu->aspace->mmu); + mmu = msm_iommu_pagetable_create(to_msm_vm(gpu->vm)->mmu, kernel_managed); if (IS_ERR(mmu)) return ERR_CAST(mmu); - return msm_gem_address_space_create(mmu, - "gpu", 0x100000000ULL, - adreno_private_address_space_size(gpu)); + return msm_gem_vm_create(gpu->dev, mmu, "gpu", ADRENO_VM_START, + adreno_private_vm_size(gpu), kernel_managed); } static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) @@ -2272,18 +2506,36 @@ static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami) return a6xx_gpu->shadow[ring->id]; + /* + * This is true only on an A6XX_GEN1 with GMU, has IFPC enabled and a super old SQE firmware + * without 'whereami' support + */ + WARN_ONCE((to_adreno_gpu(gpu)->info->quirks & ADRENO_QUIRK_IFPC), + "Can't read CP_RB_RPTR register reliably\n"); + return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR); } static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { - struct msm_cp_state cp_state = { + struct msm_cp_state cp_state; + bool progress; + + /* + * With IFPC, KMD doesn't know whether GX power domain is collapsed + * or not. So, we can't blindly read the below registers in GX domain. + * Lets trust the hang detection in HW and lie to the caller that + * there was progress. + */ + if (to_adreno_gpu(gpu)->info->quirks & ADRENO_QUIRK_IFPC) + return true; + + cp_state = (struct msm_cp_state) { .ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE), .ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE), .ib1_rem = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE), .ib2_rem = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE), }; - bool progress; /* * Adjust the remaining data to account for what has already been @@ -2355,7 +2607,105 @@ static int a6xx_set_supported_hw(struct device *dev, const struct adreno_info *i return 0; } -static const struct adreno_gpu_funcs funcs = { +static struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) +{ + struct msm_drm_private *priv = dev->dev_private; + struct platform_device *pdev = priv->gpu_pdev; + struct adreno_platform_config *config = pdev->dev.platform_data; + struct device_node *node; + struct a6xx_gpu *a6xx_gpu; + struct adreno_gpu *adreno_gpu; + struct msm_gpu *gpu; + extern int enable_preemption; + bool is_a7xx; + int ret, nr_rings = 1; + + a6xx_gpu = kzalloc(sizeof(*a6xx_gpu), GFP_KERNEL); + if (!a6xx_gpu) + return ERR_PTR(-ENOMEM); + + adreno_gpu = &a6xx_gpu->base; + gpu = &adreno_gpu->base; + + mutex_init(&a6xx_gpu->gmu.lock); + + adreno_gpu->registers = NULL; + + /* Check if there is a GMU phandle and set it up */ + node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0); + /* FIXME: How do we gracefully handle this? */ + BUG_ON(!node); + + adreno_gpu->gmu_is_wrapper = of_device_is_compatible(node, "qcom,adreno-gmu-wrapper"); + + adreno_gpu->base.hw_apriv = + !!(config->info->quirks & ADRENO_QUIRK_HAS_HW_APRIV); + + /* gpu->info only gets assigned in adreno_gpu_init(). 
A8x is included intentionally */ + is_a7xx = config->info->family >= ADRENO_7XX_GEN1; + + a6xx_llc_slices_init(pdev, a6xx_gpu, is_a7xx); + + ret = a6xx_set_supported_hw(&pdev->dev, config->info); + if (ret) { + a6xx_llc_slices_destroy(a6xx_gpu); + kfree(a6xx_gpu); + return ERR_PTR(ret); + } + + if ((enable_preemption == 1) || (enable_preemption == -1 && + (config->info->quirks & ADRENO_QUIRK_PREEMPTION))) + nr_rings = 4; + + ret = adreno_gpu_init(dev, pdev, adreno_gpu, config->info->funcs, nr_rings); + if (ret) { + a6xx_destroy(&(a6xx_gpu->base.base)); + return ERR_PTR(ret); + } + + /* + * For now only clamp to idle freq for devices where this is known not + * to cause power supply issues: + */ + if (adreno_is_a618(adreno_gpu) || adreno_is_7c3(adreno_gpu)) + priv->gpu_clamp_to_idle = true; + + if (adreno_has_gmu_wrapper(adreno_gpu) || adreno_has_rgmu(adreno_gpu)) + ret = a6xx_gmu_wrapper_init(a6xx_gpu, node); + else + ret = a6xx_gmu_init(a6xx_gpu, node); + of_node_put(node); + if (ret) { + a6xx_destroy(&(a6xx_gpu->base.base)); + return ERR_PTR(ret); + } + + if (adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu)) { + ret = a7xx_cx_mem_init(a6xx_gpu); + if (ret) { + a6xx_destroy(&(a6xx_gpu->base.base)); + return ERR_PTR(ret); + } + } + + adreno_gpu->uche_trap_base = 0x1fffffffff000ull; + + msm_mmu_set_fault_handler(to_msm_vm(gpu->vm)->mmu, gpu, + adreno_gpu->funcs->mmu_fault_handler); + + ret = a6xx_calc_ubwc_config(adreno_gpu); + if (ret) { + a6xx_destroy(&(a6xx_gpu->base.base)); + return ERR_PTR(ret); + } + + /* Set up the preemption specific bits and pieces for each ringbuffer */ + a6xx_preempt_init(gpu); + + return gpu; +} + +const struct adreno_gpu_funcs a6xx_gpu_funcs = { .base = { .get_param = adreno_get_param, .set_param = adreno_set_param, @@ -2378,15 +2728,18 @@ static const struct adreno_gpu_funcs funcs = { .gpu_state_get = a6xx_gpu_state_get, .gpu_state_put = a6xx_gpu_state_put, #endif - .create_address_space = a6xx_create_address_space, - .create_private_address_space = a6xx_create_private_address_space, + .create_vm = a6xx_create_vm, + .create_private_vm = a6xx_create_private_vm, .get_rptr = a6xx_get_rptr, .progress = a6xx_progress, }, + .init = a6xx_gpu_init, .get_timestamp = a6xx_gmu_get_timestamp, + .bus_halt = a6xx_bus_clear_pending_transactions, + .mmu_fault_handler = a6xx_fault_handler, }; -static const struct adreno_gpu_funcs funcs_gmuwrapper = { +const struct adreno_gpu_funcs a6xx_gmuwrapper_funcs = { .base = { .get_param = adreno_get_param, .set_param = adreno_set_param, @@ -2407,15 +2760,18 @@ static const struct adreno_gpu_funcs funcs_gmuwrapper = { .gpu_state_get = a6xx_gpu_state_get, .gpu_state_put = a6xx_gpu_state_put, #endif - .create_address_space = a6xx_create_address_space, - .create_private_address_space = a6xx_create_private_address_space, + .create_vm = a6xx_create_vm, + .create_private_vm = a6xx_create_private_vm, .get_rptr = a6xx_get_rptr, .progress = a6xx_progress, }, + .init = a6xx_gpu_init, .get_timestamp = a6xx_get_timestamp, + .bus_halt = a6xx_bus_clear_pending_transactions, + .mmu_fault_handler = a6xx_fault_handler, }; -static const struct adreno_gpu_funcs funcs_a7xx = { +const struct adreno_gpu_funcs a7xx_gpu_funcs = { .base = { .get_param = adreno_get_param, .set_param = adreno_set_param, @@ -2438,108 +2794,40 @@ static const struct adreno_gpu_funcs funcs_a7xx = { .gpu_state_get = a6xx_gpu_state_get, .gpu_state_put = a6xx_gpu_state_put, #endif - .create_address_space = a6xx_create_address_space, - .create_private_address_space = 
a6xx_create_private_address_space, + .create_vm = a6xx_create_vm, + .create_private_vm = a6xx_create_private_vm, .get_rptr = a6xx_get_rptr, .progress = a6xx_progress, }, + .init = a6xx_gpu_init, .get_timestamp = a6xx_gmu_get_timestamp, + .bus_halt = a6xx_bus_clear_pending_transactions, + .mmu_fault_handler = a6xx_fault_handler, }; -struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) -{ - struct msm_drm_private *priv = dev->dev_private; - struct platform_device *pdev = priv->gpu_pdev; - struct adreno_platform_config *config = pdev->dev.platform_data; - struct device_node *node; - struct a6xx_gpu *a6xx_gpu; - struct adreno_gpu *adreno_gpu; - struct msm_gpu *gpu; - extern int enable_preemption; - bool is_a7xx; - int ret; - - a6xx_gpu = kzalloc(sizeof(*a6xx_gpu), GFP_KERNEL); - if (!a6xx_gpu) - return ERR_PTR(-ENOMEM); - - adreno_gpu = &a6xx_gpu->base; - gpu = &adreno_gpu->base; - - mutex_init(&a6xx_gpu->gmu.lock); - - adreno_gpu->registers = NULL; - - /* Check if there is a GMU phandle and set it up */ - node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0); - /* FIXME: How do we gracefully handle this? */ - BUG_ON(!node); - - adreno_gpu->gmu_is_wrapper = of_device_is_compatible(node, "qcom,adreno-gmu-wrapper"); - - adreno_gpu->base.hw_apriv = - !!(config->info->quirks & ADRENO_QUIRK_HAS_HW_APRIV); - - /* gpu->info only gets assigned in adreno_gpu_init() */ - is_a7xx = config->info->family == ADRENO_7XX_GEN1 || - config->info->family == ADRENO_7XX_GEN2 || - config->info->family == ADRENO_7XX_GEN3; - - a6xx_llc_slices_init(pdev, a6xx_gpu, is_a7xx); - - ret = a6xx_set_supported_hw(&pdev->dev, config->info); - if (ret) { - a6xx_llc_slices_destroy(a6xx_gpu); - kfree(a6xx_gpu); - return ERR_PTR(ret); - } - - if ((enable_preemption == 1) || (enable_preemption == -1 && - (config->info->quirks & ADRENO_QUIRK_PREEMPTION))) - ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_a7xx, 4); - else if (is_a7xx) - ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_a7xx, 1); - else if (adreno_has_gmu_wrapper(adreno_gpu)) - ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_gmuwrapper, 1); - else - ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); - if (ret) { - a6xx_destroy(&(a6xx_gpu->base.base)); - return ERR_PTR(ret); - } - - /* - * For now only clamp to idle freq for devices where this is known not - * to cause power supply issues: - */ - if (adreno_is_a618(adreno_gpu) || adreno_is_7c3(adreno_gpu)) - priv->gpu_clamp_to_idle = true; - - if (adreno_has_gmu_wrapper(adreno_gpu)) - ret = a6xx_gmu_wrapper_init(a6xx_gpu, node); - else - ret = a6xx_gmu_init(a6xx_gpu, node); - of_node_put(node); - if (ret) { - a6xx_destroy(&(a6xx_gpu->base.base)); - return ERR_PTR(ret); - } - - if (adreno_is_a7xx(adreno_gpu)) { - ret = a7xx_cx_mem_init(a6xx_gpu); - if (ret) { - a6xx_destroy(&(a6xx_gpu->base.base)); - return ERR_PTR(ret); - } - } - - if (gpu->aspace) - msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, - a6xx_fault_handler); - - a6xx_calc_ubwc_config(adreno_gpu); - /* Set up the preemption specific bits and pieces for each ringbuffer */ - a6xx_preempt_init(gpu); - - return gpu; -} +const struct adreno_gpu_funcs a8xx_gpu_funcs = { + .base = { + .get_param = adreno_get_param, + .set_param = adreno_set_param, + .hw_init = a8xx_hw_init, + .ucode_load = a6xx_ucode_load, + .pm_suspend = a6xx_gmu_pm_suspend, + .pm_resume = a6xx_gmu_pm_resume, + .recover = a8xx_recover, + .submit = a7xx_submit, + .active_ring = a6xx_active_ring, + .irq = a8xx_irq, + .destroy = a6xx_destroy, + .gpu_busy = a8xx_gpu_busy, + 
.gpu_get_freq = a6xx_gmu_get_freq, + .gpu_set_freq = a6xx_gpu_set_freq, + .create_vm = a6xx_create_vm, + .create_private_vm = a6xx_create_private_vm, + .get_rptr = a6xx_get_rptr, + .progress = a8xx_progress, + }, + .init = a6xx_gpu_init, + .get_timestamp = a8xx_gmu_get_timestamp, + .bus_halt = a8xx_bus_clear_pending_transactions, + .mmu_fault_handler = a8xx_fault_handler, +}; |
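One more pattern worth calling out from the hunks above: fenced_write()/a6xx_fenced_write() issue a register write that may be silently dropped while the GX power domain is collapsing, so after the write the code uses a full barrier (mb()) rather than a posted write, then polls REG_A6XX_GMU_AHB_FENCE_STATUS and, while the status still reports the write as dropped, waits ~10 µs and reissues it, giving the whole dance two 1 ms windows before returning -ETIMEDOUT; the 64-bit variant simply repeats this for the low and high halves. Below is a rough, self-contained model of that retry loop under invented names (reg_write(), write_dropped(), polls_until_ok) rather than the real gmu_poll_timeout() machinery:

/* Build: cc -o fenced_write fenced_write.c && ./fenced_write */
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Toy model of the AHB fence: the first few status polls report the
 * write as dropped, after which it is accepted.
 */
static int polls_until_ok = 3;
static uint32_t reg_value;

static void reg_write(uint32_t value)
{
        reg_value = value;
}

static bool write_dropped(uint32_t mask)
{
        (void)mask;
        return polls_until_ok-- > 0;
}

/*
 * Issue the write, then poll the fence status; while the write is
 * reported dropped, reissue it (the real code also waits ~10 us between
 * attempts), and give up after a bounded number of attempts with a
 * kernel-style negative errno.
 */
static int fenced_write(uint32_t value, uint32_t mask, int max_attempts)
{
        reg_write(value);

        for (int i = 0; i < max_attempts; i++) {
                if (!write_dropped(mask))
                        return 0;       /* write landed */
                reg_write(value);       /* dropped: try again */
        }

        return -ETIMEDOUT;
}

int main(void)
{
        int ret = fenced_write(0x1234, 0x1, 200);

        printf("fenced_write: %s (reg = 0x%x)\n",
               ret ? "timed out" : "ok", reg_value);
        return 0;
}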
