Diffstat (limited to 'drivers/gpu/drm/msm/adreno/a6xx_gpu.c')
-rw-r--r--  drivers/gpu/drm/msm/adreno/a6xx_gpu.c  860
1 file changed, 574 insertions(+), 286 deletions(-)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 019610341df1..0200a7e71cdf 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -16,6 +16,97 @@
#define GPU_PAS_ID 13
+static u64 read_gmu_ao_counter(struct a6xx_gpu *a6xx_gpu)
+{
+ u64 count_hi, count_lo, temp;
+
+ do {
+ count_hi = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_H);
+ count_lo = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_L);
+ temp = gmu_read(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_H);
+ } while (unlikely(count_hi != temp));
+
+ return (count_hi << 32) | count_lo;
+}
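
The hi/lo/hi sequence above is the standard lock-free way to assemble a 64-bit value from a pair of 32-bit registers: if the low word carries into the high word mid-read, the re-read of the high word disagrees and the loop retries. A minimal standalone sketch of the pattern (read32() and the COUNTER_* offsets are placeholders, not driver API):

static u64 read_split_counter(void __iomem *base)
{
	u32 hi, lo, check;

	do {
		hi = read32(base + COUNTER_HI);    /* sample high word */
		lo = read32(base + COUNTER_LO);    /* sample low word */
		check = read32(base + COUNTER_HI); /* re-sample high word */
	} while (hi != check);                     /* LO wrapped into HI: retry */

	return ((u64)hi << 32) | lo;               /* widen before shifting */
}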
+
+static bool fence_status_check(struct msm_gpu *gpu, u32 offset, u32 value, u32 status, u32 mask)
+{
+ /* Success if !writedropped0/1 */
+ if (!(status & mask))
+ return true;
+
+ udelay(10);
+
+ /* Try to update fenced register again */
+ gpu_write(gpu, offset, value);
+
+ /* We can't do a posted write here because the power domain could be
+ * in a collapsed state. So use the heaviest barrier instead
+ */
+ mb();
+ return false;
+}
+
+static int fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u32 value, u32 mask)
+{
+ struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
+ struct msm_gpu *gpu = &adreno_gpu->base;
+ struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
+ u32 status;
+
+ gpu_write(gpu, offset, value);
+
+ /* Nothing else to be done in the case of no-GMU */
+ if (adreno_has_gmu_wrapper(adreno_gpu))
+ return 0;
+
+ /* We can't do a posted write here because the power domain could be
+ * in a collapsed state. So use the heaviest barrier instead
+ */
+ mb();
+
+ if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status,
+ fence_status_check(gpu, offset, value, status, mask), 0, 1000))
+ return 0;
+
+ /* Try again for another 1ms before failing */
+ gpu_write(gpu, offset, value);
+ mb();
+
+ if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status,
+ fence_status_check(gpu, offset, value, status, mask), 0, 1000)) {
+ /*
+ * The 'delay' warning is used here because the pause needed to print
+ * this warning allows the GPU to move to power collapse, which
+ * defeats the purpose of continuously polling for 2 ms
+ */
+ dev_err_ratelimited(gmu->dev, "delay in fenced register write (0x%x)\n",
+ offset);
+ return 0;
+ }
+
+ dev_err_ratelimited(gmu->dev, "fenced register write (0x%x) fail\n",
+ offset);
+
+ return -ETIMEDOUT;
+}
+
+int a6xx_fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u64 value, u32 mask, bool is_64b)
+{
+ int ret;
+
+ ret = fenced_write(a6xx_gpu, offset, lower_32_bits(value), mask);
+ if (ret)
+ return ret;
+
+ if (!is_64b)
+ return 0;
+
+ ret = fenced_write(a6xx_gpu, offset + 1, upper_32_bits(value), mask);
+
+ return ret;
+}
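
Note that fence_status_check() doubles as the poll condition for gmu_poll_timeout(): returning true terminates the poll (the write was not dropped), while returning false re-issues the fenced write before the next status read. a6xx_fenced_write() is the caller-facing wrapper; a typical 32-bit call, mirroring the ring-wptr update later in this patch, plus a hypothetical 64-bit one (reg_lo, iova and mask are placeholders) would look like:

	/* 32-bit fenced write; BIT(0) selects the writedropped0 status bit */
	a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false);

	/* 64-bit: registers are dword-indexed, upper half goes to reg_lo + 1 */
	a6xx_fenced_write(a6xx_gpu, reg_lo, iova, mask, true);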
+
static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
@@ -66,7 +157,7 @@ static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
}
}
-static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
@@ -86,7 +177,7 @@ static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
/* Update HW if this is the current ring and we are not in preempt*/
if (!a6xx_in_preempt(a6xx_gpu)) {
if (a6xx_gpu->cur_ring == ring)
- gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
+ a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false);
else
ring->restore_wptr = true;
} else {
@@ -111,7 +202,8 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
struct msm_ringbuffer *ring, struct msm_gem_submit *submit)
{
bool sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1;
- struct msm_file_private *ctx = submit->queue->ctx;
+ struct msm_context *ctx = submit->queue->ctx;
+ struct drm_gpuvm *vm = msm_context_vm(submit->dev, ctx);
struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
phys_addr_t ttbr;
u32 asid;
@@ -120,7 +212,7 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
if (ctx->seqno == ring->cur_ctx_seqno)
return;
- if (msm_iommu_pagetable_params(ctx->aspace->mmu, &ttbr, &asid))
+ if (msm_iommu_pagetable_params(to_msm_vm(vm)->mmu, &ttbr, &asid))
return;
if (adreno_gpu->info->family >= ADRENO_7XX_GEN1) {
@@ -130,17 +222,44 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
OUT_RING(ring, submit->seqno - 1);
+
+ OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
+ OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BOTH);
+
+ /* Reset state used to synchronize BR and BV */
+ OUT_PKT7(ring, CP_RESET_CONTEXT_STATE, 1);
+ OUT_RING(ring,
+ CP_RESET_CONTEXT_STATE_0_CLEAR_ON_CHIP_TS |
+ CP_RESET_CONTEXT_STATE_0_CLEAR_RESOURCE_TABLE |
+ CP_RESET_CONTEXT_STATE_0_CLEAR_BV_BR_COUNTER |
+ CP_RESET_CONTEXT_STATE_0_RESET_GLOBAL_LOCAL_TS);
+
+ OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
+ OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BOTH);
+
+ OUT_PKT7(ring, CP_EVENT_WRITE, 1);
+ OUT_RING(ring, LRZ_FLUSH_INVALIDATE);
+
+ OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
+ OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
}
if (!sysprof) {
- if (!adreno_is_a7xx(adreno_gpu)) {
+ if (!(adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu))) {
/* Turn off protected mode to write to special registers */
OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
OUT_RING(ring, 0);
}
- OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
- OUT_RING(ring, 1);
+ if (adreno_is_a8xx(adreno_gpu)) {
+ OUT_PKT4(ring, REG_A8XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
+ OUT_RING(ring, 1);
+ OUT_PKT4(ring, REG_A8XX_RBBM_SLICE_PERFCTR_SRAM_INIT_CMD, 1);
+ OUT_RING(ring, 1);
+ } else {
+ OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1);
+ OUT_RING(ring, 1);
+ }
}
/* Execute the table update */
@@ -158,8 +277,8 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
* Needed for preemption
*/
OUT_PKT7(ring, CP_MEM_WRITE, 5);
- OUT_RING(ring, CP_MEM_WRITE_0_ADDR_LO(lower_32_bits(memptr)));
- OUT_RING(ring, CP_MEM_WRITE_1_ADDR_HI(upper_32_bits(memptr)));
+ OUT_RING(ring, A5XX_CP_MEM_WRITE_ADDR_LO(lower_32_bits(memptr)));
+ OUT_RING(ring, A5XX_CP_MEM_WRITE_ADDR_HI(upper_32_bits(memptr)));
OUT_RING(ring, lower_32_bits(ttbr));
OUT_RING(ring, upper_32_bits(ttbr));
OUT_RING(ring, ctx->seqno);
@@ -169,7 +288,7 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
* to make sure BV doesn't race ahead while BR is still switching
* pagetables.
*/
- if (adreno_is_a7xx(&a6xx_gpu->base)) {
+ if (adreno_is_a7xx(&a6xx_gpu->base) || adreno_is_a8xx(&a6xx_gpu->base)) {
OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR);
}
@@ -183,20 +302,22 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu,
OUT_RING(ring, CACHE_INVALIDATE);
if (!sysprof) {
+ u32 reg_status = adreno_is_a8xx(adreno_gpu) ?
+ REG_A8XX_RBBM_PERFCTR_SRAM_INIT_STATUS :
+ REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS;
/*
* Wait for SRAM clear after the pgtable update, so the
* two can happen in parallel:
*/
OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ));
- OUT_RING(ring, CP_WAIT_REG_MEM_1_POLL_ADDR_LO(
- REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS));
- OUT_RING(ring, CP_WAIT_REG_MEM_2_POLL_ADDR_HI(0));
+ OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_LO(reg_status));
+ OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_HI(0));
OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(0x1));
OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(0x1));
OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0));
- if (!adreno_is_a7xx(adreno_gpu)) {
+ if (!(adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu))) {
/* Re-enable protected mode: */
OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
OUT_RING(ring, 1);
@@ -212,6 +333,8 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
struct msm_ringbuffer *ring = submit->ring;
unsigned int i, ibs = 0;
+ adreno_check_and_reenable_stall(adreno_gpu);
+
a6xx_set_pagetable(a6xx_gpu, ring, submit);
get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0),
@@ -242,10 +365,10 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
break;
fallthrough;
case MSM_SUBMIT_CMD_BUF:
- OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
+ OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
- OUT_RING(ring, submit->cmd[i].size);
+ OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size));
ibs++;
break;
}
@@ -267,7 +390,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
rbmemptr_stats(ring, index, alwayson_end));
/* Write the fence to the scratch register */
- OUT_PKT4(ring, REG_A6XX_CP_SCRATCH_REG(2), 1);
+ OUT_PKT4(ring, REG_A6XX_CP_SCRATCH(2), 1);
OUT_RING(ring, submit->seqno);
/*
@@ -281,8 +404,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
OUT_RING(ring, submit->seqno);
- trace_msm_gpu_submit_flush(submit,
- gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER));
+ trace_msm_gpu_submit_flush(submit, read_gmu_ao_counter(a6xx_gpu));
a6xx_flush(gpu, ring);
}
@@ -333,8 +455,11 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
struct msm_ringbuffer *ring = submit->ring;
+ u32 rbbm_perfctr_cp0, cp_always_on_counter;
unsigned int i, ibs = 0;
+ adreno_check_and_reenable_stall(adreno_gpu);
+
/*
* Toggle concurrent binning for pagetable switch and set the thread to
* BR since only it can execute the pagetable switch packets.
@@ -351,10 +476,16 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
if (gpu->nr_rings > 1)
a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, submit->queue);
- get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0),
- rbmemptr_stats(ring, index, cpcycles_start));
- get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
- rbmemptr_stats(ring, index, alwayson_start));
+ if (adreno_is_a8xx(adreno_gpu)) {
+ rbbm_perfctr_cp0 = REG_A8XX_RBBM_PERFCTR_CP(0);
+ cp_always_on_counter = REG_A8XX_CP_ALWAYS_ON_COUNTER;
+ } else {
+ rbbm_perfctr_cp0 = REG_A7XX_RBBM_PERFCTR_CP(0);
+ cp_always_on_counter = REG_A6XX_CP_ALWAYS_ON_COUNTER;
+ }
+
+ get_stats_counter(ring, rbbm_perfctr_cp0, rbmemptr_stats(ring, index, cpcycles_start));
+ get_stats_counter(ring, cp_always_on_counter, rbmemptr_stats(ring, index, alwayson_start));
OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
OUT_RING(ring, CP_SET_THREAD_BOTH);
@@ -377,10 +508,10 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
break;
fallthrough;
case MSM_SUBMIT_CMD_BUF:
- OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
+ OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
- OUT_RING(ring, submit->cmd[i].size);
+ OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size));
ibs++;
break;
}
@@ -401,14 +532,17 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
OUT_RING(ring, 0x00e); /* IB1LIST end */
}
- get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0),
- rbmemptr_stats(ring, index, cpcycles_end));
- get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER,
- rbmemptr_stats(ring, index, alwayson_end));
+ get_stats_counter(ring, rbbm_perfctr_cp0, rbmemptr_stats(ring, index, cpcycles_end));
+ get_stats_counter(ring, cp_always_on_counter, rbmemptr_stats(ring, index, alwayson_end));
/* Write the fence to the scratch register */
- OUT_PKT4(ring, REG_A6XX_CP_SCRATCH_REG(2), 1);
- OUT_RING(ring, submit->seqno);
+ if (adreno_is_a8xx(adreno_gpu)) {
+ OUT_PKT4(ring, REG_A8XX_CP_SCRATCH_GLOBAL(2), 1);
+ OUT_RING(ring, submit->seqno);
+ } else {
+ OUT_PKT4(ring, REG_A6XX_CP_SCRATCH(2), 1);
+ OUT_RING(ring, submit->seqno);
+ }
OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
OUT_RING(ring, CP_SET_THREAD_BR);
@@ -480,8 +614,7 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
}
- trace_msm_gpu_submit_flush(submit,
- gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER));
+ trace_msm_gpu_submit_flush(submit, read_gmu_ao_counter(a6xx_gpu));
a6xx_flush(gpu, ring);
@@ -504,15 +637,26 @@ static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state)
if (adreno_is_a630(adreno_gpu))
clock_cntl_on = 0x8aa8aa02;
- else if (adreno_is_a610(adreno_gpu))
+ else if (adreno_is_a610(adreno_gpu) || adreno_is_a612(adreno_gpu))
clock_cntl_on = 0xaaa8aa82;
else if (adreno_is_a702(adreno_gpu))
clock_cntl_on = 0xaaaaaa82;
else
clock_cntl_on = 0x8aa8aa82;
- cgc_delay = adreno_is_a615_family(adreno_gpu) ? 0x111 : 0x10111;
- cgc_hyst = adreno_is_a615_family(adreno_gpu) ? 0x555 : 0x5555;
+ if (adreno_is_a612(adreno_gpu))
+ cgc_delay = 0x11;
+ else if (adreno_is_a615_family(adreno_gpu))
+ cgc_delay = 0x111;
+ else
+ cgc_delay = 0x10111;
+
+ if (adreno_is_a612(adreno_gpu))
+ cgc_hyst = 0x55;
+ else if (adreno_is_a615_family(adreno_gpu))
+ cgc_hyst = 0x555;
+ else
+ cgc_hyst = 0x5555;
gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL,
state ? adreno_gpu->info->a6xx->gmu_cgc_mode : 0);
@@ -585,110 +729,124 @@ static void a6xx_set_cp_protect(struct msm_gpu *gpu)
gpu_write(gpu, REG_A6XX_CP_PROTECT(protect->count_max - 1), protect->regs[i]);
}
-static void a6xx_calc_ubwc_config(struct adreno_gpu *gpu)
+static int a6xx_calc_ubwc_config(struct adreno_gpu *gpu)
{
- gpu->ubwc_config.rgb565_predicator = 0;
- gpu->ubwc_config.uavflagprd_inv = 0;
- gpu->ubwc_config.min_acc_len = 0;
- gpu->ubwc_config.ubwc_swizzle = 0x6;
- gpu->ubwc_config.macrotile_mode = 0;
- gpu->ubwc_config.highest_bank_bit = 15;
+ const struct qcom_ubwc_cfg_data *common_cfg;
+ struct qcom_ubwc_cfg_data *cfg = &gpu->_ubwc_config;
+
+ /* Inherit the common config and make some necessary fixups */
+ common_cfg = qcom_ubwc_config_get_data();
+ if (IS_ERR(common_cfg))
+ return PTR_ERR(common_cfg);
+
+ /* Copy the data into the internal struct to drop the const qualifier (temporarily) */
+ *cfg = *common_cfg;
+
+ /* Use the common config as-is for A8xx */
+ if (!adreno_is_a8xx(gpu)) {
+ cfg->ubwc_swizzle = 0x6;
+ cfg->highest_bank_bit = 15;
+ }
if (adreno_is_a610(gpu)) {
- gpu->ubwc_config.highest_bank_bit = 13;
- gpu->ubwc_config.min_acc_len = 1;
- gpu->ubwc_config.ubwc_swizzle = 0x7;
+ cfg->highest_bank_bit = 13;
+ cfg->ubwc_swizzle = 0x7;
}
+ if (adreno_is_a612(gpu))
+ cfg->highest_bank_bit = 14;
+
if (adreno_is_a618(gpu))
- gpu->ubwc_config.highest_bank_bit = 14;
+ cfg->highest_bank_bit = 14;
if (adreno_is_a619(gpu))
/* TODO: Should be 14 but causes corruption at e.g. 1920x1200 on DP */
- gpu->ubwc_config.highest_bank_bit = 13;
+ cfg->highest_bank_bit = 13;
if (adreno_is_a619_holi(gpu))
- gpu->ubwc_config.highest_bank_bit = 13;
+ cfg->highest_bank_bit = 13;
- if (adreno_is_a621(gpu)) {
- gpu->ubwc_config.highest_bank_bit = 13;
- gpu->ubwc_config.amsbc = 1;
- gpu->ubwc_config.uavflagprd_inv = 2;
- }
-
- if (adreno_is_a640_family(gpu))
- gpu->ubwc_config.amsbc = 1;
+ if (adreno_is_a621(gpu))
+ cfg->highest_bank_bit = 13;
- if (adreno_is_a680(gpu))
- gpu->ubwc_config.macrotile_mode = 1;
+ if (adreno_is_a623(gpu))
+ cfg->highest_bank_bit = 16;
if (adreno_is_a650(gpu) ||
adreno_is_a660(gpu) ||
adreno_is_a690(gpu) ||
adreno_is_a730(gpu) ||
adreno_is_a740_family(gpu)) {
- /* TODO: get ddr type from bootloader and use 2 for LPDDR4 */
- gpu->ubwc_config.highest_bank_bit = 16;
- gpu->ubwc_config.amsbc = 1;
- gpu->ubwc_config.rgb565_predicator = 1;
- gpu->ubwc_config.uavflagprd_inv = 2;
- gpu->ubwc_config.macrotile_mode = 1;
+ /* TODO: get ddr type from bootloader and use 15 for LPDDR4 */
+ cfg->highest_bank_bit = 16;
}
if (adreno_is_a663(gpu)) {
- gpu->ubwc_config.highest_bank_bit = 13;
- gpu->ubwc_config.amsbc = 1;
- gpu->ubwc_config.rgb565_predicator = 1;
- gpu->ubwc_config.uavflagprd_inv = 2;
- gpu->ubwc_config.macrotile_mode = 1;
- gpu->ubwc_config.ubwc_swizzle = 0x4;
+ cfg->highest_bank_bit = 13;
+ cfg->ubwc_swizzle = 0x4;
}
- if (adreno_is_7c3(gpu)) {
- gpu->ubwc_config.highest_bank_bit = 14;
- gpu->ubwc_config.amsbc = 1;
- gpu->ubwc_config.rgb565_predicator = 1;
- gpu->ubwc_config.uavflagprd_inv = 2;
- gpu->ubwc_config.macrotile_mode = 1;
- }
+ if (adreno_is_7c3(gpu))
+ cfg->highest_bank_bit = 14;
- if (adreno_is_a702(gpu)) {
- gpu->ubwc_config.highest_bank_bit = 14;
- gpu->ubwc_config.min_acc_len = 1;
- }
+ if (adreno_is_a702(gpu))
+ cfg->highest_bank_bit = 14;
+
+ if (cfg->highest_bank_bit != common_cfg->highest_bank_bit)
+ DRM_WARN_ONCE("Inconclusive highest_bank_bit value: %u (GPU) vs %u (UBWC_CFG)\n",
+ cfg->highest_bank_bit, common_cfg->highest_bank_bit);
+
+ if (cfg->ubwc_swizzle != common_cfg->ubwc_swizzle)
+ DRM_WARN_ONCE("Inconclusive ubwc_swizzle value: %u (GPU) vs %u (UBWC_CFG)\n",
+ cfg->ubwc_swizzle, common_cfg->ubwc_swizzle);
+
+ gpu->ubwc_config = &gpu->_ubwc_config;
+
+ return 0;
}
static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ const struct qcom_ubwc_cfg_data *cfg = adreno_gpu->ubwc_config;
/*
* We subtract 13 from the highest bank bit (13 is the minimum value
* allowed by hw) and write the lowest two bits of the remaining value
* as hbb_lo and the one above it as hbb_hi to the hardware.
*/
- BUG_ON(adreno_gpu->ubwc_config.highest_bank_bit < 13);
- u32 hbb = adreno_gpu->ubwc_config.highest_bank_bit - 13;
+ BUG_ON(cfg->highest_bank_bit < 13);
+ u32 hbb = cfg->highest_bank_bit - 13;
+ bool rgb565_predicator = cfg->ubwc_enc_version >= UBWC_4_0;
+ u32 level2_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL2);
+ bool ubwc_mode = qcom_ubwc_get_ubwc_mode(cfg);
+ bool amsbc = cfg->ubwc_enc_version >= UBWC_3_0;
+ bool min_acc_len_64b = false;
+ u8 uavflagprd_inv = 0;
u32 hbb_hi = hbb >> 2;
u32 hbb_lo = hbb & 3;
- u32 ubwc_mode = adreno_gpu->ubwc_config.ubwc_swizzle & 1;
- u32 level2_swizzling_dis = !(adreno_gpu->ubwc_config.ubwc_swizzle & 2);
+
+ if (adreno_is_a650_family(adreno_gpu) || adreno_is_a7xx(adreno_gpu))
+ uavflagprd_inv = 2;
+
+ if (adreno_is_a610(adreno_gpu) || adreno_is_a702(adreno_gpu))
+ min_acc_len_64b = true;
gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL,
level2_swizzling_dis << 12 |
- adreno_gpu->ubwc_config.rgb565_predicator << 11 |
- hbb_hi << 10 | adreno_gpu->ubwc_config.amsbc << 4 |
- adreno_gpu->ubwc_config.min_acc_len << 3 |
+ rgb565_predicator << 11 |
+ hbb_hi << 10 | amsbc << 4 |
+ min_acc_len_64b << 3 |
hbb_lo << 1 | ubwc_mode);
gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL,
level2_swizzling_dis << 6 | hbb_hi << 4 |
- adreno_gpu->ubwc_config.min_acc_len << 3 |
+ min_acc_len_64b << 3 |
hbb_lo << 1 | ubwc_mode);
gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL,
level2_swizzling_dis << 12 | hbb_hi << 10 |
- adreno_gpu->ubwc_config.uavflagprd_inv << 4 |
- adreno_gpu->ubwc_config.min_acc_len << 3 |
+ uavflagprd_inv << 4 |
+ min_acc_len_64b << 3 |
hbb_lo << 1 | ubwc_mode);
if (adreno_is_a7xx(adreno_gpu))
@@ -696,10 +854,10 @@ static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
FIELD_PREP(GENMASK(8, 5), hbb_lo));
gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL,
- adreno_gpu->ubwc_config.min_acc_len << 23 | hbb_lo << 21);
+ min_acc_len_64b << 23 | hbb_lo << 21);
gpu_write(gpu, REG_A6XX_RBBM_NC_MODE_CNTL,
- adreno_gpu->ubwc_config.macrotile_mode);
+ cfg->macrotile_mode);
}
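
To make the highest_bank_bit encoding concrete: the hardware minimum of 13 is subtracted first, then the result is split so that hbb_hi lands in bit 10 of RB_NC_MODE_CNTL and hbb_lo in bits [2:1]. For example, with highest_bank_bit = 16 (the a650/a7xx value above):

	u32 hbb    = 16 - 13;   /* = 3 */
	u32 hbb_hi = hbb >> 2;  /* = 0, written to bit 10 */
	u32 hbb_lo = hbb & 3;   /* = 3, written to bits [2:1] via hbb_lo << 1 */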
static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu)
@@ -712,11 +870,10 @@ static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu)
u32 *dest = (u32 *)&lock->regs[0];
int i;
- reglist = adreno_gpu->info->a6xx->pwrup_reglist;
-
lock->gpu_req = lock->cpu_req = lock->turn = 0;
- lock->ifpc_list_len = 0;
- lock->preemption_list_len = reglist->count;
+
+ reglist = adreno_gpu->info->a6xx->ifpc_reglist;
+ lock->ifpc_list_len = reglist->count;
/*
* For each entry in each of the lists, write the offset and the current
@@ -727,6 +884,14 @@ static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu)
*dest++ = gpu_read(gpu, reglist->regs[i]);
}
+ reglist = adreno_gpu->info->a6xx->pwrup_reglist;
+ lock->preemption_list_len = reglist->count;
+
+ for (i = 0; i < reglist->count; i++) {
+ *dest++ = reglist->regs[i];
+ *dest++ = gpu_read(gpu, reglist->regs[i]);
+ }
+
/*
* The overall register list is composed of
* 1. Static IFPC-only registers
@@ -870,7 +1035,7 @@ static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu,
return false;
/* A7xx is safe! */
- if (adreno_is_a7xx(adreno_gpu) || adreno_is_a702(adreno_gpu))
+ if (adreno_is_a7xx(adreno_gpu) || adreno_is_a702(adreno_gpu) || adreno_is_a8xx(adreno_gpu))
return true;
/*
@@ -945,7 +1110,7 @@ static int a6xx_ucode_load(struct msm_gpu *gpu)
msm_gem_object_set_name(a6xx_gpu->sqe_bo, "sqefw");
if (!a6xx_ucode_check_version(a6xx_gpu, a6xx_gpu->sqe_bo)) {
- msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->aspace);
+ msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->vm);
drm_gem_object_put(a6xx_gpu->sqe_bo);
a6xx_gpu->sqe_bo = NULL;
@@ -953,6 +1118,23 @@ static int a6xx_ucode_load(struct msm_gpu *gpu)
}
}
+ if (!a6xx_gpu->aqe_bo && adreno_gpu->fw[ADRENO_FW_AQE]) {
+ a6xx_gpu->aqe_bo = adreno_fw_create_bo(gpu,
+ adreno_gpu->fw[ADRENO_FW_AQE], &a6xx_gpu->aqe_iova);
+
+ if (IS_ERR(a6xx_gpu->aqe_bo)) {
+ int ret = PTR_ERR(a6xx_gpu->aqe_bo);
+
+ a6xx_gpu->aqe_bo = NULL;
+ DRM_DEV_ERROR(&gpu->pdev->dev,
+ "Could not allocate AQE ucode: %d\n", ret);
+
+ return ret;
+ }
+
+ msm_gem_object_set_name(a6xx_gpu->aqe_bo, "aqefw");
+ }
+
/*
* Expanded APRIV and targets that support WHERE_AM_I both need a
* privileged buffer to store the RPTR shadow
@@ -962,7 +1144,7 @@ static int a6xx_ucode_load(struct msm_gpu *gpu)
a6xx_gpu->shadow = msm_gem_kernel_new(gpu->dev,
sizeof(u32) * gpu->nr_rings,
MSM_BO_WC | MSM_BO_MAP_PRIV,
- gpu->aspace, &a6xx_gpu->shadow_bo,
+ gpu->vm, &a6xx_gpu->shadow_bo,
&a6xx_gpu->shadow_iova);
if (IS_ERR(a6xx_gpu->shadow))
@@ -973,7 +1155,7 @@ static int a6xx_ucode_load(struct msm_gpu *gpu)
a6xx_gpu->pwrup_reglist_ptr = msm_gem_kernel_new(gpu->dev, PAGE_SIZE,
MSM_BO_WC | MSM_BO_MAP_PRIV,
- gpu->aspace, &a6xx_gpu->pwrup_reglist_bo,
+ gpu->vm, &a6xx_gpu->pwrup_reglist_bo,
&a6xx_gpu->pwrup_reglist_iova);
if (IS_ERR(a6xx_gpu->pwrup_reglist_ptr))
@@ -984,7 +1166,7 @@ static int a6xx_ucode_load(struct msm_gpu *gpu)
return 0;
}
-static int a6xx_zap_shader_init(struct msm_gpu *gpu)
+int a6xx_zap_shader_init(struct msm_gpu *gpu)
{
static bool loaded;
int ret;
@@ -1097,17 +1279,20 @@ static int hw_init(struct msm_gpu *gpu)
/* enable hardware clockgating */
a6xx_set_hwcg(gpu, true);
- /* VBIF/GBIF start*/
- if (adreno_is_a610_family(adreno_gpu) ||
- adreno_is_a640_family(adreno_gpu) ||
- adreno_is_a650_family(adreno_gpu) ||
- adreno_is_a7xx(adreno_gpu)) {
+ /* For gmuwrapper implementations, do the VBIF/GBIF CX configuration here */
+ if (adreno_is_a610_family(adreno_gpu)) {
gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE0, 0x00071620);
gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE1, 0x00071620);
gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE2, 0x00071620);
gpu_write(gpu, REG_A6XX_GBIF_QSB_SIDE3, 0x00071620);
- gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL,
- adreno_is_a7xx(adreno_gpu) ? 0x2120212 : 0x3);
+ }
+
+ if (adreno_is_a610_family(adreno_gpu) ||
+ adreno_is_a640_family(adreno_gpu) ||
+ adreno_is_a650_family(adreno_gpu)) {
+ gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x3);
+ } else if (adreno_is_a7xx(adreno_gpu)) {
+ gpu_write(gpu, REG_A6XX_RBBM_GBIF_CLIENT_QOS_CNTL, 0x2120212);
} else {
gpu_write(gpu, REG_A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3);
}
@@ -1123,12 +1308,12 @@ static int hw_init(struct msm_gpu *gpu)
/* Disable L2 bypass in the UCHE */
if (adreno_is_a7xx(adreno_gpu)) {
- gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, 0x0001fffffffff000llu);
- gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, 0x0001fffffffff000llu);
+ gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
+ gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);
} else {
- gpu_write64(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX, 0x0001ffffffffffc0llu);
- gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, 0x0001fffffffff000llu);
- gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, 0x0001fffffffff000llu);
+ gpu_write64(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX, adreno_gpu->uche_trap_base + 0xfc0);
+ gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
+ gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);
}
if (!(adreno_is_a650_family(adreno_gpu) ||
@@ -1162,10 +1347,10 @@ static int hw_init(struct msm_gpu *gpu)
}
if (adreno_is_a660_family(adreno_gpu))
- gpu_write(gpu, REG_A6XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020);
+ gpu_write(gpu, REG_A7XX_CP_LPAC_PROG_FIFO_SIZE, 0x00000020);
/* Setting the mem pool size */
- if (adreno_is_a610(adreno_gpu)) {
+ if (adreno_is_a610(adreno_gpu) || adreno_is_a612(adreno_gpu)) {
gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 48);
gpu_write(gpu, REG_A6XX_CP_MEM_POOL_DBG_ADDR, 47);
} else if (adreno_is_a702(adreno_gpu)) {
@@ -1198,7 +1383,8 @@ static int hw_init(struct msm_gpu *gpu)
a6xx_set_ubwc_config(gpu);
/* Enable fault detection */
- if (adreno_is_a730(adreno_gpu) ||
+ if (adreno_is_a612(adreno_gpu) ||
+ adreno_is_a730(adreno_gpu) ||
adreno_is_a740_family(adreno_gpu))
gpu_write(gpu, REG_A6XX_RBBM_INTERFACE_HANG_INT_CNTL, (1 << 30) | 0xcfffff);
else if (adreno_is_a690(adreno_gpu))
@@ -1214,14 +1400,14 @@ static int hw_init(struct msm_gpu *gpu)
/* Set weights for bicubic filtering */
if (adreno_is_a650_family(adreno_gpu) || adreno_is_x185(adreno_gpu)) {
- gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0, 0);
- gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1,
+ gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(0), 0);
+ gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(1),
0x3fe05ff4);
- gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2,
+ gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(2),
0x3fa0ebee);
- gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_3,
+ gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(3),
0x3f5193ed);
- gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4,
+ gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE(4),
0x3f0243f0);
}
@@ -1417,25 +1603,25 @@ static void a6xx_recover(struct msm_gpu *gpu)
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
- int i, active_submits;
+ int active_submits;
adreno_dump_info(gpu);
- for (i = 0; i < 8; i++)
- DRM_DEV_INFO(&gpu->pdev->dev, "CP_SCRATCH_REG%d: %u\n", i,
- gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(i)));
+ if (a6xx_gmu_gx_is_on(&a6xx_gpu->gmu)) {
+ /* Sometimes crashstate capture is skipped, so SQE should be halted here again */
+ gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3);
+
+ if (hang_debug)
+ a6xx_dump(gpu);
- if (hang_debug)
- a6xx_dump(gpu);
+ }
/*
* To handle recovery specific sequences during the rpm suspend we are
* about to trigger
*/
- a6xx_gpu->hung = true;
- /* Halt SQE first */
- gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3);
+ a6xx_gpu->hung = true;
pm_runtime_dont_use_autosuspend(&gpu->pdev->dev);
@@ -1449,9 +1635,9 @@ static void a6xx_recover(struct msm_gpu *gpu)
*/
gpu->active_submits = 0;
- if (adreno_has_gmu_wrapper(adreno_gpu)) {
+ if (adreno_has_gmu_wrapper(adreno_gpu) || adreno_has_rgmu(adreno_gpu)) {
/* Drain the outstanding traffic on memory buses */
- a6xx_bus_clear_pending_transactions(adreno_gpu, true);
+ adreno_gpu->funcs->bus_halt(adreno_gpu, true);
/* Reset the GPU to a clean state */
a6xx_gpu_sw_reset(gpu, true);
@@ -1610,10 +1796,10 @@ static int a6xx_fault_handler(void *arg, unsigned long iova, int flags, void *da
const char *block = "unknown";
u32 scratch[] = {
- gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(4)),
- gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(5)),
- gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(6)),
- gpu_read(gpu, REG_A6XX_CP_SCRATCH_REG(7)),
+ gpu_read(gpu, REG_A6XX_CP_SCRATCH(4)),
+ gpu_read(gpu, REG_A6XX_CP_SCRATCH(5)),
+ gpu_read(gpu, REG_A6XX_CP_SCRATCH(6)),
+ gpu_read(gpu, REG_A6XX_CP_SCRATCH(7)),
};
if (info)
@@ -1666,8 +1852,6 @@ static void a6xx_cp_hw_err_irq(struct msm_gpu *gpu)
static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
{
- struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
- struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
/*
@@ -1679,13 +1863,6 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
if (gpu_read(gpu, REG_A6XX_RBBM_STATUS3) & A6XX_RBBM_STATUS3_SMMU_STALLED_ON_FAULT)
return;
- /*
- * Force the GPU to stay on until after we finish
- * collecting information
- */
- if (!adreno_has_gmu_wrapper(adreno_gpu))
- gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1);
-
DRM_DEV_ERROR(&gpu->pdev->dev,
"gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
ring ? ring->id : -1, ring ? ring->fctx->last_fence : 0,
@@ -1698,7 +1875,10 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE));
/* Turn off the hangcheck timer to keep it from bothering us */
- del_timer(&gpu->hangcheck_timer);
+ timer_delete(&gpu->hangcheck_timer);
+
+ /* Turn off interrupts to avoid triggering recovery again */
+ gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK, 0);
kthread_queue_work(gpu->worker, &gpu->recover_work);
}
@@ -1718,15 +1898,55 @@ static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu)
*/
if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING |
A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) {
- del_timer(&gpu->hangcheck_timer);
+ timer_delete(&gpu->hangcheck_timer);
kthread_queue_work(gpu->worker, &gpu->recover_work);
}
}
+static void a6xx_gpu_keepalive_vote(struct msm_gpu *gpu, bool on)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+
+ if (adreno_has_gmu_wrapper(adreno_gpu))
+ return;
+
+ gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, on);
+}
+
+static int irq_poll_fence(struct msm_gpu *gpu)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+ struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
+ u32 status;
+
+ if (adreno_has_gmu_wrapper(adreno_gpu))
+ return 0;
+
+ if (gmu_poll_timeout_atomic(gmu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, status, !status, 1, 100)) {
+ u32 rbbm_unmasked = gmu_read(gmu, REG_A6XX_GMU_RBBM_INT_UNMASKED_STATUS);
+
+ dev_err_ratelimited(&gpu->pdev->dev,
+ "irq fence poll timeout, fence_ctrl=0x%x, unmasked_status=0x%x\n",
+ status, rbbm_unmasked);
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
{
struct msm_drm_private *priv = gpu->dev->dev_private;
+
+ /* Set keepalive vote to avoid power collapse after RBBM_INT_0_STATUS is read */
+ a6xx_gpu_keepalive_vote(gpu, true);
+
+ if (irq_poll_fence(gpu))
+ goto done;
+
u32 status = gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS);
gpu_write(gpu, REG_A6XX_RBBM_INT_CLEAR_CMD, status);
@@ -1763,6 +1983,9 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
if (status & A6XX_RBBM_INT_0_MASK_CP_SW)
a6xx_preempt_irq(gpu);
+done:
+ a6xx_gpu_keepalive_vote(gpu, false);
+
return IRQ_HANDLED;
}
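
With IFPC, the GX domain can power-collapse between the interrupt firing and the handler running, so the handler now pins GX power with a keepalive vote before touching any GX register, and drains the AHB fence before the first read. Condensed, with names shortened for illustration:

	vote_keepalive(true);             /* pin GX while touching its registers */
	if (!ahb_fence_drained())         /* bail out if the fence never drains */
		goto done;
	/* ... read/clear RBBM_INT_0_STATUS, dispatch sub-handlers ... */
done:
	vote_keepalive(false);            /* drop the vote, allow collapse again */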
@@ -1908,7 +2131,7 @@ static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu)
u32 fuse_val;
int ret;
- if (adreno_is_a750(adreno_gpu)) {
+ if (adreno_is_a750(adreno_gpu) || adreno_is_a8xx(adreno_gpu)) {
/*
* Assume that if qcom scm isn't available, that whatever
* replacement allows writing the fuse register ourselves.
@@ -1934,9 +2157,9 @@ static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu)
return ret;
/*
- * On a750 raytracing may be disabled by the firmware, find out
- * whether that's the case. The scm call above sets the fuse
- * register.
+ * On A7XX_GEN3 and newer, raytracing may be disabled by the
+ * firmware; find out whether that's the case. The scm call
+ * above sets the fuse register.
*/
fuse_val = a6xx_llc_read(a6xx_gpu,
REG_A7XX_CX_MISC_SW_FUSE_VALUE);
@@ -1997,7 +2220,7 @@ void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_
void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert)
{
/* 11nm chips (e.g. ones with A610) have hw issues with the reset line! */
- if (adreno_is_a610(to_adreno_gpu(gpu)))
+ if (adreno_is_a610(to_adreno_gpu(gpu)) || adreno_is_a8xx(to_adreno_gpu(gpu)))
return;
gpu_write(gpu, REG_A6XX_RBBM_SW_RESET_CMD, assert);
@@ -2028,7 +2251,12 @@ static int a6xx_gmu_pm_resume(struct msm_gpu *gpu)
msm_devfreq_resume(gpu);
- adreno_is_a7xx(adreno_gpu) ? a7xx_llc_activate(a6xx_gpu) : a6xx_llc_activate(a6xx_gpu);
+ if (adreno_is_a8xx(adreno_gpu))
+ a8xx_llc_activate(a6xx_gpu);
+ else if (adreno_is_a7xx(adreno_gpu))
+ a7xx_llc_activate(a6xx_gpu);
+ else
+ a6xx_llc_activate(a6xx_gpu);
return ret;
}
@@ -2065,6 +2293,12 @@ static int a6xx_pm_resume(struct msm_gpu *gpu)
if (ret)
goto err_bulk_clk;
+ ret = clk_bulk_prepare_enable(gmu->nr_clocks, gmu->clocks);
+ if (ret) {
+ clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks);
+ goto err_bulk_clk;
+ }
+
if (adreno_is_a619_holi(adreno_gpu))
a6xx_sptprac_enable(gmu);
@@ -2078,8 +2312,10 @@ err_bulk_clk:
err_set_opp:
mutex_unlock(&a6xx_gpu->gmu.lock);
- if (!ret)
+ if (!ret) {
msm_devfreq_resume(gpu);
+ a6xx_llc_activate(a6xx_gpu);
+ }
return ret;
}
@@ -2120,17 +2356,20 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu)
trace_msm_gpu_suspend(0);
+ a6xx_llc_deactivate(a6xx_gpu);
+
msm_devfreq_suspend(gpu);
mutex_lock(&a6xx_gpu->gmu.lock);
/* Drain the outstanding traffic on memory buses */
- a6xx_bus_clear_pending_transactions(adreno_gpu, true);
+ adreno_gpu->funcs->bus_halt(adreno_gpu, true);
if (adreno_is_a619_holi(adreno_gpu))
a6xx_sptprac_disable(gmu);
clk_bulk_disable_unprepare(gpu->nr_clocks, gpu->grp_clks);
+ clk_bulk_disable_unprepare(gmu->nr_clocks, gmu->clocks);
pm_runtime_put_sync(gmu->gxpd);
dev_pm_opp_set_opp(&gpu->pdev->dev, NULL);
@@ -2152,16 +2391,7 @@ static int a6xx_gmu_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
- mutex_lock(&a6xx_gpu->gmu.lock);
-
- /* Force the GPU power on so we can read this register */
- a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
-
- *value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER);
-
- a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
-
- mutex_unlock(&a6xx_gpu->gmu.lock);
+ *value = read_gmu_ao_counter(a6xx_gpu);
return 0;
}
@@ -2186,12 +2416,17 @@ static void a6xx_destroy(struct msm_gpu *gpu)
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
if (a6xx_gpu->sqe_bo) {
- msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->aspace);
+ msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->vm);
drm_gem_object_put(a6xx_gpu->sqe_bo);
}
+ if (a6xx_gpu->aqe_bo) {
+ msm_gem_unpin_iova(a6xx_gpu->aqe_bo, gpu->vm);
+ drm_gem_object_put(a6xx_gpu->aqe_bo);
+ }
+
if (a6xx_gpu->shadow_bo) {
- msm_gem_unpin_iova(a6xx_gpu->shadow_bo, gpu->aspace);
+ msm_gem_unpin_iova(a6xx_gpu->shadow_bo, gpu->vm);
drm_gem_object_put(a6xx_gpu->shadow_bo);
}
@@ -2231,8 +2466,8 @@ static void a6xx_gpu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp,
mutex_unlock(&a6xx_gpu->gmu.lock);
}
-static struct msm_gem_address_space *
-a6xx_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev)
+static struct drm_gpuvm *
+a6xx_create_vm(struct msm_gpu *gpu, struct platform_device *pdev)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
@@ -2246,22 +2481,21 @@ a6xx_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev)
!device_iommu_capable(&pdev->dev, IOMMU_CAP_CACHE_COHERENCY))
quirks |= IO_PGTABLE_QUIRK_ARM_OUTER_WBWA;
- return adreno_iommu_create_address_space(gpu, pdev, quirks);
+ return adreno_iommu_create_vm(gpu, pdev, quirks);
}
-static struct msm_gem_address_space *
-a6xx_create_private_address_space(struct msm_gpu *gpu)
+static struct drm_gpuvm *
+a6xx_create_private_vm(struct msm_gpu *gpu, bool kernel_managed)
{
struct msm_mmu *mmu;
- mmu = msm_iommu_pagetable_create(gpu->aspace->mmu);
+ mmu = msm_iommu_pagetable_create(to_msm_vm(gpu->vm)->mmu, kernel_managed);
if (IS_ERR(mmu))
return ERR_CAST(mmu);
- return msm_gem_address_space_create(mmu,
- "gpu", 0x100000000ULL,
- adreno_private_address_space_size(gpu));
+ return msm_gem_vm_create(gpu->dev, mmu, "gpu", ADRENO_VM_START,
+ adreno_private_vm_size(gpu), kernel_managed);
}
static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
@@ -2272,18 +2506,36 @@ static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami)
return a6xx_gpu->shadow[ring->id];
+ /*
+ * This is hit only on an A6XX_GEN1 with a GMU, IFPC enabled, and a super
+ * old SQE firmware without 'whereami' support
+ */
+ WARN_ONCE((to_adreno_gpu(gpu)->info->quirks & ADRENO_QUIRK_IFPC),
+ "Can't read CP_RB_RPTR register reliably\n");
+
return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR);
}
static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
- struct msm_cp_state cp_state = {
+ struct msm_cp_state cp_state;
+ bool progress;
+
+ /*
+ * With IFPC, the KMD doesn't know whether the GX power domain is
+ * collapsed or not, so we can't blindly read the registers below, which
+ * live in the GX domain. Let's trust the hang detection in HW and lie
+ * to the caller that there was progress.
+ */
+ if (to_adreno_gpu(gpu)->info->quirks & ADRENO_QUIRK_IFPC)
+ return true;
+
+ cp_state = (struct msm_cp_state) {
.ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
.ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
.ib1_rem = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
.ib2_rem = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE),
};
- bool progress;
/*
* Adjust the remaining data to account for what has already been
@@ -2355,7 +2607,105 @@ static int a6xx_set_supported_hw(struct device *dev, const struct adreno_info *i
return 0;
}
-static const struct adreno_gpu_funcs funcs = {
+static struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
+{
+ struct msm_drm_private *priv = dev->dev_private;
+ struct platform_device *pdev = priv->gpu_pdev;
+ struct adreno_platform_config *config = pdev->dev.platform_data;
+ struct device_node *node;
+ struct a6xx_gpu *a6xx_gpu;
+ struct adreno_gpu *adreno_gpu;
+ struct msm_gpu *gpu;
+ extern int enable_preemption;
+ bool is_a7xx;
+ int ret, nr_rings = 1;
+
+ a6xx_gpu = kzalloc(sizeof(*a6xx_gpu), GFP_KERNEL);
+ if (!a6xx_gpu)
+ return ERR_PTR(-ENOMEM);
+
+ adreno_gpu = &a6xx_gpu->base;
+ gpu = &adreno_gpu->base;
+
+ mutex_init(&a6xx_gpu->gmu.lock);
+
+ adreno_gpu->registers = NULL;
+
+ /* Check if there is a GMU phandle and set it up */
+ node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0);
+ /* FIXME: How do we gracefully handle this? */
+ BUG_ON(!node);
+
+ adreno_gpu->gmu_is_wrapper = of_device_is_compatible(node, "qcom,adreno-gmu-wrapper");
+
+ adreno_gpu->base.hw_apriv =
+ !!(config->info->quirks & ADRENO_QUIRK_HAS_HW_APRIV);
+
+ /* gpu->info only gets assigned in adreno_gpu_init(). A8xx is included intentionally */
+ is_a7xx = config->info->family >= ADRENO_7XX_GEN1;
+
+ a6xx_llc_slices_init(pdev, a6xx_gpu, is_a7xx);
+
+ ret = a6xx_set_supported_hw(&pdev->dev, config->info);
+ if (ret) {
+ a6xx_llc_slices_destroy(a6xx_gpu);
+ kfree(a6xx_gpu);
+ return ERR_PTR(ret);
+ }
+
+ if ((enable_preemption == 1) || (enable_preemption == -1 &&
+ (config->info->quirks & ADRENO_QUIRK_PREEMPTION)))
+ nr_rings = 4;
+
+ ret = adreno_gpu_init(dev, pdev, adreno_gpu, config->info->funcs, nr_rings);
+ if (ret) {
+ a6xx_destroy(&(a6xx_gpu->base.base));
+ return ERR_PTR(ret);
+ }
+
+ /*
+ * For now only clamp to idle freq for devices where this is known not
+ * to cause power supply issues:
+ */
+ if (adreno_is_a618(adreno_gpu) || adreno_is_7c3(adreno_gpu))
+ priv->gpu_clamp_to_idle = true;
+
+ if (adreno_has_gmu_wrapper(adreno_gpu) || adreno_has_rgmu(adreno_gpu))
+ ret = a6xx_gmu_wrapper_init(a6xx_gpu, node);
+ else
+ ret = a6xx_gmu_init(a6xx_gpu, node);
+ of_node_put(node);
+ if (ret) {
+ a6xx_destroy(&(a6xx_gpu->base.base));
+ return ERR_PTR(ret);
+ }
+
+ if (adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu)) {
+ ret = a7xx_cx_mem_init(a6xx_gpu);
+ if (ret) {
+ a6xx_destroy(&(a6xx_gpu->base.base));
+ return ERR_PTR(ret);
+ }
+ }
+
+ adreno_gpu->uche_trap_base = 0x1fffffffff000ull;
+
+ msm_mmu_set_fault_handler(to_msm_vm(gpu->vm)->mmu, gpu,
+ adreno_gpu->funcs->mmu_fault_handler);
+
+ ret = a6xx_calc_ubwc_config(adreno_gpu);
+ if (ret) {
+ a6xx_destroy(&(a6xx_gpu->base.base));
+ return ERR_PTR(ret);
+ }
+
+ /* Set up the preemption specific bits and pieces for each ringbuffer */
+ a6xx_preempt_init(gpu);
+
+ return gpu;
+}
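
The funcs table is no longer picked inside a6xx_gpu_init(); it now comes from the device catalog via config->info->funcs (see the adreno_gpu_init() call above). A catalog entry would wire it up roughly as below; the exact adreno_info layout is assumed here, not quoted from this patch:

	static const struct adreno_info example_info = {
		.family = ADRENO_7XX_GEN3,
		.funcs  = &a7xx_gpu_funcs,   /* per-family ops, defined just below */
		/* ... chip ids, firmware names, quirks ... */
	};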
+
+const struct adreno_gpu_funcs a6xx_gpu_funcs = {
.base = {
.get_param = adreno_get_param,
.set_param = adreno_set_param,
@@ -2378,15 +2728,18 @@ static const struct adreno_gpu_funcs funcs = {
.gpu_state_get = a6xx_gpu_state_get,
.gpu_state_put = a6xx_gpu_state_put,
#endif
- .create_address_space = a6xx_create_address_space,
- .create_private_address_space = a6xx_create_private_address_space,
+ .create_vm = a6xx_create_vm,
+ .create_private_vm = a6xx_create_private_vm,
.get_rptr = a6xx_get_rptr,
.progress = a6xx_progress,
},
+ .init = a6xx_gpu_init,
.get_timestamp = a6xx_gmu_get_timestamp,
+ .bus_halt = a6xx_bus_clear_pending_transactions,
+ .mmu_fault_handler = a6xx_fault_handler,
};
-static const struct adreno_gpu_funcs funcs_gmuwrapper = {
+const struct adreno_gpu_funcs a6xx_gmuwrapper_funcs = {
.base = {
.get_param = adreno_get_param,
.set_param = adreno_set_param,
@@ -2407,15 +2760,18 @@ static const struct adreno_gpu_funcs funcs_gmuwrapper = {
.gpu_state_get = a6xx_gpu_state_get,
.gpu_state_put = a6xx_gpu_state_put,
#endif
- .create_address_space = a6xx_create_address_space,
- .create_private_address_space = a6xx_create_private_address_space,
+ .create_vm = a6xx_create_vm,
+ .create_private_vm = a6xx_create_private_vm,
.get_rptr = a6xx_get_rptr,
.progress = a6xx_progress,
},
+ .init = a6xx_gpu_init,
.get_timestamp = a6xx_get_timestamp,
+ .bus_halt = a6xx_bus_clear_pending_transactions,
+ .mmu_fault_handler = a6xx_fault_handler,
};
-static const struct adreno_gpu_funcs funcs_a7xx = {
+const struct adreno_gpu_funcs a7xx_gpu_funcs = {
.base = {
.get_param = adreno_get_param,
.set_param = adreno_set_param,
@@ -2438,108 +2794,40 @@ static const struct adreno_gpu_funcs funcs_a7xx = {
.gpu_state_get = a6xx_gpu_state_get,
.gpu_state_put = a6xx_gpu_state_put,
#endif
- .create_address_space = a6xx_create_address_space,
- .create_private_address_space = a6xx_create_private_address_space,
+ .create_vm = a6xx_create_vm,
+ .create_private_vm = a6xx_create_private_vm,
.get_rptr = a6xx_get_rptr,
.progress = a6xx_progress,
},
+ .init = a6xx_gpu_init,
.get_timestamp = a6xx_gmu_get_timestamp,
+ .bus_halt = a6xx_bus_clear_pending_transactions,
+ .mmu_fault_handler = a6xx_fault_handler,
};
-struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
-{
- struct msm_drm_private *priv = dev->dev_private;
- struct platform_device *pdev = priv->gpu_pdev;
- struct adreno_platform_config *config = pdev->dev.platform_data;
- struct device_node *node;
- struct a6xx_gpu *a6xx_gpu;
- struct adreno_gpu *adreno_gpu;
- struct msm_gpu *gpu;
- extern int enable_preemption;
- bool is_a7xx;
- int ret;
-
- a6xx_gpu = kzalloc(sizeof(*a6xx_gpu), GFP_KERNEL);
- if (!a6xx_gpu)
- return ERR_PTR(-ENOMEM);
-
- adreno_gpu = &a6xx_gpu->base;
- gpu = &adreno_gpu->base;
-
- mutex_init(&a6xx_gpu->gmu.lock);
-
- adreno_gpu->registers = NULL;
-
- /* Check if there is a GMU phandle and set it up */
- node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0);
- /* FIXME: How do we gracefully handle this? */
- BUG_ON(!node);
-
- adreno_gpu->gmu_is_wrapper = of_device_is_compatible(node, "qcom,adreno-gmu-wrapper");
-
- adreno_gpu->base.hw_apriv =
- !!(config->info->quirks & ADRENO_QUIRK_HAS_HW_APRIV);
-
- /* gpu->info only gets assigned in adreno_gpu_init() */
- is_a7xx = config->info->family == ADRENO_7XX_GEN1 ||
- config->info->family == ADRENO_7XX_GEN2 ||
- config->info->family == ADRENO_7XX_GEN3;
-
- a6xx_llc_slices_init(pdev, a6xx_gpu, is_a7xx);
-
- ret = a6xx_set_supported_hw(&pdev->dev, config->info);
- if (ret) {
- a6xx_llc_slices_destroy(a6xx_gpu);
- kfree(a6xx_gpu);
- return ERR_PTR(ret);
- }
-
- if ((enable_preemption == 1) || (enable_preemption == -1 &&
- (config->info->quirks & ADRENO_QUIRK_PREEMPTION)))
- ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_a7xx, 4);
- else if (is_a7xx)
- ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_a7xx, 1);
- else if (adreno_has_gmu_wrapper(adreno_gpu))
- ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_gmuwrapper, 1);
- else
- ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
- if (ret) {
- a6xx_destroy(&(a6xx_gpu->base.base));
- return ERR_PTR(ret);
- }
-
- /*
- * For now only clamp to idle freq for devices where this is known not
- * to cause power supply issues:
- */
- if (adreno_is_a618(adreno_gpu) || adreno_is_7c3(adreno_gpu))
- priv->gpu_clamp_to_idle = true;
-
- if (adreno_has_gmu_wrapper(adreno_gpu))
- ret = a6xx_gmu_wrapper_init(a6xx_gpu, node);
- else
- ret = a6xx_gmu_init(a6xx_gpu, node);
- of_node_put(node);
- if (ret) {
- a6xx_destroy(&(a6xx_gpu->base.base));
- return ERR_PTR(ret);
- }
-
- if (adreno_is_a7xx(adreno_gpu)) {
- ret = a7xx_cx_mem_init(a6xx_gpu);
- if (ret) {
- a6xx_destroy(&(a6xx_gpu->base.base));
- return ERR_PTR(ret);
- }
- }
-
- if (gpu->aspace)
- msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu,
- a6xx_fault_handler);
-
- a6xx_calc_ubwc_config(adreno_gpu);
- /* Set up the preemption specific bits and pieces for each ringbuffer */
- a6xx_preempt_init(gpu);
-
- return gpu;
-}
+const struct adreno_gpu_funcs a8xx_gpu_funcs = {
+ .base = {
+ .get_param = adreno_get_param,
+ .set_param = adreno_set_param,
+ .hw_init = a8xx_hw_init,
+ .ucode_load = a6xx_ucode_load,
+ .pm_suspend = a6xx_gmu_pm_suspend,
+ .pm_resume = a6xx_gmu_pm_resume,
+ .recover = a8xx_recover,
+ .submit = a7xx_submit,
+ .active_ring = a6xx_active_ring,
+ .irq = a8xx_irq,
+ .destroy = a6xx_destroy,
+ .gpu_busy = a8xx_gpu_busy,
+ .gpu_get_freq = a6xx_gmu_get_freq,
+ .gpu_set_freq = a6xx_gpu_set_freq,
+ .create_vm = a6xx_create_vm,
+ .create_private_vm = a6xx_create_private_vm,
+ .get_rptr = a6xx_get_rptr,
+ .progress = a8xx_progress,
+ },
+ .init = a6xx_gpu_init,
+ .get_timestamp = a8xx_gmu_get_timestamp,
+ .bus_halt = a8xx_bus_clear_pending_transactions,
+ .mmu_fault_handler = a8xx_fault_handler,
+};