diff options
Diffstat (limited to 'drivers/gpu/drm/msm/adreno/a6xx_gpu.c')
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 1400 |
1 files changed, 434 insertions, 966 deletions
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index cf0b1de1c071..491fde0083a2 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -10,6 +10,7 @@ #include <linux/bitfield.h> #include <linux/devfreq.h> +#include <linux/firmware/qcom/qcom_scm.h> #include <linux/pm_domain.h> #include <linux/soc/qcom/llcc-qcom.h> @@ -67,6 +68,8 @@ static void update_shadow_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); uint32_t wptr; unsigned long flags; @@ -80,12 +83,17 @@ static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) /* Make sure to wrap wptr if we need to */ wptr = get_wptr(ring); - spin_unlock_irqrestore(&ring->preempt_lock, flags); - - /* Make sure everything is posted before making a decision */ - mb(); + /* Update HW if this is the current ring and we are not in preempt*/ + if (!a6xx_in_preempt(a6xx_gpu)) { + if (a6xx_gpu->cur_ring == ring) + gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr); + else + ring->restore_wptr = true; + } else { + ring->restore_wptr = true; + } - gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr); + spin_unlock_irqrestore(&ring->preempt_lock, flags); } static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter, @@ -100,20 +108,44 @@ static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter, } static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, - struct msm_ringbuffer *ring, struct msm_file_private *ctx) + struct msm_ringbuffer *ring, struct msm_gem_submit *submit) { bool sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1; + struct msm_file_private *ctx = submit->queue->ctx; struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; phys_addr_t ttbr; u32 asid; u64 memptr = rbmemptr(ring, ttbr0); - if (ctx->seqno == a6xx_gpu->base.base.cur_ctx_seqno) + if (ctx->seqno == ring->cur_ctx_seqno) return; if (msm_iommu_pagetable_params(ctx->aspace->mmu, &ttbr, &asid)) return; + if (adreno_gpu->info->family >= ADRENO_7XX_GEN1) { + /* Wait for previous submit to complete before continuing: */ + OUT_PKT7(ring, CP_WAIT_TIMESTAMP, 4); + OUT_RING(ring, 0); + OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence))); + OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence))); + OUT_RING(ring, submit->seqno - 1); + + OUT_PKT7(ring, CP_THREAD_CONTROL, 1); + OUT_RING(ring, CP_SET_THREAD_BOTH); + + /* Reset state used to synchronize BR and BV */ + OUT_PKT7(ring, CP_RESET_CONTEXT_STATE, 1); + OUT_RING(ring, + CP_RESET_CONTEXT_STATE_0_CLEAR_ON_CHIP_TS | + CP_RESET_CONTEXT_STATE_0_CLEAR_RESOURCE_TABLE | + CP_RESET_CONTEXT_STATE_0_CLEAR_BV_BR_COUNTER | + CP_RESET_CONTEXT_STATE_0_RESET_GLOBAL_LOCAL_TS); + + OUT_PKT7(ring, CP_THREAD_CONTROL, 1); + OUT_RING(ring, CP_SET_THREAD_BR); + } + if (!sysprof) { if (!adreno_is_a7xx(adreno_gpu)) { /* Turn off protected mode to write to special registers */ @@ -137,12 +169,14 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, /* * Write the new TTBR0 to the memstore. This is good for debugging. + * Needed for preemption */ - OUT_PKT7(ring, CP_MEM_WRITE, 4); + OUT_PKT7(ring, CP_MEM_WRITE, 5); OUT_RING(ring, CP_MEM_WRITE_0_ADDR_LO(lower_32_bits(memptr))); OUT_RING(ring, CP_MEM_WRITE_1_ADDR_HI(upper_32_bits(memptr))); OUT_RING(ring, lower_32_bits(ttbr)); - OUT_RING(ring, (asid << 16) | upper_32_bits(ttbr)); + OUT_RING(ring, upper_32_bits(ttbr)); + OUT_RING(ring, ctx->seqno); /* * Sync both threads after switching pagetables and enable BR only @@ -192,7 +226,9 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) struct msm_ringbuffer *ring = submit->ring; unsigned int i, ibs = 0; - a6xx_set_pagetable(a6xx_gpu, ring, submit->queue->ctx); + adreno_check_and_reenable_stall(adreno_gpu); + + a6xx_set_pagetable(a6xx_gpu, ring, submit); get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0), rbmemptr_stats(ring, index, cpcycles_start)); @@ -218,14 +254,14 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) case MSM_SUBMIT_CMD_IB_TARGET_BUF: break; case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: - if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno) + if (ring->cur_ctx_seqno == submit->queue->ctx->seqno) break; fallthrough; case MSM_SUBMIT_CMD_BUF: - OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3); + OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3); OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); OUT_RING(ring, upper_32_bits(submit->cmd[i].iova)); - OUT_RING(ring, submit->cmd[i].size); + OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size)); ibs++; break; } @@ -267,6 +303,46 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) a6xx_flush(gpu, ring); } +static void a6xx_emit_set_pseudo_reg(struct msm_ringbuffer *ring, + struct a6xx_gpu *a6xx_gpu, struct msm_gpu_submitqueue *queue) +{ + u64 preempt_postamble; + + OUT_PKT7(ring, CP_SET_PSEUDO_REG, 12); + + OUT_RING(ring, SMMU_INFO); + /* don't save SMMU, we write the record from the kernel instead */ + OUT_RING(ring, 0); + OUT_RING(ring, 0); + + /* privileged and non secure buffer save */ + OUT_RING(ring, NON_SECURE_SAVE_ADDR); + OUT_RING(ring, lower_32_bits( + a6xx_gpu->preempt_iova[ring->id])); + OUT_RING(ring, upper_32_bits( + a6xx_gpu->preempt_iova[ring->id])); + + /* user context buffer save, seems to be unnused by fw */ + OUT_RING(ring, NON_PRIV_SAVE_ADDR); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + + OUT_RING(ring, COUNTER); + /* seems OK to set to 0 to disable it */ + OUT_RING(ring, 0); + OUT_RING(ring, 0); + + /* Emit postamble to clear perfcounters */ + preempt_postamble = a6xx_gpu->preempt_postamble_iova; + + OUT_PKT7(ring, CP_SET_AMBLE, 3); + OUT_RING(ring, lower_32_bits(preempt_postamble)); + OUT_RING(ring, upper_32_bits(preempt_postamble)); + OUT_RING(ring, CP_SET_AMBLE_2_DWORDS( + a6xx_gpu->preempt_postamble_len) | + CP_SET_AMBLE_2_TYPE(KMD_AMBLE_TYPE)); +} + static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) { unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT; @@ -275,6 +351,8 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) struct msm_ringbuffer *ring = submit->ring; unsigned int i, ibs = 0; + adreno_check_and_reenable_stall(adreno_gpu); + /* * Toggle concurrent binning for pagetable switch and set the thread to * BR since only it can execute the pagetable switch packets. @@ -282,9 +360,16 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) OUT_PKT7(ring, CP_THREAD_CONTROL, 1); OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | CP_SET_THREAD_BR); - a6xx_set_pagetable(a6xx_gpu, ring, submit->queue->ctx); + a6xx_set_pagetable(a6xx_gpu, ring, submit); - get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0), + /* + * If preemption is enabled, then set the pseudo register for the save + * sequence + */ + if (gpu->nr_rings > 1) + a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, submit->queue); + + get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0), rbmemptr_stats(ring, index, cpcycles_start)); get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER, rbmemptr_stats(ring, index, alwayson_start)); @@ -295,8 +380,10 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) OUT_PKT7(ring, CP_SET_MARKER, 1); OUT_RING(ring, 0x101); /* IFPC disable */ - OUT_PKT7(ring, CP_SET_MARKER, 1); - OUT_RING(ring, 0x00d); /* IB1LIST start */ + if (submit->queue->flags & MSM_SUBMITQUEUE_ALLOW_PREEMPT) { + OUT_PKT7(ring, CP_SET_MARKER, 1); + OUT_RING(ring, 0x00d); /* IB1LIST start */ + } /* Submit the commands */ for (i = 0; i < submit->nr_cmds; i++) { @@ -304,14 +391,14 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) case MSM_SUBMIT_CMD_IB_TARGET_BUF: break; case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: - if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno) + if (ring->cur_ctx_seqno == submit->queue->ctx->seqno) break; fallthrough; case MSM_SUBMIT_CMD_BUF: - OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3); + OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3); OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); OUT_RING(ring, upper_32_bits(submit->cmd[i].iova)); - OUT_RING(ring, submit->cmd[i].size); + OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size)); ibs++; break; } @@ -327,10 +414,12 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) update_shadow_rptr(gpu, ring); } - OUT_PKT7(ring, CP_SET_MARKER, 1); - OUT_RING(ring, 0x00e); /* IB1LIST end */ + if (submit->queue->flags & MSM_SUBMITQUEUE_ALLOW_PREEMPT) { + OUT_PKT7(ring, CP_SET_MARKER, 1); + OUT_RING(ring, 0x00e); /* IB1LIST end */ + } - get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0), + get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0), rbmemptr_stats(ring, index, cpcycles_end)); get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER, rbmemptr_stats(ring, index, alwayson_end)); @@ -375,6 +464,8 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) OUT_RING(ring, upper_32_bits(rbmemptr(ring, bv_fence))); OUT_RING(ring, submit->seqno); + a6xx_gpu->last_seqno[ring->id] = submit->seqno; + /* write the ringbuffer timestamp */ OUT_PKT7(ring, CP_EVENT_WRITE, 4); OUT_RING(ring, CACHE_CLEAN | CP_EVENT_WRITE_0_IRQ | BIT(27)); @@ -388,628 +479,33 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) OUT_PKT7(ring, CP_SET_MARKER, 1); OUT_RING(ring, 0x100); /* IFPC enable */ - trace_msm_gpu_submit_flush(submit, - gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER)); - - a6xx_flush(gpu, ring); -} - -const struct adreno_reglist a612_hwcg[] = { - {REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, - {REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000081}, - {REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000f3cf}, - {REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222}, - {REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111}, - {REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777}, - {REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01202222}, - {REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220}, - {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040f00}, - {REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x05522022}, - {REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555}, - {REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011}, - {REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044}, - {REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, - {REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, - {REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x02222222}, - {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002}, - {REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222}, - {REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, - {REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}, - {REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, - {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, - {REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004}, - {REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, - {REG_A6XX_RBBM_ISDB_CNT, 0x00000182}, - {REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000}, - {REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222}, - {REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111}, - {REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555}, - {}, -}; - -/* For a615 family (a615, a616, a618 and a619) */ -const struct adreno_reglist a615_hwcg[] = { - {REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, - {REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, - {REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, - {REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, - {REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222}, - {REG_A6XX_RBBM_CLOCK_CNTL_TP1, 0x02222222}, - {REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL3_TP1, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222}, - {REG_A6XX_RBBM_CLOCK_CNTL4_TP1, 0x00022222}, - {REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST_TP1, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST2_TP1, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST3_TP1, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777}, - {REG_A6XX_RBBM_CLOCK_HYST4_TP1, 0x00077777}, - {REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY_TP1, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY3_TP1, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111}, - {REG_A6XX_RBBM_CLOCK_DELAY4_TP1, 0x00011111}, - {REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222}, - {REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004}, - {REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, - {REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x00002222}, - {REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002020}, - {REG_A6XX_RBBM_CLOCK_CNTL_CCU1, 0x00002220}, - {REG_A6XX_RBBM_CLOCK_CNTL_CCU2, 0x00002220}, - {REG_A6XX_RBBM_CLOCK_CNTL_CCU3, 0x00002220}, - {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00}, - {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU1, 0x00040F00}, - {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU2, 0x00040F00}, - {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU3, 0x00040F00}, - {REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x05022022}, - {REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555}, - {REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011}, - {REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044}, - {REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, - {REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222}, - {REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, - {REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, - {REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, - {REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, - {REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}, - {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002}, - {REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222}, - {REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222}, - {REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111}, - {REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555}, - {}, -}; - -const struct adreno_reglist a630_hwcg[] = { - {REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL_SP1, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL_SP2, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL_SP3, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02022220}, - {REG_A6XX_RBBM_CLOCK_CNTL2_SP1, 0x02022220}, - {REG_A6XX_RBBM_CLOCK_CNTL2_SP2, 0x02022220}, - {REG_A6XX_RBBM_CLOCK_CNTL2_SP3, 0x02022220}, - {REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, - {REG_A6XX_RBBM_CLOCK_DELAY_SP1, 0x00000080}, - {REG_A6XX_RBBM_CLOCK_DELAY_SP2, 0x00000080}, - {REG_A6XX_RBBM_CLOCK_DELAY_SP3, 0x00000080}, - {REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000f3cf}, - {REG_A6XX_RBBM_CLOCK_HYST_SP1, 0x0000f3cf}, - {REG_A6XX_RBBM_CLOCK_HYST_SP2, 0x0000f3cf}, - {REG_A6XX_RBBM_CLOCK_HYST_SP3, 0x0000f3cf}, - {REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222}, - {REG_A6XX_RBBM_CLOCK_CNTL_TP1, 0x02222222}, - {REG_A6XX_RBBM_CLOCK_CNTL_TP2, 0x02222222}, - {REG_A6XX_RBBM_CLOCK_CNTL_TP3, 0x02222222}, - {REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL3_TP1, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL3_TP2, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL3_TP3, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222}, - {REG_A6XX_RBBM_CLOCK_CNTL4_TP1, 0x00022222}, - {REG_A6XX_RBBM_CLOCK_CNTL4_TP2, 0x00022222}, - {REG_A6XX_RBBM_CLOCK_CNTL4_TP3, 0x00022222}, - {REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST_TP1, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST_TP2, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST_TP3, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST2_TP1, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST2_TP2, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST2_TP3, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST3_TP1, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST3_TP2, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST3_TP3, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777}, - {REG_A6XX_RBBM_CLOCK_HYST4_TP1, 0x00077777}, - {REG_A6XX_RBBM_CLOCK_HYST4_TP2, 0x00077777}, - {REG_A6XX_RBBM_CLOCK_HYST4_TP3, 0x00077777}, - {REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY_TP1, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY_TP2, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY_TP3, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY3_TP1, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY3_TP2, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY3_TP3, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111}, - {REG_A6XX_RBBM_CLOCK_DELAY4_TP1, 0x00011111}, - {REG_A6XX_RBBM_CLOCK_DELAY4_TP2, 0x00011111}, - {REG_A6XX_RBBM_CLOCK_DELAY4_TP3, 0x00011111}, - {REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222}, - {REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004}, - {REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, - {REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL_RB1, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL_RB2, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL_RB3, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x00002222}, - {REG_A6XX_RBBM_CLOCK_CNTL2_RB1, 0x00002222}, - {REG_A6XX_RBBM_CLOCK_CNTL2_RB2, 0x00002222}, - {REG_A6XX_RBBM_CLOCK_CNTL2_RB3, 0x00002222}, - {REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220}, - {REG_A6XX_RBBM_CLOCK_CNTL_CCU1, 0x00002220}, - {REG_A6XX_RBBM_CLOCK_CNTL_CCU2, 0x00002220}, - {REG_A6XX_RBBM_CLOCK_CNTL_CCU3, 0x00002220}, - {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040f00}, - {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU1, 0x00040f00}, - {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU2, 0x00040f00}, - {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU3, 0x00040f00}, - {REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x05022022}, - {REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555}, - {REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011}, - {REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044}, - {REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, - {REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222}, - {REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, - {REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, - {REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, - {REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, - {REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}, - {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002}, - {REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222}, - {REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222}, - {REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111}, - {REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555}, - {}, -}; - -const struct adreno_reglist a640_hwcg[] = { - {REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, - {REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, - {REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, - {REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, - {REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222}, - {REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222}, - {REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111}, - {REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777}, - {REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222}, - {REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220}, - {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00}, - {REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x05222022}, - {REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555}, - {REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011}, - {REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044}, - {REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, - {REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, - {REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222}, - {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002}, - {REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222}, - {REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, - {REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}, - {REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, - {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, - {REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222}, - {REG_A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111}, - {REG_A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004}, - {REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, - {REG_A6XX_RBBM_ISDB_CNT, 0x00000182}, - {REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000}, - {REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222}, - {REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111}, - {REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555}, - {}, -}; - -const struct adreno_reglist a650_hwcg[] = { - {REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, - {REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, - {REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, - {REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, - {REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x02222222}, - {REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222}, - {REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111}, - {REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777}, - {REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222}, - {REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220}, - {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00}, - {REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x25222022}, - {REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555}, - {REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011}, - {REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044}, - {REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, - {REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, - {REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222}, - {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002}, - {REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222}, - {REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, - {REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}, - {REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, - {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, - {REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222}, - {REG_A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111}, - {REG_A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000777}, - {REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004}, - {REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, - {REG_A6XX_RBBM_ISDB_CNT, 0x00000182}, - {REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000}, - {REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222}, - {REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111}, - {REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555}, - {}, -}; + /* If preemption is enabled */ + if (gpu->nr_rings > 1) { + /* Yield the floor on command completion */ + OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4); -const struct adreno_reglist a660_hwcg[] = { - {REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, - {REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, - {REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, - {REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, - {REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222}, - {REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111}, - {REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777}, - {REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222}, - {REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220}, - {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00}, - {REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x25222022}, - {REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555}, - {REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011}, - {REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044}, - {REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, - {REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, - {REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222}, - {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002}, - {REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222}, - {REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, - {REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}, - {REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, - {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, - {REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222}, - {REG_A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111}, - {REG_A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004}, - {REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, - {REG_A6XX_RBBM_ISDB_CNT, 0x00000182}, - {REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000}, - {REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222}, - {REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111}, - {REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555}, - {}, -}; + /* + * If dword[2:1] are non zero, they specify an address for + * the CP to write the value of dword[3] to on preemption + * complete. Write 0 to skip the write + */ + OUT_RING(ring, 0x00); + OUT_RING(ring, 0x00); + /* Data value - not used if the address above is 0 */ + OUT_RING(ring, 0x01); + /* generate interrupt on preemption completion */ + OUT_RING(ring, 0x00); + } -const struct adreno_reglist a690_hwcg[] = { - {REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, - {REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220}, - {REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080}, - {REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF}, - {REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222}, - {REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111}, - {REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111}, - {REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777}, - {REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777}, - {REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222}, - {REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220}, - {REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040F00}, - {REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x25222022}, - {REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555}, - {REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011}, - {REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044}, - {REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222}, - {REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222}, - {REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x00222222}, - {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002}, - {REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222}, - {REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000}, - {REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}, - {REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200}, - {REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004}, - {REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_CNTL_TEX_FCHE, 0x00000222}, - {REG_A6XX_RBBM_CLOCK_DELAY_TEX_FCHE, 0x00000111}, - {REG_A6XX_RBBM_CLOCK_HYST_TEX_FCHE, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222}, - {REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004}, - {REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002}, - {REG_A6XX_RBBM_CLOCK_CNTL, 0x8AA8AA82}, - {REG_A6XX_RBBM_ISDB_CNT, 0x00000182}, - {REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000}, - {REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000}, - {REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222}, - {REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111}, - {REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555}, - {REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL, 0x20200}, - {REG_A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL, 0x10111}, - {REG_A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL, 0x5555}, - {} -}; -const struct adreno_reglist a702_hwcg[] = { - { REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x22222222 }, - { REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220 }, - { REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000081 }, - { REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000f3cf }, - { REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x22222222 }, - { REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222 }, - { REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222 }, - { REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00022222 }, - { REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111 }, - { REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111 }, - { REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111 }, - { REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111 }, - { REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777 }, - { REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777 }, - { REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777 }, - { REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777 }, - { REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222 }, - { REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01202222 }, - { REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220 }, - { REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x00040f00 }, - { REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x05522022 }, - { REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00005555 }, - { REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011 }, - { REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00445044 }, - { REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222 }, - { REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222 }, - { REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x02222222 }, - { REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002 }, - { REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222 }, - { REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000 }, - { REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222 }, - { REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200 }, - { REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000 }, - { REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000 }, - { REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000 }, - { REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004 }, - { REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000 }, - { REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222 }, - { REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004 }, - { REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002 }, - { REG_A6XX_RBBM_ISDB_CNT, 0x00000182 }, - { REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000 }, - { REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000 }, - { REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222 }, - { REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111 }, - { REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555 }, - { REG_A6XX_RBBM_CLOCK_CNTL_FCHE, 0x00000222 }, - { REG_A6XX_RBBM_CLOCK_DELAY_FCHE, 0x00000000 }, - { REG_A6XX_RBBM_CLOCK_HYST_FCHE, 0x00000000 }, - { REG_A6XX_RBBM_CLOCK_CNTL_GLC, 0x00222222 }, - { REG_A6XX_RBBM_CLOCK_DELAY_GLC, 0x00000000 }, - { REG_A6XX_RBBM_CLOCK_HYST_GLC, 0x00000000 }, - { REG_A6XX_RBBM_CLOCK_CNTL_MHUB, 0x00000002 }, - { REG_A6XX_RBBM_CLOCK_DELAY_MHUB, 0x00000000 }, - { REG_A6XX_RBBM_CLOCK_HYST_MHUB, 0x00000000 }, - {} -}; + trace_msm_gpu_submit_flush(submit, + gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER)); -const struct adreno_reglist a730_hwcg[] = { - { REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222 }, - { REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x02022222 }, - { REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x0000f3cf }, - { REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080 }, - { REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x22222220 }, - { REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222 }, - { REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222 }, - { REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00222222 }, - { REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777 }, - { REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777 }, - { REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777 }, - { REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777 }, - { REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111 }, - { REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111 }, - { REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111 }, - { REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111 }, - { REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222 }, - { REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000004 }, - { REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002 }, - { REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222 }, - { REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222 }, - { REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220 }, - { REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x44000f00 }, - { REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x25222022 }, - { REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00555555 }, - { REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011 }, - { REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00440044 }, - { REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222 }, - { REG_A7XX_RBBM_CLOCK_MODE2_GRAS, 0x00000222 }, - { REG_A7XX_RBBM_CLOCK_MODE_BV_GRAS, 0x00222222 }, - { REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x02222223 }, - { REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00002222 }, - { REG_A7XX_RBBM_CLOCK_MODE_BV_GPC, 0x00222222 }, - { REG_A7XX_RBBM_CLOCK_MODE_BV_VFD, 0x00002222 }, - { REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000 }, - { REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004 }, - { REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000 }, - { REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000 }, - { REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200 }, - { REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00002222 }, - { REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222 }, - { REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000 }, - { REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000 }, - { REG_A6XX_RBBM_CLOCK_DELAY_HLSQ_2, 0x00000002 }, - { REG_A7XX_RBBM_CLOCK_MODE_BV_LRZ, 0x55555552 }, - { REG_A7XX_RBBM_CLOCK_MODE_CP, 0x00000223 }, - { REG_A6XX_RBBM_CLOCK_CNTL, 0x8aa8aa82 }, - { REG_A6XX_RBBM_ISDB_CNT, 0x00000182 }, - { REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000 }, - { REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000 }, - { REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222 }, - { REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111 }, - { REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555 }, - {}, -}; + a6xx_flush(gpu, ring); -const struct adreno_reglist a740_hwcg[] = { - { REG_A6XX_RBBM_CLOCK_CNTL_SP0, 0x02222222 }, - { REG_A6XX_RBBM_CLOCK_CNTL2_SP0, 0x22022222 }, - { REG_A6XX_RBBM_CLOCK_HYST_SP0, 0x003cf3cf }, - { REG_A6XX_RBBM_CLOCK_DELAY_SP0, 0x00000080 }, - { REG_A6XX_RBBM_CLOCK_CNTL_TP0, 0x22222220 }, - { REG_A6XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222 }, - { REG_A6XX_RBBM_CLOCK_CNTL3_TP0, 0x22222222 }, - { REG_A6XX_RBBM_CLOCK_CNTL4_TP0, 0x00222222 }, - { REG_A6XX_RBBM_CLOCK_HYST_TP0, 0x77777777 }, - { REG_A6XX_RBBM_CLOCK_HYST2_TP0, 0x77777777 }, - { REG_A6XX_RBBM_CLOCK_HYST3_TP0, 0x77777777 }, - { REG_A6XX_RBBM_CLOCK_HYST4_TP0, 0x00077777 }, - { REG_A6XX_RBBM_CLOCK_DELAY_TP0, 0x11111111 }, - { REG_A6XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111 }, - { REG_A6XX_RBBM_CLOCK_DELAY3_TP0, 0x11111111 }, - { REG_A6XX_RBBM_CLOCK_DELAY4_TP0, 0x00011111 }, - { REG_A6XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222 }, - { REG_A6XX_RBBM_CLOCK_CNTL2_UCHE, 0x00222222 }, - { REG_A6XX_RBBM_CLOCK_HYST_UCHE, 0x00000444 }, - { REG_A6XX_RBBM_CLOCK_DELAY_UCHE, 0x00000222 }, - { REG_A6XX_RBBM_CLOCK_CNTL_RB0, 0x22222222 }, - { REG_A6XX_RBBM_CLOCK_CNTL2_RB0, 0x01002222 }, - { REG_A6XX_RBBM_CLOCK_CNTL_CCU0, 0x00002220 }, - { REG_A6XX_RBBM_CLOCK_HYST_RB_CCU0, 0x44000f00 }, - { REG_A6XX_RBBM_CLOCK_CNTL_RAC, 0x25222022 }, - { REG_A6XX_RBBM_CLOCK_CNTL2_RAC, 0x00555555 }, - { REG_A6XX_RBBM_CLOCK_DELAY_RAC, 0x00000011 }, - { REG_A6XX_RBBM_CLOCK_HYST_RAC, 0x00440044 }, - { REG_A6XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222 }, - { REG_A7XX_RBBM_CLOCK_MODE2_GRAS, 0x00000222 }, - { REG_A7XX_RBBM_CLOCK_MODE_BV_GRAS, 0x00222222 }, - { REG_A6XX_RBBM_CLOCK_MODE_GPC, 0x02222223 }, - { REG_A6XX_RBBM_CLOCK_MODE_VFD, 0x00222222 }, - { REG_A7XX_RBBM_CLOCK_MODE_BV_GPC, 0x00222222 }, - { REG_A7XX_RBBM_CLOCK_MODE_BV_VFD, 0x00002222 }, - { REG_A6XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000 }, - { REG_A6XX_RBBM_CLOCK_HYST_GPC, 0x04104004 }, - { REG_A6XX_RBBM_CLOCK_HYST_VFD, 0x00000000 }, - { REG_A6XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000000 }, - { REG_A6XX_RBBM_CLOCK_DELAY_GPC, 0x00000200 }, - { REG_A6XX_RBBM_CLOCK_DELAY_VFD, 0x00000000 }, - { REG_A6XX_RBBM_CLOCK_MODE_HLSQ, 0x00002222 }, - { REG_A6XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000 }, - { REG_A6XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000 }, - { REG_A7XX_RBBM_CLOCK_MODE_BV_LRZ, 0x55555552 }, - { REG_A7XX_RBBM_CLOCK_HYST2_VFD, 0x00000000 }, - { REG_A7XX_RBBM_CLOCK_MODE_CP, 0x00000222 }, - { REG_A6XX_RBBM_CLOCK_CNTL, 0x8aa8aa82 }, - { REG_A6XX_RBBM_ISDB_CNT, 0x00000182 }, - { REG_A6XX_RBBM_RAC_THRESHOLD_CNT, 0x00000000 }, - { REG_A6XX_RBBM_SP_HYST_CNT, 0x00000000 }, - { REG_A6XX_RBBM_CLOCK_CNTL_GMU_GX, 0x00000222 }, - { REG_A6XX_RBBM_CLOCK_DELAY_GMU_GX, 0x00000111 }, - { REG_A6XX_RBBM_CLOCK_HYST_GMU_GX, 0x00000555 }, - {}, -}; + /* Check to see if we need to start preemption */ + a6xx_preempt_trigger(gpu); +} static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state) { @@ -1018,9 +514,10 @@ static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state) struct a6xx_gmu *gmu = &a6xx_gpu->gmu; const struct adreno_reglist *reg; unsigned int i; - u32 val, clock_cntl_on, cgc_mode; + u32 cgc_delay, cgc_hyst; + u32 val, clock_cntl_on; - if (!(adreno_gpu->info->hwcg || adreno_is_a7xx(adreno_gpu))) + if (!(adreno_gpu->info->a6xx->hwcg || adreno_is_a7xx(adreno_gpu))) return; if (adreno_is_a630(adreno_gpu)) @@ -1032,18 +529,17 @@ static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state) else clock_cntl_on = 0x8aa8aa82; - if (adreno_is_a7xx(adreno_gpu)) { - cgc_mode = adreno_is_a740_family(adreno_gpu) ? 0x20222 : 0x20000; + cgc_delay = adreno_is_a615_family(adreno_gpu) ? 0x111 : 0x10111; + cgc_hyst = adreno_is_a615_family(adreno_gpu) ? 0x555 : 0x5555; - gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL, - state ? cgc_mode : 0); - gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL, - state ? 0x10111 : 0); - gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL, - state ? 0x5555 : 0); - } + gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL, + state ? adreno_gpu->info->a6xx->gmu_cgc_mode : 0); + gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL, + state ? cgc_delay : 0); + gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL, + state ? cgc_hyst : 0); - if (!adreno_gpu->info->hwcg) { + if (!adreno_gpu->info->a6xx->hwcg) { gpu_write(gpu, REG_A7XX_RBBM_CLOCK_CNTL_GLOBAL, 1); gpu_write(gpu, REG_A7XX_RBBM_CGC_GLOBAL_LOAD_CMD, state ? 1 : 0); @@ -1072,7 +568,7 @@ static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state) if (!adreno_is_a610_family(adreno_gpu) && !adreno_is_a7xx(adreno_gpu)) gmu_rmw(gmu, REG_A6XX_GPU_GMU_GX_SPTPRAC_CLOCK_CONTROL, 1, 0); - for (i = 0; (reg = &adreno_gpu->info->hwcg[i], reg->offset); i++) + for (i = 0; (reg = &adreno_gpu->info->a6xx->hwcg[i], reg->offset); i++) gpu_write(gpu, reg->offset, state ? reg->value : 0); /* Enable SP clock */ @@ -1082,256 +578,11 @@ static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state) gpu_write(gpu, REG_A6XX_RBBM_CLOCK_CNTL, state ? clock_cntl_on : 0); } -/* For a615, a616, a618, a619, a630, a640 and a680 */ -static const u32 a6xx_protect[] = { - A6XX_PROTECT_RDONLY(0x00000, 0x04ff), - A6XX_PROTECT_RDONLY(0x00501, 0x0005), - A6XX_PROTECT_RDONLY(0x0050b, 0x02f4), - A6XX_PROTECT_NORDWR(0x0050e, 0x0000), - A6XX_PROTECT_NORDWR(0x00510, 0x0000), - A6XX_PROTECT_NORDWR(0x00534, 0x0000), - A6XX_PROTECT_NORDWR(0x00800, 0x0082), - A6XX_PROTECT_NORDWR(0x008a0, 0x0008), - A6XX_PROTECT_NORDWR(0x008ab, 0x0024), - A6XX_PROTECT_RDONLY(0x008de, 0x00ae), - A6XX_PROTECT_NORDWR(0x00900, 0x004d), - A6XX_PROTECT_NORDWR(0x0098d, 0x0272), - A6XX_PROTECT_NORDWR(0x00e00, 0x0001), - A6XX_PROTECT_NORDWR(0x00e03, 0x000c), - A6XX_PROTECT_NORDWR(0x03c00, 0x00c3), - A6XX_PROTECT_RDONLY(0x03cc4, 0x1fff), - A6XX_PROTECT_NORDWR(0x08630, 0x01cf), - A6XX_PROTECT_NORDWR(0x08e00, 0x0000), - A6XX_PROTECT_NORDWR(0x08e08, 0x0000), - A6XX_PROTECT_NORDWR(0x08e50, 0x001f), - A6XX_PROTECT_NORDWR(0x09624, 0x01db), - A6XX_PROTECT_NORDWR(0x09e70, 0x0001), - A6XX_PROTECT_NORDWR(0x09e78, 0x0187), - A6XX_PROTECT_NORDWR(0x0a630, 0x01cf), - A6XX_PROTECT_NORDWR(0x0ae02, 0x0000), - A6XX_PROTECT_NORDWR(0x0ae50, 0x032f), - A6XX_PROTECT_NORDWR(0x0b604, 0x0000), - A6XX_PROTECT_NORDWR(0x0be02, 0x0001), - A6XX_PROTECT_NORDWR(0x0be20, 0x17df), - A6XX_PROTECT_NORDWR(0x0f000, 0x0bff), - A6XX_PROTECT_RDONLY(0x0fc00, 0x1fff), - A6XX_PROTECT_NORDWR(0x11c00, 0x0000), /* note: infinite range */ -}; - -/* These are for a620 and a650 */ -static const u32 a650_protect[] = { - A6XX_PROTECT_RDONLY(0x00000, 0x04ff), - A6XX_PROTECT_RDONLY(0x00501, 0x0005), - A6XX_PROTECT_RDONLY(0x0050b, 0x02f4), - A6XX_PROTECT_NORDWR(0x0050e, 0x0000), - A6XX_PROTECT_NORDWR(0x00510, 0x0000), - A6XX_PROTECT_NORDWR(0x00534, 0x0000), - A6XX_PROTECT_NORDWR(0x00800, 0x0082), - A6XX_PROTECT_NORDWR(0x008a0, 0x0008), - A6XX_PROTECT_NORDWR(0x008ab, 0x0024), - A6XX_PROTECT_RDONLY(0x008de, 0x00ae), - A6XX_PROTECT_NORDWR(0x00900, 0x004d), - A6XX_PROTECT_NORDWR(0x0098d, 0x0272), - A6XX_PROTECT_NORDWR(0x00e00, 0x0001), - A6XX_PROTECT_NORDWR(0x00e03, 0x000c), - A6XX_PROTECT_NORDWR(0x03c00, 0x00c3), - A6XX_PROTECT_RDONLY(0x03cc4, 0x1fff), - A6XX_PROTECT_NORDWR(0x08630, 0x01cf), - A6XX_PROTECT_NORDWR(0x08e00, 0x0000), - A6XX_PROTECT_NORDWR(0x08e08, 0x0000), - A6XX_PROTECT_NORDWR(0x08e50, 0x001f), - A6XX_PROTECT_NORDWR(0x08e80, 0x027f), - A6XX_PROTECT_NORDWR(0x09624, 0x01db), - A6XX_PROTECT_NORDWR(0x09e60, 0x0011), - A6XX_PROTECT_NORDWR(0x09e78, 0x0187), - A6XX_PROTECT_NORDWR(0x0a630, 0x01cf), - A6XX_PROTECT_NORDWR(0x0ae02, 0x0000), - A6XX_PROTECT_NORDWR(0x0ae50, 0x032f), - A6XX_PROTECT_NORDWR(0x0b604, 0x0000), - A6XX_PROTECT_NORDWR(0x0b608, 0x0007), - A6XX_PROTECT_NORDWR(0x0be02, 0x0001), - A6XX_PROTECT_NORDWR(0x0be20, 0x17df), - A6XX_PROTECT_NORDWR(0x0f000, 0x0bff), - A6XX_PROTECT_RDONLY(0x0fc00, 0x1fff), - A6XX_PROTECT_NORDWR(0x18400, 0x1fff), - A6XX_PROTECT_NORDWR(0x1a800, 0x1fff), - A6XX_PROTECT_NORDWR(0x1f400, 0x0443), - A6XX_PROTECT_RDONLY(0x1f844, 0x007b), - A6XX_PROTECT_NORDWR(0x1f887, 0x001b), - A6XX_PROTECT_NORDWR(0x1f8c0, 0x0000), /* note: infinite range */ -}; - -/* These are for a635 and a660 */ -static const u32 a660_protect[] = { - A6XX_PROTECT_RDONLY(0x00000, 0x04ff), - A6XX_PROTECT_RDONLY(0x00501, 0x0005), - A6XX_PROTECT_RDONLY(0x0050b, 0x02f4), - A6XX_PROTECT_NORDWR(0x0050e, 0x0000), - A6XX_PROTECT_NORDWR(0x00510, 0x0000), - A6XX_PROTECT_NORDWR(0x00534, 0x0000), - A6XX_PROTECT_NORDWR(0x00800, 0x0082), - A6XX_PROTECT_NORDWR(0x008a0, 0x0008), - A6XX_PROTECT_NORDWR(0x008ab, 0x0024), - A6XX_PROTECT_RDONLY(0x008de, 0x00ae), - A6XX_PROTECT_NORDWR(0x00900, 0x004d), - A6XX_PROTECT_NORDWR(0x0098d, 0x0272), - A6XX_PROTECT_NORDWR(0x00e00, 0x0001), - A6XX_PROTECT_NORDWR(0x00e03, 0x000c), - A6XX_PROTECT_NORDWR(0x03c00, 0x00c3), - A6XX_PROTECT_RDONLY(0x03cc4, 0x1fff), - A6XX_PROTECT_NORDWR(0x08630, 0x01cf), - A6XX_PROTECT_NORDWR(0x08e00, 0x0000), - A6XX_PROTECT_NORDWR(0x08e08, 0x0000), - A6XX_PROTECT_NORDWR(0x08e50, 0x001f), - A6XX_PROTECT_NORDWR(0x08e80, 0x027f), - A6XX_PROTECT_NORDWR(0x09624, 0x01db), - A6XX_PROTECT_NORDWR(0x09e60, 0x0011), - A6XX_PROTECT_NORDWR(0x09e78, 0x0187), - A6XX_PROTECT_NORDWR(0x0a630, 0x01cf), - A6XX_PROTECT_NORDWR(0x0ae02, 0x0000), - A6XX_PROTECT_NORDWR(0x0ae50, 0x012f), - A6XX_PROTECT_NORDWR(0x0b604, 0x0000), - A6XX_PROTECT_NORDWR(0x0b608, 0x0006), - A6XX_PROTECT_NORDWR(0x0be02, 0x0001), - A6XX_PROTECT_NORDWR(0x0be20, 0x015f), - A6XX_PROTECT_NORDWR(0x0d000, 0x05ff), - A6XX_PROTECT_NORDWR(0x0f000, 0x0bff), - A6XX_PROTECT_RDONLY(0x0fc00, 0x1fff), - A6XX_PROTECT_NORDWR(0x18400, 0x1fff), - A6XX_PROTECT_NORDWR(0x1a400, 0x1fff), - A6XX_PROTECT_NORDWR(0x1f400, 0x0443), - A6XX_PROTECT_RDONLY(0x1f844, 0x007b), - A6XX_PROTECT_NORDWR(0x1f860, 0x0000), - A6XX_PROTECT_NORDWR(0x1f887, 0x001b), - A6XX_PROTECT_NORDWR(0x1f8c0, 0x0000), /* note: infinite range */ -}; - -/* These are for a690 */ -static const u32 a690_protect[] = { - A6XX_PROTECT_RDONLY(0x00000, 0x004ff), - A6XX_PROTECT_RDONLY(0x00501, 0x00001), - A6XX_PROTECT_RDONLY(0x0050b, 0x002f4), - A6XX_PROTECT_NORDWR(0x0050e, 0x00000), - A6XX_PROTECT_NORDWR(0x00510, 0x00000), - A6XX_PROTECT_NORDWR(0x00534, 0x00000), - A6XX_PROTECT_NORDWR(0x00800, 0x00082), - A6XX_PROTECT_NORDWR(0x008a0, 0x00008), - A6XX_PROTECT_NORDWR(0x008ab, 0x00024), - A6XX_PROTECT_RDONLY(0x008de, 0x000ae), - A6XX_PROTECT_NORDWR(0x00900, 0x0004d), - A6XX_PROTECT_NORDWR(0x0098d, 0x00272), - A6XX_PROTECT_NORDWR(0x00e00, 0x00001), - A6XX_PROTECT_NORDWR(0x00e03, 0x0000c), - A6XX_PROTECT_NORDWR(0x03c00, 0x000c3), - A6XX_PROTECT_RDONLY(0x03cc4, 0x01fff), - A6XX_PROTECT_NORDWR(0x08630, 0x001cf), - A6XX_PROTECT_NORDWR(0x08e00, 0x00000), - A6XX_PROTECT_NORDWR(0x08e08, 0x00007), - A6XX_PROTECT_NORDWR(0x08e50, 0x0001f), - A6XX_PROTECT_NORDWR(0x08e80, 0x0027f), - A6XX_PROTECT_NORDWR(0x09624, 0x001db), - A6XX_PROTECT_NORDWR(0x09e60, 0x00011), - A6XX_PROTECT_NORDWR(0x09e78, 0x00187), - A6XX_PROTECT_NORDWR(0x0a630, 0x001cf), - A6XX_PROTECT_NORDWR(0x0ae02, 0x00000), - A6XX_PROTECT_NORDWR(0x0ae50, 0x0012f), - A6XX_PROTECT_NORDWR(0x0b604, 0x00000), - A6XX_PROTECT_NORDWR(0x0b608, 0x00006), - A6XX_PROTECT_NORDWR(0x0be02, 0x00001), - A6XX_PROTECT_NORDWR(0x0be20, 0x0015f), - A6XX_PROTECT_NORDWR(0x0d000, 0x005ff), - A6XX_PROTECT_NORDWR(0x0f000, 0x00bff), - A6XX_PROTECT_RDONLY(0x0fc00, 0x01fff), - A6XX_PROTECT_NORDWR(0x11c00, 0x00000), /*note: infiite range */ -}; - -static const u32 a730_protect[] = { - A6XX_PROTECT_RDONLY(0x00000, 0x04ff), - A6XX_PROTECT_RDONLY(0x0050b, 0x0058), - A6XX_PROTECT_NORDWR(0x0050e, 0x0000), - A6XX_PROTECT_NORDWR(0x00510, 0x0000), - A6XX_PROTECT_NORDWR(0x00534, 0x0000), - A6XX_PROTECT_RDONLY(0x005fb, 0x009d), - A6XX_PROTECT_NORDWR(0x00699, 0x01e9), - A6XX_PROTECT_NORDWR(0x008a0, 0x0008), - A6XX_PROTECT_NORDWR(0x008ab, 0x0024), - /* 0x008d0-0x008dd are unprotected on purpose for tools like perfetto */ - A6XX_PROTECT_RDONLY(0x008de, 0x0154), - A6XX_PROTECT_NORDWR(0x00900, 0x004d), - A6XX_PROTECT_NORDWR(0x0098d, 0x00b2), - A6XX_PROTECT_NORDWR(0x00a41, 0x01be), - A6XX_PROTECT_NORDWR(0x00df0, 0x0001), - A6XX_PROTECT_NORDWR(0x00e01, 0x0000), - A6XX_PROTECT_NORDWR(0x00e07, 0x0008), - A6XX_PROTECT_NORDWR(0x03c00, 0x00c3), - A6XX_PROTECT_RDONLY(0x03cc4, 0x1fff), - A6XX_PROTECT_NORDWR(0x08630, 0x01cf), - A6XX_PROTECT_NORDWR(0x08e00, 0x0000), - A6XX_PROTECT_NORDWR(0x08e08, 0x0000), - A6XX_PROTECT_NORDWR(0x08e50, 0x001f), - A6XX_PROTECT_NORDWR(0x08e80, 0x0280), - A6XX_PROTECT_NORDWR(0x09624, 0x01db), - A6XX_PROTECT_NORDWR(0x09e40, 0x0000), - A6XX_PROTECT_NORDWR(0x09e64, 0x000d), - A6XX_PROTECT_NORDWR(0x09e78, 0x0187), - A6XX_PROTECT_NORDWR(0x0a630, 0x01cf), - A6XX_PROTECT_NORDWR(0x0ae02, 0x0000), - A6XX_PROTECT_NORDWR(0x0ae50, 0x000f), - A6XX_PROTECT_NORDWR(0x0ae66, 0x0003), - A6XX_PROTECT_NORDWR(0x0ae6f, 0x0003), - A6XX_PROTECT_NORDWR(0x0b604, 0x0003), - A6XX_PROTECT_NORDWR(0x0ec00, 0x0fff), - A6XX_PROTECT_RDONLY(0x0fc00, 0x1fff), - A6XX_PROTECT_NORDWR(0x18400, 0x0053), - A6XX_PROTECT_RDONLY(0x18454, 0x0004), - A6XX_PROTECT_NORDWR(0x18459, 0x1fff), - A6XX_PROTECT_NORDWR(0x1a459, 0x1fff), - A6XX_PROTECT_NORDWR(0x1c459, 0x1fff), - A6XX_PROTECT_NORDWR(0x1f400, 0x0443), - A6XX_PROTECT_RDONLY(0x1f844, 0x007b), - A6XX_PROTECT_NORDWR(0x1f860, 0x0000), - A6XX_PROTECT_NORDWR(0x1f878, 0x002a), - /* CP_PROTECT_REG[44, 46] are left untouched! */ - 0, - 0, - 0, - A6XX_PROTECT_NORDWR(0x1f8c0, 0x00000), -}; - static void a6xx_set_cp_protect(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - const u32 *regs = a6xx_protect; - unsigned i, count, count_max; - - if (adreno_is_a650(adreno_gpu) || adreno_is_a702(adreno_gpu)) { - regs = a650_protect; - count = ARRAY_SIZE(a650_protect); - count_max = 48; - BUILD_BUG_ON(ARRAY_SIZE(a650_protect) > 48); - } else if (adreno_is_a690(adreno_gpu)) { - regs = a690_protect; - count = ARRAY_SIZE(a690_protect); - count_max = 48; - BUILD_BUG_ON(ARRAY_SIZE(a690_protect) > 48); - } else if (adreno_is_a660_family(adreno_gpu)) { - regs = a660_protect; - count = ARRAY_SIZE(a660_protect); - count_max = 48; - BUILD_BUG_ON(ARRAY_SIZE(a660_protect) > 48); - } else if (adreno_is_a730(adreno_gpu) || - adreno_is_a740(adreno_gpu) || - adreno_is_a750(adreno_gpu)) { - regs = a730_protect; - count = ARRAY_SIZE(a730_protect); - count_max = 48; - BUILD_BUG_ON(ARRAY_SIZE(a730_protect) > 48); - } else { - regs = a6xx_protect; - count = ARRAY_SIZE(a6xx_protect); - count_max = 32; - BUILD_BUG_ON(ARRAY_SIZE(a6xx_protect) > 32); - } + const struct adreno_protect *protect = adreno_gpu->info->a6xx->protect; + unsigned i; /* * Enable access protection to privileged registers, fault on an access @@ -1343,35 +594,28 @@ static void a6xx_set_cp_protect(struct msm_gpu *gpu) A6XX_CP_PROTECT_CNTL_ACCESS_FAULT_ON_VIOL_EN | A6XX_CP_PROTECT_CNTL_LAST_SPAN_INF_RANGE); - for (i = 0; i < count - 1; i++) { + for (i = 0; i < protect->count - 1; i++) { /* Intentionally skip writing to some registers */ - if (regs[i]) - gpu_write(gpu, REG_A6XX_CP_PROTECT(i), regs[i]); + if (protect->regs[i]) + gpu_write(gpu, REG_A6XX_CP_PROTECT(i), protect->regs[i]); } /* last CP_PROTECT to have "infinite" length on the last entry */ - gpu_write(gpu, REG_A6XX_CP_PROTECT(count_max - 1), regs[i]); + gpu_write(gpu, REG_A6XX_CP_PROTECT(protect->count_max - 1), protect->regs[i]); } static void a6xx_calc_ubwc_config(struct adreno_gpu *gpu) { - /* Unknown, introduced with A650 family, related to UBWC mode/ver 4 */ gpu->ubwc_config.rgb565_predicator = 0; - /* Unknown, introduced with A650 family */ gpu->ubwc_config.uavflagprd_inv = 0; - /* Whether the minimum access length is 64 bits */ gpu->ubwc_config.min_acc_len = 0; - /* Entirely magic, per-GPU-gen value */ - gpu->ubwc_config.ubwc_mode = 0; - /* - * The Highest Bank Bit value represents the bit of the highest DDR bank. - * This should ideally use DRAM type detection. - */ + gpu->ubwc_config.ubwc_swizzle = 0x6; + gpu->ubwc_config.macrotile_mode = 0; gpu->ubwc_config.highest_bank_bit = 15; if (adreno_is_a610(gpu)) { gpu->ubwc_config.highest_bank_bit = 13; gpu->ubwc_config.min_acc_len = 1; - gpu->ubwc_config.ubwc_mode = 1; + gpu->ubwc_config.ubwc_swizzle = 0x7; } if (adreno_is_a618(gpu)) @@ -1384,9 +628,26 @@ static void a6xx_calc_ubwc_config(struct adreno_gpu *gpu) if (adreno_is_a619_holi(gpu)) gpu->ubwc_config.highest_bank_bit = 13; + if (adreno_is_a621(gpu)) { + gpu->ubwc_config.highest_bank_bit = 13; + gpu->ubwc_config.amsbc = 1; + gpu->ubwc_config.uavflagprd_inv = 2; + } + + if (adreno_is_a623(gpu)) { + gpu->ubwc_config.highest_bank_bit = 16; + gpu->ubwc_config.amsbc = 1; + gpu->ubwc_config.rgb565_predicator = 1; + gpu->ubwc_config.uavflagprd_inv = 2; + gpu->ubwc_config.macrotile_mode = 1; + } + if (adreno_is_a640_family(gpu)) gpu->ubwc_config.amsbc = 1; + if (adreno_is_a680(gpu)) + gpu->ubwc_config.macrotile_mode = 1; + if (adreno_is_a650(gpu) || adreno_is_a660(gpu) || adreno_is_a690(gpu) || @@ -1397,19 +658,28 @@ static void a6xx_calc_ubwc_config(struct adreno_gpu *gpu) gpu->ubwc_config.amsbc = 1; gpu->ubwc_config.rgb565_predicator = 1; gpu->ubwc_config.uavflagprd_inv = 2; + gpu->ubwc_config.macrotile_mode = 1; + } + + if (adreno_is_a663(gpu)) { + gpu->ubwc_config.highest_bank_bit = 13; + gpu->ubwc_config.amsbc = 1; + gpu->ubwc_config.rgb565_predicator = 1; + gpu->ubwc_config.uavflagprd_inv = 2; + gpu->ubwc_config.macrotile_mode = 1; + gpu->ubwc_config.ubwc_swizzle = 0x4; } if (adreno_is_7c3(gpu)) { gpu->ubwc_config.highest_bank_bit = 14; gpu->ubwc_config.amsbc = 1; - gpu->ubwc_config.rgb565_predicator = 1; gpu->ubwc_config.uavflagprd_inv = 2; + gpu->ubwc_config.macrotile_mode = 1; } if (adreno_is_a702(gpu)) { gpu->ubwc_config.highest_bank_bit = 14; gpu->ubwc_config.min_acc_len = 1; - gpu->ubwc_config.ubwc_mode = 2; } } @@ -1425,21 +695,26 @@ static void a6xx_set_ubwc_config(struct msm_gpu *gpu) u32 hbb = adreno_gpu->ubwc_config.highest_bank_bit - 13; u32 hbb_hi = hbb >> 2; u32 hbb_lo = hbb & 3; + u32 ubwc_mode = adreno_gpu->ubwc_config.ubwc_swizzle & 1; + u32 level2_swizzling_dis = !(adreno_gpu->ubwc_config.ubwc_swizzle & 2); gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL, + level2_swizzling_dis << 12 | adreno_gpu->ubwc_config.rgb565_predicator << 11 | hbb_hi << 10 | adreno_gpu->ubwc_config.amsbc << 4 | adreno_gpu->ubwc_config.min_acc_len << 3 | - hbb_lo << 1 | adreno_gpu->ubwc_config.ubwc_mode); + hbb_lo << 1 | ubwc_mode); - gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, hbb_hi << 4 | + gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, + level2_swizzling_dis << 6 | hbb_hi << 4 | adreno_gpu->ubwc_config.min_acc_len << 3 | - hbb_lo << 1 | adreno_gpu->ubwc_config.ubwc_mode); + hbb_lo << 1 | ubwc_mode); - gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL, hbb_hi << 10 | + gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL, + level2_swizzling_dis << 12 | hbb_hi << 10 | adreno_gpu->ubwc_config.uavflagprd_inv << 4 | adreno_gpu->ubwc_config.min_acc_len << 3 | - hbb_lo << 1 | adreno_gpu->ubwc_config.ubwc_mode); + hbb_lo << 1 | ubwc_mode); if (adreno_is_a7xx(adreno_gpu)) gpu_write(gpu, REG_A7XX_GRAS_NC_MODE_CNTL, @@ -1447,6 +722,80 @@ static void a6xx_set_ubwc_config(struct msm_gpu *gpu) gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, adreno_gpu->ubwc_config.min_acc_len << 23 | hbb_lo << 21); + + gpu_write(gpu, REG_A6XX_RBBM_NC_MODE_CNTL, + adreno_gpu->ubwc_config.macrotile_mode); +} + +static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + const struct adreno_reglist_list *reglist; + void *ptr = a6xx_gpu->pwrup_reglist_ptr; + struct cpu_gpu_lock *lock = ptr; + u32 *dest = (u32 *)&lock->regs[0]; + int i; + + reglist = adreno_gpu->info->a6xx->pwrup_reglist; + + lock->gpu_req = lock->cpu_req = lock->turn = 0; + lock->ifpc_list_len = 0; + lock->preemption_list_len = reglist->count; + + /* + * For each entry in each of the lists, write the offset and the current + * register value into the GPU buffer + */ + for (i = 0; i < reglist->count; i++) { + *dest++ = reglist->regs[i]; + *dest++ = gpu_read(gpu, reglist->regs[i]); + } + + /* + * The overall register list is composed of + * 1. Static IFPC-only registers + * 2. Static IFPC + preemption registers + * 3. Dynamic IFPC + preemption registers (ex: perfcounter selects) + * + * The first two lists are static. Size of these lists are stored as + * number of pairs in ifpc_list_len and preemption_list_len + * respectively. With concurrent binning, Some of the perfcounter + * registers being virtualized, CP needs to know the pipe id to program + * the aperture inorder to restore the same. Thus, third list is a + * dynamic list with triplets as + * (<aperture, shifted 12 bits> <address> <data>), and the length is + * stored as number for triplets in dynamic_list_len. + */ + lock->dynamic_list_len = 0; +} + +static int a7xx_preempt_start(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + struct msm_ringbuffer *ring = gpu->rb[0]; + + if (gpu->nr_rings <= 1) + return 0; + + /* Turn CP protection off */ + OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); + OUT_RING(ring, 0); + + a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, NULL); + + /* Yield the floor on command completion */ + OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4); + OUT_RING(ring, 0x00); + OUT_RING(ring, 0x00); + OUT_RING(ring, 0x00); + /* Generate interrupt on preemption completion */ + OUT_RING(ring, 0x00); + + a6xx_flush(gpu, ring); + + return a6xx_idle(gpu, ring) ? 0 : -EINVAL; } static int a6xx_cp_init(struct msm_gpu *gpu) @@ -1480,6 +829,8 @@ static int a6xx_cp_init(struct msm_gpu *gpu) static int a7xx_cp_init(struct msm_gpu *gpu) { + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); struct msm_ringbuffer *ring = gpu->rb[0]; u32 mask; @@ -1517,11 +868,11 @@ static int a7xx_cp_init(struct msm_gpu *gpu) /* *Don't* send a power up reg list for concurrent binning (TODO) */ /* Lo address */ - OUT_RING(ring, 0x00000000); + OUT_RING(ring, lower_32_bits(a6xx_gpu->pwrup_reglist_iova)); /* Hi address */ - OUT_RING(ring, 0x00000000); + OUT_RING(ring, upper_32_bits(a6xx_gpu->pwrup_reglist_iova)); /* BIT(31) set => read the regs from the list */ - OUT_RING(ring, 0x00000000); + OUT_RING(ring, BIT(31)); a6xx_flush(gpu, ring); return a6xx_idle(gpu, ring) ? 0 : -EINVAL; @@ -1645,6 +996,16 @@ static int a6xx_ucode_load(struct msm_gpu *gpu) msm_gem_object_set_name(a6xx_gpu->shadow_bo, "shadow"); } + a6xx_gpu->pwrup_reglist_ptr = msm_gem_kernel_new(gpu->dev, PAGE_SIZE, + MSM_BO_WC | MSM_BO_MAP_PRIV, + gpu->aspace, &a6xx_gpu->pwrup_reglist_bo, + &a6xx_gpu->pwrup_reglist_iova); + + if (IS_ERR(a6xx_gpu->pwrup_reglist_ptr)) + return PTR_ERR(a6xx_gpu->pwrup_reglist_ptr); + + msm_gem_object_set_name(a6xx_gpu->pwrup_reglist_bo, "pwrup_reglist"); + return 0; } @@ -1686,7 +1047,8 @@ static int a6xx_zap_shader_init(struct msm_gpu *gpu) A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \ A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \ - A6XX_RBBM_INT_0_MASK_TSBWRITEERROR) + A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \ + A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) #define A7XX_APRIV_MASK (A6XX_CP_APRIV_CNTL_ICACHE | \ A6XX_CP_APRIV_CNTL_RBFETCH | \ @@ -1703,6 +1065,7 @@ static int hw_init(struct msm_gpu *gpu) struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); struct a6xx_gmu *gmu = &a6xx_gpu->gmu; u64 gmem_range_min; + unsigned int i; int ret; if (!adreno_has_gmu_wrapper(adreno_gpu)) { @@ -1715,20 +1078,18 @@ static int hw_init(struct msm_gpu *gpu) /* Clear GBIF halt in case GX domain was not collapsed */ if (adreno_is_a619_holi(adreno_gpu)) { gpu_write(gpu, REG_A6XX_GBIF_HALT, 0); + gpu_read(gpu, REG_A6XX_GBIF_HALT); + gpu_write(gpu, REG_A6XX_RBBM_GPR0_CNTL, 0); - /* Let's make extra sure that the GPU can access the memory.. */ - mb(); + gpu_read(gpu, REG_A6XX_RBBM_GPR0_CNTL); } else if (a6xx_has_gbif(adreno_gpu)) { gpu_write(gpu, REG_A6XX_GBIF_HALT, 0); + gpu_read(gpu, REG_A6XX_GBIF_HALT); + gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 0); - /* Let's make extra sure that the GPU can access the memory.. */ - mb(); + gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT); } - /* Some GPUs are stubborn and take their sweet time to unhalt GBIF! */ - if (adreno_is_a7xx(adreno_gpu) && a6xx_has_gbif(adreno_gpu)) - spin_until(!gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK)); - gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0); if (adreno_is_a619_holi(adreno_gpu)) @@ -1787,12 +1148,12 @@ static int hw_init(struct msm_gpu *gpu) /* Disable L2 bypass in the UCHE */ if (adreno_is_a7xx(adreno_gpu)) { - gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, 0x0001fffffffff000llu); - gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, 0x0001fffffffff000llu); + gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base); + gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base); } else { - gpu_write64(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX, 0x0001ffffffffffc0llu); - gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, 0x0001fffffffff000llu); - gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, 0x0001fffffffff000llu); + gpu_write64(gpu, REG_A6XX_UCHE_WRITE_RANGE_MAX, adreno_gpu->uche_trap_base + 0xfc0); + gpu_write64(gpu, REG_A6XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base); + gpu_write64(gpu, REG_A6XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base); } if (!(adreno_is_a650_family(adreno_gpu) || @@ -1838,25 +1199,11 @@ static int hw_init(struct msm_gpu *gpu) } else if (!adreno_is_a7xx(adreno_gpu)) gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 128); - /* Setting the primFifo thresholds default values, - * and vccCacheSkipDis=1 bit (0x200) for A640 and newer - */ - if (adreno_is_a702(adreno_gpu)) - gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x0000c000); - else if (adreno_is_a690(adreno_gpu)) - gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00800200); - else if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu)) - gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00300200); - else if (adreno_is_a640_family(adreno_gpu) || adreno_is_7c3(adreno_gpu)) - gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00200200); - else if (adreno_is_a650(adreno_gpu) || adreno_is_a660(adreno_gpu)) - gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00300200); - else if (adreno_is_a619(adreno_gpu)) - gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00018000); - else if (adreno_is_a610(adreno_gpu)) - gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00080000); - else if (!adreno_is_a7xx(adreno_gpu)) - gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, 0x00180000); + + /* Set the default primFifo threshold values */ + if (adreno_gpu->info->a6xx->prim_fifo_threshold) + gpu_write(gpu, REG_A6XX_PC_DBG_ECO_CNTL, + adreno_gpu->info->a6xx->prim_fifo_threshold); /* Set the AHB default slave response to "ERROR" */ gpu_write(gpu, REG_A6XX_CP_AHB_CNTL, 0x1); @@ -1891,7 +1238,7 @@ static int hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A6XX_UCHE_CLIENT_PF, BIT(7) | 0x1); /* Set weights for bicubic filtering */ - if (adreno_is_a650_family(adreno_gpu)) { + if (adreno_is_a650_family(adreno_gpu) || adreno_is_x185(adreno_gpu)) { gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0, 0); gpu_write(gpu, REG_A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1, 0x3fe05ff4); @@ -1927,7 +1274,7 @@ static int hw_init(struct msm_gpu *gpu) if (adreno_is_a690(adreno_gpu)) gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x90); /* Set dualQ + disable afull for A660 GPU */ - else if (adreno_is_a660(adreno_gpu)) + else if (adreno_is_a660(adreno_gpu) || adreno_is_a663(adreno_gpu)) gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, 0x66906); else if (adreno_is_a7xx(adreno_gpu)) gpu_write(gpu, REG_A6XX_UCHE_CMDQ_CONFIG, @@ -1951,6 +1298,17 @@ static int hw_init(struct msm_gpu *gpu) BIT(6) | BIT(5) | BIT(3) | BIT(2) | BIT(1)); } + if (adreno_is_a750(adreno_gpu)) { + /* Disable ubwc merged UFC request feature */ + gpu_rmw(gpu, REG_A6XX_RB_CMP_DBG_ECO_CNTL, BIT(19), BIT(19)); + + /* Enable TP flaghint and other performance settings */ + gpu_write(gpu, REG_A6XX_TPL1_DBG_ECO_CNTL1, 0xc0700); + } else if (adreno_is_a7xx(adreno_gpu)) { + /* Disable non-ubwc read reqs from passing write reqs */ + gpu_rmw(gpu, REG_A6XX_RB_CMP_DBG_ECO_CNTL, BIT(11), BIT(11)); + } + /* Enable interrupts */ gpu_write(gpu, REG_A6XX_RBBM_INT_0_MASK, adreno_is_a7xx(adreno_gpu) ? A7XX_INT_MASK : A6XX_INT_MASK); @@ -1978,22 +1336,32 @@ static int hw_init(struct msm_gpu *gpu) if (a6xx_gpu->shadow_bo) { gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR, shadowptr(a6xx_gpu, gpu->rb[0])); + for (unsigned int i = 0; i < gpu->nr_rings; i++) + a6xx_gpu->shadow[i] = 0; } /* ..which means "always" on A7xx, also for BV shadow */ if (adreno_is_a7xx(adreno_gpu)) { gpu_write64(gpu, REG_A7XX_CP_BV_RB_RPTR_ADDR, - rbmemptr(gpu->rb[0], bv_fence)); + rbmemptr(gpu->rb[0], bv_rptr)); } + a6xx_preempt_hw_init(gpu); + /* Always come up on rb 0 */ a6xx_gpu->cur_ring = gpu->rb[0]; - gpu->cur_ctx_seqno = 0; + for (i = 0; i < gpu->nr_rings; i++) + gpu->rb[i]->cur_ctx_seqno = 0; /* Enable the SQE_to start the CP engine */ gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1); + if (adreno_is_a7xx(adreno_gpu) && !a6xx_gpu->pwrup_reglist_emitted) { + a7xx_patch_pwrup_reglist(gpu); + a6xx_gpu->pwrup_reglist_emitted = true; + } + ret = adreno_is_a7xx(adreno_gpu) ? a7xx_cp_init(gpu) : a6xx_cp_init(gpu); if (ret) goto out; @@ -2031,6 +1399,10 @@ static int hw_init(struct msm_gpu *gpu) out: if (adreno_has_gmu_wrapper(adreno_gpu)) return ret; + + /* Last step - yield the ringbuffer */ + a7xx_preempt_start(gpu); + /* * Tell the GMU that we are done touching the GPU and it can start power * management @@ -2351,11 +1723,32 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu) gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE)); /* Turn off the hangcheck timer to keep it from bothering us */ - del_timer(&gpu->hangcheck_timer); + timer_delete(&gpu->hangcheck_timer); kthread_queue_work(gpu->worker, &gpu->recover_work); } +static void a7xx_sw_fuse_violation_irq(struct msm_gpu *gpu) +{ + u32 status; + + status = gpu_read(gpu, REG_A7XX_RBBM_SW_FUSE_INT_STATUS); + gpu_write(gpu, REG_A7XX_RBBM_SW_FUSE_INT_MASK, 0); + + dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status); + + /* + * Ignore FASTBLEND violations, because the HW will silently fall back + * to legacy blending. + */ + if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) { + timer_delete(&gpu->hangcheck_timer); + + kthread_queue_work(gpu->worker, &gpu->recover_work); + } +} + static irqreturn_t a6xx_irq(struct msm_gpu *gpu) { struct msm_drm_private *priv = gpu->dev->dev_private; @@ -2384,8 +1777,16 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu) if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS) dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n"); - if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) + if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION) + a7xx_sw_fuse_violation_irq(gpu); + + if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) { msm_gpu_retire(gpu); + a6xx_preempt_trigger(gpu); + } + + if (status & A6XX_RBBM_INT_0_MASK_CP_SW) + a6xx_preempt_irq(gpu); return IRQ_HANDLED; } @@ -2525,6 +1926,56 @@ static void a6xx_llc_slices_init(struct platform_device *pdev, a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL); } +static int a7xx_cx_mem_init(struct a6xx_gpu *a6xx_gpu) +{ + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; + struct msm_gpu *gpu = &adreno_gpu->base; + u32 fuse_val; + int ret; + + if (adreno_is_a750(adreno_gpu)) { + /* + * Assume that if qcom scm isn't available, that whatever + * replacement allows writing the fuse register ourselves. + * Users of alternative firmware need to make sure this + * register is writeable or indicate that it's not somehow. + * Print a warning because if you mess this up you're about to + * crash horribly. + */ + if (!qcom_scm_is_available()) { + dev_warn_once(gpu->dev->dev, + "SCM is not available, poking fuse register\n"); + a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE, + A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | + A7XX_CX_MISC_SW_FUSE_VALUE_FASTBLEND | + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC); + adreno_gpu->has_ray_tracing = true; + return 0; + } + + ret = qcom_scm_gpu_init_regs(QCOM_SCM_GPU_ALWAYS_EN_REQ | + QCOM_SCM_GPU_TSENSE_EN_REQ); + if (ret) + return ret; + + /* + * On a750 raytracing may be disabled by the firmware, find out + * whether that's the case. The scm call above sets the fuse + * register. + */ + fuse_val = a6xx_llc_read(a6xx_gpu, + REG_A7XX_CX_MISC_SW_FUSE_VALUE); + adreno_gpu->has_ray_tracing = + !!(fuse_val & A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING); + } else if (adreno_is_a740(adreno_gpu)) { + /* Raytracing is always enabled on a740 */ + adreno_gpu->has_ray_tracing = true; + } + + return 0; +} + + #define GBIF_CLIENT_HALT_MASK BIT(0) #define GBIF_ARB_HALT_MASK BIT(1) #define VBIF_XIN_HALT_CTRL0_MASK GENMASK(3, 0) @@ -2834,7 +2285,7 @@ a6xx_create_private_address_space(struct msm_gpu *gpu) return ERR_CAST(mmu); return msm_gem_address_space_create(mmu, - "gpu", 0x100000000ULL, + "gpu", ADRENO_VM_START, adreno_private_address_space_size(gpu)); } @@ -3029,6 +2480,7 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) struct a6xx_gpu *a6xx_gpu; struct adreno_gpu *adreno_gpu; struct msm_gpu *gpu; + extern int enable_preemption; bool is_a7xx; int ret; @@ -3062,11 +2514,15 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) ret = a6xx_set_supported_hw(&pdev->dev, config->info); if (ret) { - a6xx_destroy(&(a6xx_gpu->base.base)); + a6xx_llc_slices_destroy(a6xx_gpu); + kfree(a6xx_gpu); return ERR_PTR(ret); } - if (is_a7xx) + if ((enable_preemption == 1) || (enable_preemption == -1 && + (config->info->quirks & ADRENO_QUIRK_PREEMPTION))) + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_a7xx, 4); + else if (is_a7xx) ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_a7xx, 1); else if (adreno_has_gmu_wrapper(adreno_gpu)) ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs_gmuwrapper, 1); @@ -3094,11 +2550,23 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) return ERR_PTR(ret); } + if (adreno_is_a7xx(adreno_gpu)) { + ret = a7xx_cx_mem_init(a6xx_gpu); + if (ret) { + a6xx_destroy(&(a6xx_gpu->base.base)); + return ERR_PTR(ret); + } + } + + adreno_gpu->uche_trap_base = 0x1fffffffff000ull; + if (gpu->aspace) msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a6xx_fault_handler); a6xx_calc_ubwc_config(adreno_gpu); + /* Set up the preemption specific bits and pieces for each ringbuffer */ + a6xx_preempt_init(gpu); return gpu; } |