summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/msm/adreno
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/msm/adreno')
-rw-r--r--drivers/gpu/drm/msm/adreno/a3xx_gpu.c10
-rw-r--r--drivers/gpu/drm/msm/adreno/a4xx_gpu.c10
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_gpu.c261
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_gpu.h109
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_power.c9
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_preempt.c305
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_device.c78
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_gpu.c297
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_gpu.h66
9 files changed, 922 insertions, 223 deletions
diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
index 7791313405b5..4baef2738178 100644
--- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
@@ -44,7 +44,7 @@ static bool a3xx_idle(struct msm_gpu *gpu);
static bool a3xx_me_init(struct msm_gpu *gpu)
{
- struct msm_ringbuffer *ring = gpu->rb;
+ struct msm_ringbuffer *ring = gpu->rb[0];
OUT_PKT3(ring, CP_ME_INIT, 17);
OUT_RING(ring, 0x000003f7);
@@ -65,7 +65,7 @@ static bool a3xx_me_init(struct msm_gpu *gpu)
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
- gpu->funcs->flush(gpu);
+ gpu->funcs->flush(gpu, ring);
return a3xx_idle(gpu);
}
@@ -339,7 +339,7 @@ static void a3xx_destroy(struct msm_gpu *gpu)
static bool a3xx_idle(struct msm_gpu *gpu)
{
/* wait for ringbuffer to drain: */
- if (!adreno_idle(gpu))
+ if (!adreno_idle(gpu, gpu->rb[0]))
return false;
/* then wait for GPU to finish: */
@@ -444,9 +444,9 @@ static const struct adreno_gpu_funcs funcs = {
.pm_suspend = msm_gpu_pm_suspend,
.pm_resume = msm_gpu_pm_resume,
.recover = a3xx_recover,
- .last_fence = adreno_last_fence,
.submit = adreno_submit,
.flush = adreno_flush,
+ .active_ring = adreno_active_ring,
.irq = a3xx_irq,
.destroy = a3xx_destroy,
#ifdef CONFIG_DEBUG_FS
@@ -492,7 +492,7 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
adreno_gpu->registers = a3xx_registers;
adreno_gpu->reg_offsets = a3xx_register_offsets;
- ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs);
+ ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
if (ret)
goto fail;
diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
index 58341ef6f15b..8199a4b9f2fa 100644
--- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
@@ -116,7 +116,7 @@ static void a4xx_enable_hwcg(struct msm_gpu *gpu)
static bool a4xx_me_init(struct msm_gpu *gpu)
{
- struct msm_ringbuffer *ring = gpu->rb;
+ struct msm_ringbuffer *ring = gpu->rb[0];
OUT_PKT3(ring, CP_ME_INIT, 17);
OUT_RING(ring, 0x000003f7);
@@ -137,7 +137,7 @@ static bool a4xx_me_init(struct msm_gpu *gpu)
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
- gpu->funcs->flush(gpu);
+ gpu->funcs->flush(gpu, ring);
return a4xx_idle(gpu);
}
@@ -337,7 +337,7 @@ static void a4xx_destroy(struct msm_gpu *gpu)
static bool a4xx_idle(struct msm_gpu *gpu)
{
/* wait for ringbuffer to drain: */
- if (!adreno_idle(gpu))
+ if (!adreno_idle(gpu, gpu->rb[0]))
return false;
/* then wait for GPU to finish: */
@@ -532,9 +532,9 @@ static const struct adreno_gpu_funcs funcs = {
.pm_suspend = a4xx_pm_suspend,
.pm_resume = a4xx_pm_resume,
.recover = a4xx_recover,
- .last_fence = adreno_last_fence,
.submit = adreno_submit,
.flush = adreno_flush,
+ .active_ring = adreno_active_ring,
.irq = a4xx_irq,
.destroy = a4xx_destroy,
#ifdef CONFIG_DEBUG_FS
@@ -574,7 +574,7 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
adreno_gpu->registers = a4xx_registers;
adreno_gpu->reg_offsets = a4xx_register_offsets;
- ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs);
+ ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
if (ret)
goto fail;
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 17c59d839e6f..a1f4eeeb73e2 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -26,8 +26,9 @@ static void a5xx_dump(struct msm_gpu *gpu);
#define GPU_PAS_ID 13
-static int zap_shader_load_mdt(struct device *dev, const char *fwname)
+static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname)
{
+ struct device *dev = &gpu->pdev->dev;
const struct firmware *fw;
struct device_node *np;
struct resource r;
@@ -55,10 +56,10 @@ static int zap_shader_load_mdt(struct device *dev, const char *fwname)
mem_size = resource_size(&r);
/* Request the MDT file for the firmware */
- ret = request_firmware(&fw, fwname, dev);
- if (ret) {
+ fw = adreno_request_fw(to_adreno_gpu(gpu), fwname);
+ if (IS_ERR(fw)) {
DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname);
- return ret;
+ return PTR_ERR(fw);
}
/* Figure out how much memory we need */
@@ -75,9 +76,26 @@ static int zap_shader_load_mdt(struct device *dev, const char *fwname)
goto out;
}
- /* Load the rest of the MDT */
- ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID, mem_region, mem_phys,
- mem_size);
+ /*
+ * Load the rest of the MDT
+ *
+ * Note that we could be dealing with two different paths, since
+ * with upstream linux-firmware it would be in a qcom/ subdir..
+ * adreno_request_fw() handles this, but qcom_mdt_load() does
+ * not. But since we've already gotten thru adreno_request_fw()
+ * we know which of the two cases it is:
+ */
+ if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) {
+ ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID,
+ mem_region, mem_phys, mem_size);
+ } else {
+ char newname[strlen("qcom/") + strlen(fwname) + 1];
+
+ sprintf(newname, "qcom/%s", fwname);
+
+ ret = qcom_mdt_load(dev, fw, newname, GPU_PAS_ID,
+ mem_region, mem_phys, mem_size);
+ }
if (ret)
goto out;
@@ -95,14 +113,65 @@ out:
return ret;
}
+static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+ uint32_t wptr;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ring->lock, flags);
+
+ /* Copy the shadow to the actual register */
+ ring->cur = ring->next;
+
+ /* Make sure to wrap wptr if we need to */
+ wptr = get_wptr(ring);
+
+ spin_unlock_irqrestore(&ring->lock, flags);
+
+ /* Make sure everything is posted before making a decision */
+ mb();
+
+ /* Update HW if this is the current ring and we are not in preempt */
+ if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
+ gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
+}
+
static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
struct msm_file_private *ctx)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
struct msm_drm_private *priv = gpu->dev->dev_private;
- struct msm_ringbuffer *ring = gpu->rb;
+ struct msm_ringbuffer *ring = submit->ring;
unsigned int i, ibs = 0;
+ OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
+ OUT_RING(ring, 0x02);
+
+ /* Turn off protected mode to write to special registers */
+ OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
+ OUT_RING(ring, 0);
+
+ /* Set the save preemption record for the ring/command */
+ OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
+ OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
+ OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
+
+ /* Turn back on protected mode */
+ OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
+ OUT_RING(ring, 1);
+
+ /* Enable local preemption for finegrain preemption */
+ OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
+ OUT_RING(ring, 0x02);
+
+ /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
+ OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
+ OUT_RING(ring, 0x02);
+
+ /* Submit the commands */
for (i = 0; i < submit->nr_cmds; i++) {
switch (submit->cmd[i].type) {
case MSM_SUBMIT_CMD_IB_TARGET_BUF:
@@ -120,16 +189,54 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
}
}
+ /*
+ * Write the render mode to NULL (0) to indicate to the CP that the IBs
+ * are done rendering - otherwise a lucky preemption would start
+ * replaying from the last checkpoint
+ */
+ OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
+ OUT_RING(ring, 0);
+ OUT_RING(ring, 0);
+ OUT_RING(ring, 0);
+ OUT_RING(ring, 0);
+ OUT_RING(ring, 0);
+
+ /* Turn off IB level preemptions */
+ OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
+ OUT_RING(ring, 0x01);
+
+ /* Write the fence to the scratch register */
OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
- OUT_RING(ring, submit->fence->seqno);
+ OUT_RING(ring, submit->seqno);
+ /*
+ * Execute a CACHE_FLUSH_TS event. This will ensure that the
+ * timestamp is written to the memory and then triggers the interrupt
+ */
OUT_PKT7(ring, CP_EVENT_WRITE, 4);
OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
- OUT_RING(ring, lower_32_bits(rbmemptr(adreno_gpu, fence)));
- OUT_RING(ring, upper_32_bits(rbmemptr(adreno_gpu, fence)));
- OUT_RING(ring, submit->fence->seqno);
+ OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
+ OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
+ OUT_RING(ring, submit->seqno);
- gpu->funcs->flush(gpu);
+ /* Yield the floor on command completion */
+ OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
+ /*
+ * If dword[2:1] are non zero, they specify an address for the CP to
+ * write the value of dword[3] to on preemption complete. Write 0 to
+ * skip the write
+ */
+ OUT_RING(ring, 0x00);
+ OUT_RING(ring, 0x00);
+ /* Data value - not used if the address above is 0 */
+ OUT_RING(ring, 0x01);
+ /* Set bit 0 to trigger an interrupt on preempt complete */
+ OUT_RING(ring, 0x01);
+
+ a5xx_flush(gpu, ring);
+
+ /* Check to see if we need to start preemption */
+ a5xx_preempt_trigger(gpu);
}
static const struct {
@@ -245,7 +352,7 @@ void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
static int a5xx_me_init(struct msm_gpu *gpu)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
- struct msm_ringbuffer *ring = gpu->rb;
+ struct msm_ringbuffer *ring = gpu->rb[0];
OUT_PKT7(ring, CP_ME_INIT, 8);
@@ -276,11 +383,54 @@ static int a5xx_me_init(struct msm_gpu *gpu)
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
- gpu->funcs->flush(gpu);
+ gpu->funcs->flush(gpu, ring);
+ return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
+}
+
+static int a5xx_preempt_start(struct msm_gpu *gpu)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+ struct msm_ringbuffer *ring = gpu->rb[0];
+
+ if (gpu->nr_rings == 1)
+ return 0;
+
+ /* Turn off protected mode to write to special registers */
+ OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
+ OUT_RING(ring, 0);
+
+ /* Set the save preemption record for the ring/command */
+ OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
+ OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
+ OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
+
+ /* Turn back on protected mode */
+ OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
+ OUT_RING(ring, 1);
+
+ OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
+ OUT_RING(ring, 0x00);
+
+ OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
+ OUT_RING(ring, 0x01);
- return a5xx_idle(gpu) ? 0 : -EINVAL;
+ OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
+ OUT_RING(ring, 0x01);
+
+ /* Yield the floor on command completion */
+ OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
+ OUT_RING(ring, 0x00);
+ OUT_RING(ring, 0x00);
+ OUT_RING(ring, 0x01);
+ OUT_RING(ring, 0x01);
+
+ gpu->funcs->flush(gpu, ring);
+
+ return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
}
+
static struct drm_gem_object *a5xx_ucode_load_bo(struct msm_gpu *gpu,
const struct firmware *fw, u64 *iova)
{
@@ -381,7 +531,7 @@ static int a5xx_zap_shader_init(struct msm_gpu *gpu)
return -ENODEV;
}
- ret = zap_shader_load_mdt(&pdev->dev, adreno_gpu->info->zapfw);
+ ret = zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw);
loaded = !ret;
@@ -396,6 +546,7 @@ static int a5xx_zap_shader_init(struct msm_gpu *gpu)
A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
+ A5XX_RBBM_INT_0_MASK_CP_SW | \
A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
@@ -536,13 +687,14 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
- /* Load the GPMU firmware before starting the HW init */
- a5xx_gpmu_ucode_init(gpu);
-
ret = adreno_hw_init(gpu);
if (ret)
return ret;
+ a5xx_preempt_hw_init(gpu);
+
+ a5xx_gpmu_ucode_init(gpu);
+
ret = a5xx_ucode_init(gpu);
if (ret)
return ret;
@@ -565,11 +717,11 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
* ticking correctly
*/
if (adreno_is_a530(adreno_gpu)) {
- OUT_PKT7(gpu->rb, CP_EVENT_WRITE, 1);
- OUT_RING(gpu->rb, 0x0F);
+ OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
+ OUT_RING(gpu->rb[0], 0x0F);
- gpu->funcs->flush(gpu);
- if (!a5xx_idle(gpu))
+ gpu->funcs->flush(gpu, gpu->rb[0]);
+ if (!a5xx_idle(gpu, gpu->rb[0]))
return -EINVAL;
}
@@ -582,11 +734,11 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
*/
ret = a5xx_zap_shader_init(gpu);
if (!ret) {
- OUT_PKT7(gpu->rb, CP_SET_SECURE_MODE, 1);
- OUT_RING(gpu->rb, 0x00000000);
+ OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
+ OUT_RING(gpu->rb[0], 0x00000000);
- gpu->funcs->flush(gpu);
- if (!a5xx_idle(gpu))
+ gpu->funcs->flush(gpu, gpu->rb[0]);
+ if (!a5xx_idle(gpu, gpu->rb[0]))
return -EINVAL;
} else {
/* Print a warning so if we die, we know why */
@@ -595,6 +747,9 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
}
+ /* Last step - yield the ringbuffer */
+ a5xx_preempt_start(gpu);
+
return 0;
}
@@ -625,6 +780,8 @@ static void a5xx_destroy(struct msm_gpu *gpu)
DBG("%s", gpu->name);
+ a5xx_preempt_fini(gpu);
+
if (a5xx_gpu->pm4_bo) {
if (a5xx_gpu->pm4_iova)
msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->aspace);
@@ -660,18 +817,27 @@ static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
}
-bool a5xx_idle(struct msm_gpu *gpu)
+bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+
+ if (ring != a5xx_gpu->cur_ring) {
+ WARN(1, "Tried to idle a non-current ringbuffer\n");
+ return false;
+ }
+
/* wait for CP to drain ringbuffer: */
- if (!adreno_idle(gpu))
+ if (!adreno_idle(gpu, ring))
return false;
if (spin_until(_a5xx_check_idle(gpu))) {
- DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X\n",
+ DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
gpu->name, __builtin_return_address(0),
gpu_read(gpu, REG_A5XX_RBBM_STATUS),
- gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS));
-
+ gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
+ gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
+ gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
return false;
}
@@ -802,9 +968,10 @@ static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
{
struct drm_device *dev = gpu->dev;
struct msm_drm_private *priv = dev->dev_private;
+ struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
- dev_err(dev->dev, "gpu fault fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
- gpu->funcs->last_fence(gpu),
+ dev_err(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
+ ring ? ring->id : -1, ring ? ring->seqno : 0,
gpu_read(gpu, REG_A5XX_RBBM_STATUS),
gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
@@ -854,8 +1021,13 @@ static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
a5xx_gpmu_err_irq(gpu);
- if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS)
+ if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
+ a5xx_preempt_trigger(gpu);
msm_gpu_retire(gpu);
+ }
+
+ if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
+ a5xx_preempt_irq(gpu);
return IRQ_HANDLED;
}
@@ -985,6 +1157,14 @@ static void a5xx_show(struct msm_gpu *gpu, struct seq_file *m)
}
#endif
+static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+
+ return a5xx_gpu->cur_ring;
+}
+
static const struct adreno_gpu_funcs funcs = {
.base = {
.get_param = adreno_get_param,
@@ -992,9 +1172,9 @@ static const struct adreno_gpu_funcs funcs = {
.pm_suspend = a5xx_pm_suspend,
.pm_resume = a5xx_pm_resume,
.recover = a5xx_recover,
- .last_fence = adreno_last_fence,
.submit = a5xx_submit,
- .flush = adreno_flush,
+ .flush = a5xx_flush,
+ .active_ring = a5xx_active_ring,
.irq = a5xx_irq,
.destroy = a5xx_destroy,
#ifdef CONFIG_DEBUG_FS
@@ -1030,7 +1210,7 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
a5xx_gpu->lm_leakage = 0x4E001A;
- ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs);
+ ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
if (ret) {
a5xx_destroy(&(a5xx_gpu->base.base));
return ERR_PTR(ret);
@@ -1039,5 +1219,8 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
if (gpu->aspace)
msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
+ /* Set up the preemption specific bits and pieces for each ringbuffer */
+ a5xx_preempt_init(gpu);
+
return gpu;
}
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
index e94451685bf8..6fb8c2f9b9e4 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2016 The Linux Foundation. All rights reserved.
+/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -35,10 +35,100 @@ struct a5xx_gpu {
uint32_t gpmu_dwords;
uint32_t lm_leakage;
+
+ struct msm_ringbuffer *cur_ring;
+ struct msm_ringbuffer *next_ring;
+
+ struct drm_gem_object *preempt_bo[MSM_GPU_MAX_RINGS];
+ struct a5xx_preempt_record *preempt[MSM_GPU_MAX_RINGS];
+ uint64_t preempt_iova[MSM_GPU_MAX_RINGS];
+
+ atomic_t preempt_state;
+ struct timer_list preempt_timer;
};
#define to_a5xx_gpu(x) container_of(x, struct a5xx_gpu, base)
+/*
+ * In order to do lockless preemption we use a simple state machine to progress
+ * through the process.
+ *
+ * PREEMPT_NONE - no preemption in progress. Next state START.
+ * PREEMPT_START - The trigger is evaulating if preemption is possible. Next
+ * states: TRIGGERED, NONE
+ * PREEMPT_ABORT - An intermediate state before moving back to NONE. Next
+ * state: NONE.
+ * PREEMPT_TRIGGERED: A preemption has been executed on the hardware. Next
+ * states: FAULTED, PENDING
+ * PREEMPT_FAULTED: A preemption timed out (never completed). This will trigger
+ * recovery. Next state: N/A
+ * PREEMPT_PENDING: Preemption complete interrupt fired - the callback is
+ * checking the success of the operation. Next state: FAULTED, NONE.
+ */
+
+enum preempt_state {
+ PREEMPT_NONE = 0,
+ PREEMPT_START,
+ PREEMPT_ABORT,
+ PREEMPT_TRIGGERED,
+ PREEMPT_FAULTED,
+ PREEMPT_PENDING,
+};
+
+/*
+ * struct a5xx_preempt_record is a shared buffer between the microcode and the
+ * CPU to store the state for preemption. The record itself is much larger
+ * (64k) but most of that is used by the CP for storage.
+ *
+ * There is a preemption record assigned per ringbuffer. When the CPU triggers a
+ * preemption, it fills out the record with the useful information (wptr, ring
+ * base, etc) and the microcode uses that information to set up the CP following
+ * the preemption. When a ring is switched out, the CP will save the ringbuffer
+ * state back to the record. In this way, once the records are properly set up
+ * the CPU can quickly switch back and forth between ringbuffers by only
+ * updating a few registers (often only the wptr).
+ *
+ * These are the CPU aware registers in the record:
+ * @magic: Must always be 0x27C4BAFC
+ * @info: Type of the record - written 0 by the CPU, updated by the CP
+ * @data: Data field from SET_RENDER_MODE or a checkpoint. Written and used by
+ * the CP
+ * @cntl: Value of RB_CNTL written by CPU, save/restored by CP
+ * @rptr: Value of RB_RPTR written by CPU, save/restored by CP
+ * @wptr: Value of RB_WPTR written by CPU, save/restored by CP
+ * @rptr_addr: Value of RB_RPTR_ADDR written by CPU, save/restored by CP
+ * @rbase: Value of RB_BASE written by CPU, save/restored by CP
+ * @counter: GPU address of the storage area for the performance counters
+ */
+struct a5xx_preempt_record {
+ uint32_t magic;
+ uint32_t info;
+ uint32_t data;
+ uint32_t cntl;
+ uint32_t rptr;
+ uint32_t wptr;
+ uint64_t rptr_addr;
+ uint64_t rbase;
+ uint64_t counter;
+};
+
+/* Magic identifier for the preemption record */
+#define A5XX_PREEMPT_RECORD_MAGIC 0x27C4BAFCUL
+
+/*
+ * Even though the structure above is only a few bytes, we need a full 64k to
+ * store the entire preemption record from the CP
+ */
+#define A5XX_PREEMPT_RECORD_SIZE (64 * 1024)
+
+/*
+ * The preemption counter block is a storage area for the value of the
+ * preemption counters that are saved immediately before context switch. We
+ * append it on to the end of the allocation for the preemption record.
+ */
+#define A5XX_PREEMPT_COUNTER_SIZE (16 * 4)
+
+
int a5xx_power_init(struct msm_gpu *gpu);
void a5xx_gpmu_ucode_init(struct msm_gpu *gpu);
@@ -55,7 +145,22 @@ static inline int spin_usecs(struct msm_gpu *gpu, uint32_t usecs,
return -ETIMEDOUT;
}
-bool a5xx_idle(struct msm_gpu *gpu);
+bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
void a5xx_set_hwcg(struct msm_gpu *gpu, bool state);
+void a5xx_preempt_init(struct msm_gpu *gpu);
+void a5xx_preempt_hw_init(struct msm_gpu *gpu);
+void a5xx_preempt_trigger(struct msm_gpu *gpu);
+void a5xx_preempt_irq(struct msm_gpu *gpu);
+void a5xx_preempt_fini(struct msm_gpu *gpu);
+
+/* Return true if we are in a preempt state */
+static inline bool a5xx_in_preempt(struct a5xx_gpu *a5xx_gpu)
+{
+ int preempt_state = atomic_read(&a5xx_gpu->preempt_state);
+
+ return !(preempt_state == PREEMPT_NONE ||
+ preempt_state == PREEMPT_ABORT);
+}
+
#endif /* __A5XX_GPU_H__ */
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_power.c b/drivers/gpu/drm/msm/adreno/a5xx_power.c
index 04aab1dcae2b..e5700bbf09dd 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_power.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_power.c
@@ -173,7 +173,7 @@ static int a5xx_gpmu_init(struct msm_gpu *gpu)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
- struct msm_ringbuffer *ring = gpu->rb;
+ struct msm_ringbuffer *ring = gpu->rb[0];
if (!a5xx_gpu->gpmu_dwords)
return 0;
@@ -192,9 +192,9 @@ static int a5xx_gpmu_init(struct msm_gpu *gpu)
OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
OUT_RING(ring, 1);
- gpu->funcs->flush(gpu);
+ gpu->funcs->flush(gpu, ring);
- if (!a5xx_idle(gpu)) {
+ if (!a5xx_idle(gpu, ring)) {
DRM_ERROR("%s: Unable to load GPMU firmware. GPMU will not be active\n",
gpu->name);
return -EINVAL;
@@ -264,7 +264,8 @@ void a5xx_gpmu_ucode_init(struct msm_gpu *gpu)
return;
/* Get the firmware */
- if (request_firmware(&fw, adreno_gpu->info->gpmufw, drm->dev)) {
+ fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->gpmufw);
+ if (IS_ERR(fw)) {
DRM_ERROR("%s: Could not get GPMU firmware. GPMU will not be active\n",
gpu->name);
return;
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
new file mode 100644
index 000000000000..40f4840ef98e
--- /dev/null
+++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
@@ -0,0 +1,305 @@
+/* Copyright (c) 2017 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "msm_gem.h"
+#include "a5xx_gpu.h"
+
+/*
+ * Try to transition the preemption state from old to new. Return
+ * true on success or false if the original state wasn't 'old'
+ */
+static inline bool try_preempt_state(struct a5xx_gpu *a5xx_gpu,
+ enum preempt_state old, enum preempt_state new)
+{
+ enum preempt_state cur = atomic_cmpxchg(&a5xx_gpu->preempt_state,
+ old, new);
+
+ return (cur == old);
+}
+
+/*
+ * Force the preemption state to the specified state. This is used in cases
+ * where the current state is known and won't change
+ */
+static inline void set_preempt_state(struct a5xx_gpu *gpu,
+ enum preempt_state new)
+{
+ /*
+ * preempt_state may be read by other cores trying to trigger a
+ * preemption or in the interrupt handler so barriers are needed
+ * before...
+ */
+ smp_mb__before_atomic();
+ atomic_set(&gpu->preempt_state, new);
+ /* ... and after*/
+ smp_mb__after_atomic();
+}
+
+/* Write the most recent wptr for the given ring into the hardware */
+static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+{
+ unsigned long flags;
+ uint32_t wptr;
+
+ if (!ring)
+ return;
+
+ spin_lock_irqsave(&ring->lock, flags);
+ wptr = get_wptr(ring);
+ spin_unlock_irqrestore(&ring->lock, flags);
+
+ gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
+}
+
+/* Return the highest priority ringbuffer with something in it */
+static struct msm_ringbuffer *get_next_ring(struct msm_gpu *gpu)
+{
+ unsigned long flags;
+ int i;
+
+ for (i = 0; i < gpu->nr_rings; i++) {
+ bool empty;
+ struct msm_ringbuffer *ring = gpu->rb[i];
+
+ spin_lock_irqsave(&ring->lock, flags);
+ empty = (get_wptr(ring) == ring->memptrs->rptr);
+ spin_unlock_irqrestore(&ring->lock, flags);
+
+ if (!empty)
+ return ring;
+ }
+
+ return NULL;
+}
+
+static void a5xx_preempt_timer(unsigned long data)
+{
+ struct a5xx_gpu *a5xx_gpu = (struct a5xx_gpu *) data;
+ struct msm_gpu *gpu = &a5xx_gpu->base.base;
+ struct drm_device *dev = gpu->dev;
+ struct msm_drm_private *priv = dev->dev_private;
+
+ if (!try_preempt_state(a5xx_gpu, PREEMPT_TRIGGERED, PREEMPT_FAULTED))
+ return;
+
+ dev_err(dev->dev, "%s: preemption timed out\n", gpu->name);
+ queue_work(priv->wq, &gpu->recover_work);
+}
+
+/* Try to trigger a preemption switch */
+void a5xx_preempt_trigger(struct msm_gpu *gpu)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+ unsigned long flags;
+ struct msm_ringbuffer *ring;
+
+ if (gpu->nr_rings == 1)
+ return;
+
+ /*
+ * Try to start preemption by moving from NONE to START. If
+ * unsuccessful, a preemption is already in flight
+ */
+ if (!try_preempt_state(a5xx_gpu, PREEMPT_NONE, PREEMPT_START))
+ return;
+
+ /* Get the next ring to preempt to */
+ ring = get_next_ring(gpu);
+
+ /*
+ * If no ring is populated or the highest priority ring is the current
+ * one do nothing except to update the wptr to the latest and greatest
+ */
+ if (!ring || (a5xx_gpu->cur_ring == ring)) {
+ /*
+ * Its possible that while a preemption request is in progress
+ * from an irq context, a user context trying to submit might
+ * fail to update the write pointer, because it determines
+ * that the preempt state is not PREEMPT_NONE.
+ *
+ * Close the race by introducing an intermediate
+ * state PREEMPT_ABORT to let the submit path
+ * know that the ringbuffer is not going to change
+ * and can safely update the write pointer.
+ */
+
+ set_preempt_state(a5xx_gpu, PREEMPT_ABORT);
+ update_wptr(gpu, a5xx_gpu->cur_ring);
+ set_preempt_state(a5xx_gpu, PREEMPT_NONE);
+ return;
+ }
+
+ /* Make sure the wptr doesn't update while we're in motion */
+ spin_lock_irqsave(&ring->lock, flags);
+ a5xx_gpu->preempt[ring->id]->wptr = get_wptr(ring);
+ spin_unlock_irqrestore(&ring->lock, flags);
+
+ /* Set the address of the incoming preemption record */
+ gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO,
+ REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI,
+ a5xx_gpu->preempt_iova[ring->id]);
+
+ a5xx_gpu->next_ring = ring;
+
+ /* Start a timer to catch a stuck preemption */
+ mod_timer(&a5xx_gpu->preempt_timer, jiffies + msecs_to_jiffies(10000));
+
+ /* Set the preemption state to triggered */
+ set_preempt_state(a5xx_gpu, PREEMPT_TRIGGERED);
+
+ /* Make sure everything is written before hitting the button */
+ wmb();
+
+ /* And actually start the preemption */
+ gpu_write(gpu, REG_A5XX_CP_CONTEXT_SWITCH_CNTL, 1);
+}
+
+void a5xx_preempt_irq(struct msm_gpu *gpu)
+{
+ uint32_t status;
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+ struct drm_device *dev = gpu->dev;
+ struct msm_drm_private *priv = dev->dev_private;
+
+ if (!try_preempt_state(a5xx_gpu, PREEMPT_TRIGGERED, PREEMPT_PENDING))
+ return;
+
+ /* Delete the preemption watchdog timer */
+ del_timer(&a5xx_gpu->preempt_timer);
+
+ /*
+ * The hardware should be setting CP_CONTEXT_SWITCH_CNTL to zero before
+ * firing the interrupt, but there is a non zero chance of a hardware
+ * condition or a software race that could set it again before we have a
+ * chance to finish. If that happens, log and go for recovery
+ */
+ status = gpu_read(gpu, REG_A5XX_CP_CONTEXT_SWITCH_CNTL);
+ if (unlikely(status)) {
+ set_preempt_state(a5xx_gpu, PREEMPT_FAULTED);
+ dev_err(dev->dev, "%s: Preemption failed to complete\n",
+ gpu->name);
+ queue_work(priv->wq, &gpu->recover_work);
+ return;
+ }
+
+ a5xx_gpu->cur_ring = a5xx_gpu->next_ring;
+ a5xx_gpu->next_ring = NULL;
+
+ update_wptr(gpu, a5xx_gpu->cur_ring);
+
+ set_preempt_state(a5xx_gpu, PREEMPT_NONE);
+}
+
+void a5xx_preempt_hw_init(struct msm_gpu *gpu)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+ int i;
+
+ for (i = 0; i < gpu->nr_rings; i++) {
+ a5xx_gpu->preempt[i]->wptr = 0;
+ a5xx_gpu->preempt[i]->rptr = 0;
+ a5xx_gpu->preempt[i]->rbase = gpu->rb[i]->iova;
+ }
+
+ /* Write a 0 to signal that we aren't switching pagetables */
+ gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO,
+ REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI, 0);
+
+ /* Reset the preemption state */
+ set_preempt_state(a5xx_gpu, PREEMPT_NONE);
+
+ /* Always come up on rb 0 */
+ a5xx_gpu->cur_ring = gpu->rb[0];
+}
+
+static int preempt_init_ring(struct a5xx_gpu *a5xx_gpu,
+ struct msm_ringbuffer *ring)
+{
+ struct adreno_gpu *adreno_gpu = &a5xx_gpu->base;
+ struct msm_gpu *gpu = &adreno_gpu->base;
+ struct a5xx_preempt_record *ptr;
+ struct drm_gem_object *bo = NULL;
+ u64 iova = 0;
+
+ ptr = msm_gem_kernel_new(gpu->dev,
+ A5XX_PREEMPT_RECORD_SIZE + A5XX_PREEMPT_COUNTER_SIZE,
+ MSM_BO_UNCACHED, gpu->aspace, &bo, &iova);
+
+ if (IS_ERR(ptr))
+ return PTR_ERR(ptr);
+
+ a5xx_gpu->preempt_bo[ring->id] = bo;
+ a5xx_gpu->preempt_iova[ring->id] = iova;
+ a5xx_gpu->preempt[ring->id] = ptr;
+
+ /* Set up the defaults on the preemption record */
+
+ ptr->magic = A5XX_PREEMPT_RECORD_MAGIC;
+ ptr->info = 0;
+ ptr->data = 0;
+ ptr->cntl = MSM_GPU_RB_CNTL_DEFAULT;
+ ptr->rptr_addr = rbmemptr(ring, rptr);
+ ptr->counter = iova + A5XX_PREEMPT_RECORD_SIZE;
+
+ return 0;
+}
+
+void a5xx_preempt_fini(struct msm_gpu *gpu)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+ int i;
+
+ for (i = 0; i < gpu->nr_rings; i++) {
+ if (!a5xx_gpu->preempt_bo[i])
+ continue;
+
+ msm_gem_put_vaddr(a5xx_gpu->preempt_bo[i]);
+
+ if (a5xx_gpu->preempt_iova[i])
+ msm_gem_put_iova(a5xx_gpu->preempt_bo[i], gpu->aspace);
+
+ drm_gem_object_unreference(a5xx_gpu->preempt_bo[i]);
+ a5xx_gpu->preempt_bo[i] = NULL;
+ }
+}
+
+void a5xx_preempt_init(struct msm_gpu *gpu)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+ int i;
+
+ /* No preemption if we only have one ring */
+ if (gpu->nr_rings <= 1)
+ return;
+
+ for (i = 0; i < gpu->nr_rings; i++) {
+ if (preempt_init_ring(a5xx_gpu, gpu->rb[i])) {
+ /*
+ * On any failure our adventure is over. Clean up and
+ * set nr_rings to 1 to force preemption off
+ */
+ a5xx_preempt_fini(gpu);
+ gpu->nr_rings = 1;
+
+ return;
+ }
+ }
+
+ setup_timer(&a5xx_gpu->preempt_timer, a5xx_preempt_timer,
+ (unsigned long) a5xx_gpu);
+}
diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
index c75c4df4bc39..05022ea2a007 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -125,51 +125,24 @@ struct msm_gpu *adreno_load_gpu(struct drm_device *dev)
{
struct msm_drm_private *priv = dev->dev_private;
struct platform_device *pdev = priv->gpu_pdev;
- struct adreno_platform_config *config;
- struct adreno_rev rev;
- const struct adreno_info *info;
- struct msm_gpu *gpu = NULL;
+ struct msm_gpu *gpu = platform_get_drvdata(priv->gpu_pdev);
+ int ret;
- if (!pdev) {
+ if (!gpu) {
dev_err(dev->dev, "no adreno device\n");
return NULL;
}
- config = pdev->dev.platform_data;
- rev = config->rev;
- info = adreno_info(config->rev);
-
- if (!info) {
- dev_warn(dev->dev, "Unknown GPU revision: %u.%u.%u.%u\n",
- rev.core, rev.major, rev.minor, rev.patchid);
+ pm_runtime_get_sync(&pdev->dev);
+ mutex_lock(&dev->struct_mutex);
+ ret = msm_gpu_hw_init(gpu);
+ mutex_unlock(&dev->struct_mutex);
+ pm_runtime_put_sync(&pdev->dev);
+ if (ret) {
+ dev_err(dev->dev, "gpu hw init failed: %d\n", ret);
return NULL;
}
- DBG("Found GPU: %u.%u.%u.%u", rev.core, rev.major,
- rev.minor, rev.patchid);
-
- gpu = info->init(dev);
- if (IS_ERR(gpu)) {
- dev_warn(dev->dev, "failed to load adreno gpu\n");
- gpu = NULL;
- /* not fatal */
- }
-
- if (gpu) {
- int ret;
-
- pm_runtime_get_sync(&pdev->dev);
- mutex_lock(&dev->struct_mutex);
- ret = msm_gpu_hw_init(gpu);
- mutex_unlock(&dev->struct_mutex);
- pm_runtime_put_sync(&pdev->dev);
- if (ret) {
- dev_err(dev->dev, "gpu hw init failed: %d\n", ret);
- gpu->funcs->destroy(gpu);
- gpu = NULL;
- }
- }
-
return gpu;
}
@@ -282,6 +255,9 @@ static int adreno_get_pwrlevels(struct device *dev,
static int adreno_bind(struct device *dev, struct device *master, void *data)
{
static struct adreno_platform_config config = {};
+ const struct adreno_info *info;
+ struct drm_device *drm = dev_get_drvdata(master);
+ struct msm_gpu *gpu;
u32 val;
int ret;
@@ -302,13 +278,39 @@ static int adreno_bind(struct device *dev, struct device *master, void *data)
return ret;
dev->platform_data = &config;
- set_gpu_pdev(dev_get_drvdata(master), to_platform_device(dev));
+ set_gpu_pdev(drm, to_platform_device(dev));
+
+ info = adreno_info(config.rev);
+
+ if (!info) {
+ dev_warn(drm->dev, "Unknown GPU revision: %u.%u.%u.%u\n",
+ config.rev.core, config.rev.major,
+ config.rev.minor, config.rev.patchid);
+ return -ENXIO;
+ }
+
+ DBG("Found GPU: %u.%u.%u.%u", config.rev.core, config.rev.major,
+ config.rev.minor, config.rev.patchid);
+
+ gpu = info->init(drm);
+ if (IS_ERR(gpu)) {
+ dev_warn(drm->dev, "failed to load adreno gpu\n");
+ return PTR_ERR(gpu);
+ }
+
+ dev_set_drvdata(dev, gpu);
+
return 0;
}
static void adreno_unbind(struct device *dev, struct device *master,
void *data)
{
+ struct msm_gpu *gpu = dev_get_drvdata(dev);
+
+ gpu->funcs->pm_suspend(gpu);
+ gpu->funcs->destroy(gpu);
+
set_gpu_pdev(dev_get_drvdata(master), NULL);
}
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index c8b4ac254bb5..e2ffecce59a3 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -21,8 +21,6 @@
#include "msm_gem.h"
#include "msm_mmu.h"
-#define RB_SIZE SZ_32K
-#define RB_BLKSIZE 32
int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value)
{
@@ -58,72 +56,181 @@ int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value)
return ret;
}
return -EINVAL;
+ case MSM_PARAM_NR_RINGS:
+ *value = gpu->nr_rings;
+ return 0;
default:
DBG("%s: invalid param: %u", gpu->name, param);
return -EINVAL;
}
}
+const struct firmware *
+adreno_request_fw(struct adreno_gpu *adreno_gpu, const char *fwname)
+{
+ struct drm_device *drm = adreno_gpu->base.dev;
+ const struct firmware *fw = NULL;
+ char newname[strlen("qcom/") + strlen(fwname) + 1];
+ int ret;
+
+ sprintf(newname, "qcom/%s", fwname);
+
+ /*
+ * Try first to load from qcom/$fwfile using a direct load (to avoid
+ * a potential timeout waiting for usermode helper)
+ */
+ if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) ||
+ (adreno_gpu->fwloc == FW_LOCATION_NEW)) {
+
+ ret = request_firmware_direct(&fw, newname, drm->dev);
+ if (!ret) {
+ dev_info(drm->dev, "loaded %s from new location\n",
+ newname);
+ adreno_gpu->fwloc = FW_LOCATION_NEW;
+ return fw;
+ } else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) {
+ dev_err(drm->dev, "failed to load %s: %d\n",
+ newname, ret);
+ return ERR_PTR(ret);
+ }
+ }
+
+ /*
+ * Then try the legacy location without qcom/ prefix
+ */
+ if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) ||
+ (adreno_gpu->fwloc == FW_LOCATION_LEGACY)) {
+
+ ret = request_firmware_direct(&fw, fwname, drm->dev);
+ if (!ret) {
+ dev_info(drm->dev, "loaded %s from legacy location\n",
+ newname);
+ adreno_gpu->fwloc = FW_LOCATION_LEGACY;
+ return fw;
+ } else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) {
+ dev_err(drm->dev, "failed to load %s: %d\n",
+ fwname, ret);
+ return ERR_PTR(ret);
+ }
+ }
+
+ /*
+ * Finally fall back to request_firmware() for cases where the
+ * usermode helper is needed (I think mainly android)
+ */
+ if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) ||
+ (adreno_gpu->fwloc == FW_LOCATION_HELPER)) {
+
+ ret = request_firmware(&fw, newname, drm->dev);
+ if (!ret) {
+ dev_info(drm->dev, "loaded %s with helper\n",
+ newname);
+ adreno_gpu->fwloc = FW_LOCATION_HELPER;
+ return fw;
+ } else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) {
+ dev_err(drm->dev, "failed to load %s: %d\n",
+ newname, ret);
+ return ERR_PTR(ret);
+ }
+ }
+
+ dev_err(drm->dev, "failed to load %s\n", fwname);
+ return ERR_PTR(-ENOENT);
+}
+
+static int adreno_load_fw(struct adreno_gpu *adreno_gpu)
+{
+ const struct firmware *fw;
+
+ if (adreno_gpu->pm4)
+ return 0;
+
+ fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->pm4fw);
+ if (IS_ERR(fw))
+ return PTR_ERR(fw);
+ adreno_gpu->pm4 = fw;
+
+ fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->pfpfw);
+ if (IS_ERR(fw)) {
+ release_firmware(adreno_gpu->pm4);
+ adreno_gpu->pm4 = NULL;
+ return PTR_ERR(fw);
+ }
+ adreno_gpu->pfp = fw;
+
+ return 0;
+}
+
int adreno_hw_init(struct msm_gpu *gpu)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
- int ret;
+ int ret, i;
DBG("%s", gpu->name);
- ret = msm_gem_get_iova(gpu->rb->bo, gpu->aspace, &gpu->rb_iova);
- if (ret) {
- gpu->rb_iova = 0;
- dev_err(gpu->dev->dev, "could not map ringbuffer: %d\n", ret);
+ ret = adreno_load_fw(adreno_gpu);
+ if (ret)
return ret;
- }
- /* reset ringbuffer: */
- gpu->rb->cur = gpu->rb->start;
+ for (i = 0; i < gpu->nr_rings; i++) {
+ struct msm_ringbuffer *ring = gpu->rb[i];
+
+ if (!ring)
+ continue;
+
+ ret = msm_gem_get_iova(ring->bo, gpu->aspace, &ring->iova);
+ if (ret) {
+ ring->iova = 0;
+ dev_err(gpu->dev->dev,
+ "could not map ringbuffer %d: %d\n", i, ret);
+ return ret;
+ }
+
+ ring->cur = ring->start;
+ ring->next = ring->start;
- /* reset completed fence seqno: */
- adreno_gpu->memptrs->fence = gpu->fctx->completed_fence;
- adreno_gpu->memptrs->rptr = 0;
+ /* reset completed fence seqno: */
+ ring->memptrs->fence = ring->seqno;
+ ring->memptrs->rptr = 0;
+ }
- /* Setup REG_CP_RB_CNTL: */
+ /*
+ * Setup REG_CP_RB_CNTL. The same value is used across targets (with
+ * the excpetion of A430 that disables the RPTR shadow) - the cacluation
+ * for the ringbuffer size and block size is moved to msm_gpu.h for the
+ * pre-processor to deal with and the A430 variant is ORed in here
+ */
adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL,
- /* size is log2(quad-words): */
- AXXX_CP_RB_CNTL_BUFSZ(ilog2(gpu->rb->size / 8)) |
- AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)) |
- (adreno_is_a430(adreno_gpu) ? AXXX_CP_RB_CNTL_NO_UPDATE : 0));
+ MSM_GPU_RB_CNTL_DEFAULT |
+ (adreno_is_a430(adreno_gpu) ? AXXX_CP_RB_CNTL_NO_UPDATE : 0));
- /* Setup ringbuffer address: */
+ /* Setup ringbuffer address - use ringbuffer[0] for GPU init */
adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_BASE,
- REG_ADRENO_CP_RB_BASE_HI, gpu->rb_iova);
+ REG_ADRENO_CP_RB_BASE_HI, gpu->rb[0]->iova);
if (!adreno_is_a430(adreno_gpu)) {
adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR,
REG_ADRENO_CP_RB_RPTR_ADDR_HI,
- rbmemptr(adreno_gpu, rptr));
+ rbmemptr(gpu->rb[0], rptr));
}
return 0;
}
-static uint32_t get_wptr(struct msm_ringbuffer *ring)
-{
- return ring->cur - ring->start;
-}
-
/* Use this helper to read rptr, since a430 doesn't update rptr in memory */
-static uint32_t get_rptr(struct adreno_gpu *adreno_gpu)
+static uint32_t get_rptr(struct adreno_gpu *adreno_gpu,
+ struct msm_ringbuffer *ring)
{
if (adreno_is_a430(adreno_gpu))
- return adreno_gpu->memptrs->rptr = adreno_gpu_read(
+ return ring->memptrs->rptr = adreno_gpu_read(
adreno_gpu, REG_ADRENO_CP_RB_RPTR);
else
- return adreno_gpu->memptrs->rptr;
+ return ring->memptrs->rptr;
}
-uint32_t adreno_last_fence(struct msm_gpu *gpu)
+struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu)
{
- struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
- return adreno_gpu->memptrs->fence;
+ return gpu->rb[0];
}
void adreno_recover(struct msm_gpu *gpu)
@@ -149,7 +256,7 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct msm_drm_private *priv = gpu->dev->dev_private;
- struct msm_ringbuffer *ring = gpu->rb;
+ struct msm_ringbuffer *ring = submit->ring;
unsigned i;
for (i = 0; i < submit->nr_cmds; i++) {
@@ -164,7 +271,7 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
case MSM_SUBMIT_CMD_BUF:
OUT_PKT3(ring, adreno_is_a430(adreno_gpu) ?
CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2);
- OUT_RING(ring, submit->cmd[i].iova);
+ OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
OUT_RING(ring, submit->cmd[i].size);
OUT_PKT2(ring);
break;
@@ -172,7 +279,7 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
}
OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
- OUT_RING(ring, submit->fence->seqno);
+ OUT_RING(ring, submit->seqno);
if (adreno_is_a3xx(adreno_gpu) || adreno_is_a4xx(adreno_gpu)) {
/* Flush HLSQ lazy updates to make sure there is nothing
@@ -188,8 +295,8 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
OUT_PKT3(ring, CP_EVENT_WRITE, 3);
OUT_RING(ring, CACHE_FLUSH_TS);
- OUT_RING(ring, rbmemptr(adreno_gpu, fence));
- OUT_RING(ring, submit->fence->seqno);
+ OUT_RING(ring, rbmemptr(ring, fence));
+ OUT_RING(ring, submit->seqno);
/* we could maybe be clever and only CP_COND_EXEC the interrupt: */
OUT_PKT3(ring, CP_INTERRUPT, 1);
@@ -215,20 +322,23 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
}
#endif
- gpu->funcs->flush(gpu);
+ gpu->funcs->flush(gpu, ring);
}
-void adreno_flush(struct msm_gpu *gpu)
+void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
uint32_t wptr;
+ /* Copy the shadow to the actual register */
+ ring->cur = ring->next;
+
/*
* Mask wptr value that we calculate to fit in the HW range. This is
* to account for the possibility that the last command fit exactly into
* the ringbuffer and rb->next hasn't wrapped to zero yet
*/
- wptr = get_wptr(gpu->rb) & ((gpu->rb->size / 4) - 1);
+ wptr = get_wptr(ring);
/* ensure writes to ringbuffer have hit system memory: */
mb();
@@ -236,17 +346,19 @@ void adreno_flush(struct msm_gpu *gpu)
adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_WPTR, wptr);
}
-bool adreno_idle(struct msm_gpu *gpu)
+bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
- uint32_t wptr = get_wptr(gpu->rb);
+ uint32_t wptr = get_wptr(ring);
/* wait for CP to drain ringbuffer: */
- if (!spin_until(get_rptr(adreno_gpu) == wptr))
+ if (!spin_until(get_rptr(adreno_gpu, ring) == wptr))
return true;
/* TODO maybe we need to reset GPU here to recover from hang? */
- DRM_ERROR("%s: timeout waiting to drain ringbuffer!\n", gpu->name);
+ DRM_ERROR("%s: timeout waiting to drain ringbuffer %d rptr/wptr = %X/%X\n",
+ gpu->name, ring->id, get_rptr(adreno_gpu, ring), wptr);
+
return false;
}
@@ -261,10 +373,16 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m)
adreno_gpu->rev.major, adreno_gpu->rev.minor,
adreno_gpu->rev.patchid);
- seq_printf(m, "fence: %d/%d\n", adreno_gpu->memptrs->fence,
- gpu->fctx->last_fence);
- seq_printf(m, "rptr: %d\n", get_rptr(adreno_gpu));
- seq_printf(m, "rb wptr: %d\n", get_wptr(gpu->rb));
+ for (i = 0; i < gpu->nr_rings; i++) {
+ struct msm_ringbuffer *ring = gpu->rb[i];
+
+ seq_printf(m, "rb %d: fence: %d/%d\n", i,
+ ring->memptrs->fence, ring->seqno);
+
+ seq_printf(m, " rptr: %d\n",
+ get_rptr(adreno_gpu, ring));
+ seq_printf(m, "rb wptr: %d\n", get_wptr(ring));
+ }
/* dump these out in a form that can be parsed by demsm: */
seq_printf(m, "IO:region %s 00000000 00020000\n", gpu->name);
@@ -290,16 +408,23 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m)
void adreno_dump_info(struct msm_gpu *gpu)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ int i;
printk("revision: %d (%d.%d.%d.%d)\n",
adreno_gpu->info->revn, adreno_gpu->rev.core,
adreno_gpu->rev.major, adreno_gpu->rev.minor,
adreno_gpu->rev.patchid);
- printk("fence: %d/%d\n", adreno_gpu->memptrs->fence,
- gpu->fctx->last_fence);
- printk("rptr: %d\n", get_rptr(adreno_gpu));
- printk("rb wptr: %d\n", get_wptr(gpu->rb));
+ for (i = 0; i < gpu->nr_rings; i++) {
+ struct msm_ringbuffer *ring = gpu->rb[i];
+
+ printk("rb %d: fence: %d/%d\n", i,
+ ring->memptrs->fence,
+ ring->seqno);
+
+ printk("rptr: %d\n", get_rptr(adreno_gpu, ring));
+ printk("rb wptr: %d\n", get_wptr(ring));
+ }
}
/* would be nice to not have to duplicate the _show() stuff with printk(): */
@@ -322,28 +447,31 @@ void adreno_dump(struct msm_gpu *gpu)
}
}
-static uint32_t ring_freewords(struct msm_gpu *gpu)
+static uint32_t ring_freewords(struct msm_ringbuffer *ring)
{
- struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
- uint32_t size = gpu->rb->size / 4;
- uint32_t wptr = get_wptr(gpu->rb);
- uint32_t rptr = get_rptr(adreno_gpu);
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(ring->gpu);
+ uint32_t size = MSM_GPU_RINGBUFFER_SZ >> 2;
+ /* Use ring->next to calculate free size */
+ uint32_t wptr = ring->next - ring->start;
+ uint32_t rptr = get_rptr(adreno_gpu, ring);
return (rptr + (size - 1) - wptr) % size;
}
-void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords)
+void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords)
{
- if (spin_until(ring_freewords(gpu) >= ndwords))
- DRM_ERROR("%s: timeout waiting for ringbuffer space\n", gpu->name);
+ if (spin_until(ring_freewords(ring) >= ndwords))
+ DRM_DEV_ERROR(ring->gpu->dev->dev,
+ "timeout waiting for space in ringubffer %d\n",
+ ring->id);
}
int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
- struct adreno_gpu *adreno_gpu, const struct adreno_gpu_funcs *funcs)
+ struct adreno_gpu *adreno_gpu,
+ const struct adreno_gpu_funcs *funcs, int nr_rings)
{
struct adreno_platform_config *config = pdev->dev.platform_data;
struct msm_gpu_config adreno_gpu_config = { 0 };
struct msm_gpu *gpu = &adreno_gpu->base;
- int ret;
adreno_gpu->funcs = funcs;
adreno_gpu->info = adreno_info(config->rev);
@@ -366,59 +494,20 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
adreno_gpu_config.va_start = SZ_16M;
adreno_gpu_config.va_end = 0xffffffff;
- adreno_gpu_config.ringsz = RB_SIZE;
+ adreno_gpu_config.nr_rings = nr_rings;
pm_runtime_set_autosuspend_delay(&pdev->dev, DRM_MSM_INACTIVE_PERIOD);
pm_runtime_use_autosuspend(&pdev->dev);
pm_runtime_enable(&pdev->dev);
- ret = msm_gpu_init(drm, pdev, &adreno_gpu->base, &funcs->base,
+ return msm_gpu_init(drm, pdev, &adreno_gpu->base, &funcs->base,
adreno_gpu->info->name, &adreno_gpu_config);
- if (ret)
- return ret;
-
- ret = request_firmware(&adreno_gpu->pm4, adreno_gpu->info->pm4fw, drm->dev);
- if (ret) {
- dev_err(drm->dev, "failed to load %s PM4 firmware: %d\n",
- adreno_gpu->info->pm4fw, ret);
- return ret;
- }
-
- ret = request_firmware(&adreno_gpu->pfp, adreno_gpu->info->pfpfw, drm->dev);
- if (ret) {
- dev_err(drm->dev, "failed to load %s PFP firmware: %d\n",
- adreno_gpu->info->pfpfw, ret);
- return ret;
- }
-
- adreno_gpu->memptrs = msm_gem_kernel_new(drm,
- sizeof(*adreno_gpu->memptrs), MSM_BO_UNCACHED, gpu->aspace,
- &adreno_gpu->memptrs_bo, &adreno_gpu->memptrs_iova);
-
- if (IS_ERR(adreno_gpu->memptrs)) {
- ret = PTR_ERR(adreno_gpu->memptrs);
- adreno_gpu->memptrs = NULL;
- dev_err(drm->dev, "could not allocate memptrs: %d\n", ret);
- }
-
- return ret;
}
void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu)
{
- struct msm_gpu *gpu = &adreno_gpu->base;
-
- if (adreno_gpu->memptrs_bo) {
- if (adreno_gpu->memptrs)
- msm_gem_put_vaddr(adreno_gpu->memptrs_bo);
-
- if (adreno_gpu->memptrs_iova)
- msm_gem_put_iova(adreno_gpu->memptrs_bo, gpu->aspace);
-
- drm_gem_object_unreference_unlocked(adreno_gpu->memptrs_bo);
- }
release_firmware(adreno_gpu->pm4);
release_firmware(adreno_gpu->pfp);
- msm_gpu_cleanup(gpu);
+ msm_gpu_cleanup(&adreno_gpu->base);
}
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index 4d9165f29f43..28e3de6e5f94 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -2,7 +2,7 @@
* Copyright (C) 2013 Red Hat
* Author: Rob Clark <robdclark@gmail.com>
*
- * Copyright (c) 2014 The Linux Foundation. All rights reserved.
+ * Copyright (c) 2014,2017 The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
@@ -82,14 +82,6 @@ struct adreno_info {
const struct adreno_info *adreno_info(struct adreno_rev rev);
-#define rbmemptr(adreno_gpu, member) \
- ((adreno_gpu)->memptrs_iova + offsetof(struct adreno_rbmemptrs, member))
-
-struct adreno_rbmemptrs {
- volatile uint32_t rptr;
- volatile uint32_t fence;
-};
-
struct adreno_gpu {
struct msm_gpu base;
struct adreno_rev rev;
@@ -101,16 +93,30 @@ struct adreno_gpu {
/* interesting register offsets to dump: */
const unsigned int *registers;
+ /*
+ * Are we loading fw from legacy path? Prior to addition
+ * of gpu firmware to linux-firmware, the fw files were
+ * placed in toplevel firmware directory, following qcom's
+ * android kernel. But linux-firmware preferred they be
+ * placed in a 'qcom' subdirectory.
+ *
+ * For backwards compatibility, we try first to load from
+ * the new path, using request_firmware_direct() to avoid
+ * any potential timeout waiting for usermode helper, then
+ * fall back to the old path (with direct load). And
+ * finally fall back to request_firmware() with the new
+ * path to allow the usermode helper.
+ */
+ enum {
+ FW_LOCATION_UNKNOWN = 0,
+ FW_LOCATION_NEW, /* /lib/firmware/qcom/$fwfile */
+ FW_LOCATION_LEGACY, /* /lib/firmware/$fwfile */
+ FW_LOCATION_HELPER,
+ } fwloc;
+
/* firmware: */
const struct firmware *pm4, *pfp;
- /* ringbuffer rptr/wptr: */
- // TODO should this be in msm_ringbuffer? I think it would be
- // different for z180..
- struct adreno_rbmemptrs *memptrs;
- struct drm_gem_object *memptrs_bo;
- uint64_t memptrs_iova;
-
/*
* Register offsets are different between some GPUs.
* GPU specific offsets will be exported by GPU specific
@@ -196,22 +202,25 @@ static inline int adreno_is_a530(struct adreno_gpu *gpu)
}
int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value);
+const struct firmware *adreno_request_fw(struct adreno_gpu *adreno_gpu,
+ const char *fwname);
int adreno_hw_init(struct msm_gpu *gpu);
-uint32_t adreno_last_fence(struct msm_gpu *gpu);
void adreno_recover(struct msm_gpu *gpu);
void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
struct msm_file_private *ctx);
-void adreno_flush(struct msm_gpu *gpu);
-bool adreno_idle(struct msm_gpu *gpu);
+void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
+bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
#ifdef CONFIG_DEBUG_FS
void adreno_show(struct msm_gpu *gpu, struct seq_file *m);
#endif
void adreno_dump_info(struct msm_gpu *gpu);
void adreno_dump(struct msm_gpu *gpu);
-void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords);
+void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords);
+struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu);
int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
- struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs);
+ struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs,
+ int nr_rings);
void adreno_gpu_cleanup(struct adreno_gpu *gpu);
@@ -220,7 +229,7 @@ void adreno_gpu_cleanup(struct adreno_gpu *gpu);
static inline void
OUT_PKT0(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
{
- adreno_wait_ring(ring->gpu, cnt+1);
+ adreno_wait_ring(ring, cnt+1);
OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF));
}
@@ -228,14 +237,14 @@ OUT_PKT0(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
static inline void
OUT_PKT2(struct msm_ringbuffer *ring)
{
- adreno_wait_ring(ring->gpu, 1);
+ adreno_wait_ring(ring, 1);
OUT_RING(ring, CP_TYPE2_PKT);
}
static inline void
OUT_PKT3(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
{
- adreno_wait_ring(ring->gpu, cnt+1);
+ adreno_wait_ring(ring, cnt+1);
OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8));
}
@@ -257,14 +266,14 @@ static inline u32 PM4_PARITY(u32 val)
static inline void
OUT_PKT4(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
{
- adreno_wait_ring(ring->gpu, cnt + 1);
+ adreno_wait_ring(ring, cnt + 1);
OUT_RING(ring, PKT4(regindx, cnt));
}
static inline void
OUT_PKT7(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
{
- adreno_wait_ring(ring->gpu, cnt + 1);
+ adreno_wait_ring(ring, cnt + 1);
OUT_RING(ring, CP_TYPE7_PKT | (cnt << 0) | (PM4_PARITY(cnt) << 15) |
((opcode & 0x7F) << 16) | (PM4_PARITY(opcode) << 23));
}
@@ -323,6 +332,11 @@ static inline void adreno_gpu_write64(struct adreno_gpu *gpu,
adreno_gpu_write(gpu, hi, upper_32_bits(data));
}
+static inline uint32_t get_wptr(struct msm_ringbuffer *ring)
+{
+ return (ring->cur - ring->start) % (MSM_GPU_RINGBUFFER_SZ >> 2);
+}
+
/*
* Given a register and a count, return a value to program into
* REG_CP_PROTECT_REG(n) - this will block both reads and writes for _len