From 1c61eae469e0d1d2fb9d7b77f51ca50c1f8f3ce9 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 18 Feb 2014 01:50:22 -0700 Subject: drm/radeon: fix CP semaphores on CIK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The CP semaphore queue on CIK has a bug that triggers if uncompleted waits use the same address while a signal is still pending. Work around this by using different addresses for each sync. Signed-off-by: Christian König Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon.h | 4 +++- drivers/gpu/drm/radeon/radeon_ring.c | 2 +- drivers/gpu/drm/radeon/radeon_semaphore.c | 19 ++++++++++++++++--- 3 files changed, 20 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 4a8ac1cd6b4c..024db37b1832 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -135,6 +135,9 @@ extern int radeon_hard_reset; /* R600+ */ #define R600_RING_TYPE_UVD_INDEX 5 +/* number of hw syncs before falling back on blocking */ +#define RADEON_NUM_SYNCS 4 + /* hardcode those limit for now */ #define RADEON_VA_IB_OFFSET (1 << 20) #define RADEON_VA_RESERVED_SIZE (8 << 20) @@ -554,7 +557,6 @@ int radeon_mode_dumb_mmap(struct drm_file *filp, /* * Semaphores. */ -/* everything here is constant */ struct radeon_semaphore { struct radeon_sa_bo *sa_bo; signed waiters; diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 1b783f0e6d3a..15e44a7281ab 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -139,7 +139,7 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, } /* 64 dwords should be enough for fence too */ - r = radeon_ring_lock(rdev, ring, 64 + RADEON_NUM_RINGS * 8); + r = radeon_ring_lock(rdev, ring, 64 + RADEON_NUM_SYNCS * 8); if (r) { dev_err(rdev->dev, "scheduling IB failed (%d).\n", r); return r; diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c index 2b42aa1914f2..9006b32d5eed 100644 --- a/drivers/gpu/drm/radeon/radeon_semaphore.c +++ b/drivers/gpu/drm/radeon/radeon_semaphore.c @@ -34,14 +34,15 @@ int radeon_semaphore_create(struct radeon_device *rdev, struct radeon_semaphore **semaphore) { + uint32_t *cpu_addr; int i, r; *semaphore = kmalloc(sizeof(struct radeon_semaphore), GFP_KERNEL); if (*semaphore == NULL) { return -ENOMEM; } - r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, - &(*semaphore)->sa_bo, 8, 8, true); + r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &(*semaphore)->sa_bo, + 8 * RADEON_NUM_SYNCS, 8, true); if (r) { kfree(*semaphore); *semaphore = NULL; @@ -49,7 +50,10 @@ int radeon_semaphore_create(struct radeon_device *rdev, } (*semaphore)->waiters = 0; (*semaphore)->gpu_addr = radeon_sa_bo_gpu_addr((*semaphore)->sa_bo); - *((uint64_t*)radeon_sa_bo_cpu_addr((*semaphore)->sa_bo)) = 0; + + cpu_addr = radeon_sa_bo_cpu_addr((*semaphore)->sa_bo); + for (i = 0; i < RADEON_NUM_SYNCS; ++i) + cpu_addr[i] = 0; for (i = 0; i < RADEON_NUM_RINGS; ++i) (*semaphore)->sync_to[i] = NULL; @@ -125,6 +129,7 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev, struct radeon_semaphore *semaphore, int ring) { + unsigned count = 0; int i, r; for (i = 0; i < RADEON_NUM_RINGS; ++i) { @@ -140,6 +145,12 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev, return -EINVAL; } + if (++count > RADEON_NUM_SYNCS) { + /* not enough room, wait manually */ + radeon_fence_wait_locked(fence); + continue; + } + /* allocate enough space for sync command */ r = radeon_ring_alloc(rdev, &rdev->ring[i], 16); if (r) { @@ -164,6 +175,8 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev, radeon_ring_commit(rdev, &rdev->ring[i]); radeon_fence_note_sync(fence, ring); + + semaphore->gpu_addr += 8; } return 0; -- cgit From d93f79376f210e0b19da57a3dc841ba332daa9d0 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 23 May 2013 12:10:04 +0200 Subject: drm/radeon: initial VCE support v4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only VCE 2.0 support so far. v2: squashing multiple patches into this one v3: add IRQ support for CIK, major cleanups, basic code documentation v4: remove HAINAN from chipset list Signed-off-by: Christian König --- drivers/gpu/drm/radeon/Makefile | 6 + drivers/gpu/drm/radeon/cik.c | 60 ++++ drivers/gpu/drm/radeon/cikd.h | 33 ++ drivers/gpu/drm/radeon/radeon.h | 56 +++- drivers/gpu/drm/radeon/radeon_asic.c | 17 + drivers/gpu/drm/radeon/radeon_asic.h | 13 + drivers/gpu/drm/radeon/radeon_cs.c | 4 + drivers/gpu/drm/radeon/radeon_kms.c | 1 + drivers/gpu/drm/radeon/radeon_ring.c | 4 + drivers/gpu/drm/radeon/radeon_test.c | 39 ++- drivers/gpu/drm/radeon/radeon_vce.c | 588 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/sid.h | 47 +++ drivers/gpu/drm/radeon/vce_v1_0.c | 187 +++++++++++ drivers/gpu/drm/radeon/vce_v2_0.c | 70 +++++ 14 files changed, 1116 insertions(+), 9 deletions(-) create mode 100644 drivers/gpu/drm/radeon/radeon_vce.c create mode 100644 drivers/gpu/drm/radeon/vce_v1_0.c create mode 100644 drivers/gpu/drm/radeon/vce_v2_0.c (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index 306364a1ecda..ed60caa32518 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -99,6 +99,12 @@ radeon-y += \ uvd_v3_1.o \ uvd_v4_2.o +# add VCE block +radeon-y += \ + radeon_vce.o \ + vce_v1_0.o \ + vce_v2_0.o \ + radeon-$(CONFIG_COMPAT) += radeon_ioc32.o radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o radeon-$(CONFIG_ACPI) += radeon_acpi.o diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index e6419ca7cd37..be6eb4d91284 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -7490,6 +7490,20 @@ restart_ih: /* reset addr and status */ WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1); break; + case 167: /* VCE */ + DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data); + switch (src_data) { + case 0: + radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX); + break; + case 1: + radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX); + break; + default: + DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data); + break; + } + break; case 176: /* GFX RB CP_INT */ case 177: /* GFX IB CP_INT */ radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); @@ -7789,6 +7803,22 @@ static int cik_startup(struct radeon_device *rdev) if (r) rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; + r = radeon_vce_resume(rdev); + if (!r) { + r = vce_v2_0_resume(rdev); + if (!r) + r = radeon_fence_driver_start_ring(rdev, + TN_RING_TYPE_VCE1_INDEX); + if (!r) + r = radeon_fence_driver_start_ring(rdev, + TN_RING_TYPE_VCE2_INDEX); + } + if (r) { + dev_err(rdev->dev, "VCE init error (%d).\n", r); + rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0; + rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0; + } + /* Enable IRQ */ if (!rdev->irq.installed) { r = radeon_irq_kms_init(rdev); @@ -7864,6 +7894,23 @@ static int cik_startup(struct radeon_device *rdev) DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); } + r = -ENOENT; + + ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; + if (ring->ring_size) + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, + VCE_CMD_NO_OP); + + ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; + if (ring->ring_size) + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, + VCE_CMD_NO_OP); + + if (!r) + r = vce_v1_0_init(rdev); + else if (r != -ENOENT) + DRM_ERROR("radeon: failed initializing VCE (%d).\n", r); + r = radeon_ib_pool_init(rdev); if (r) { dev_err(rdev->dev, "IB initialization failed (%d).\n", r); @@ -7934,6 +7981,7 @@ int cik_suspend(struct radeon_device *rdev) cik_sdma_enable(rdev, false); uvd_v1_0_fini(rdev); radeon_uvd_suspend(rdev); + radeon_vce_suspend(rdev); cik_fini_pg(rdev); cik_fini_cg(rdev); cik_irq_suspend(rdev); @@ -8066,6 +8114,17 @@ int cik_init(struct radeon_device *rdev) r600_ring_init(rdev, ring, 4096); } + r = radeon_vce_init(rdev); + if (!r) { + ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; + ring->ring_obj = NULL; + r600_ring_init(rdev, ring, 4096); + + ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; + ring->ring_obj = NULL; + r600_ring_init(rdev, ring, 4096); + } + rdev->ih.ring_obj = NULL; r600_ih_ring_init(rdev, 64 * 1024); @@ -8127,6 +8186,7 @@ void cik_fini(struct radeon_device *rdev) radeon_irq_kms_fini(rdev); uvd_v1_0_fini(rdev); radeon_uvd_fini(rdev); + radeon_vce_fini(rdev); cik_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); radeon_gem_fini(rdev); diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index 98bae9d7b74d..459ae021d91c 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -2010,4 +2010,37 @@ /* UVD CTX indirect */ #define UVD_CGC_MEM_CTRL 0xC0 +/* VCE */ + +#define VCE_VCPU_CACHE_OFFSET0 0x20024 +#define VCE_VCPU_CACHE_SIZE0 0x20028 +#define VCE_VCPU_CACHE_OFFSET1 0x2002c +#define VCE_VCPU_CACHE_SIZE1 0x20030 +#define VCE_VCPU_CACHE_OFFSET2 0x20034 +#define VCE_VCPU_CACHE_SIZE2 0x20038 +#define VCE_RB_RPTR2 0x20178 +#define VCE_RB_WPTR2 0x2017c +#define VCE_RB_RPTR 0x2018c +#define VCE_RB_WPTR 0x20190 +#define VCE_CLOCK_GATING_A 0x202f8 +#define VCE_CLOCK_GATING_B 0x202fc +#define VCE_UENC_CLOCK_GATING 0x207bc +#define VCE_UENC_REG_CLOCK_GATING 0x207c0 +#define VCE_SYS_INT_EN 0x21300 +# define VCE_SYS_INT_TRAP_INTERRUPT_EN (1 << 3) +#define VCE_LMI_CTRL2 0x21474 +#define VCE_LMI_CTRL 0x21498 +#define VCE_LMI_VM_CTRL 0x214a0 +#define VCE_LMI_SWAP_CNTL 0x214b4 +#define VCE_LMI_SWAP_CNTL1 0x214b8 +#define VCE_LMI_CACHE_CTRL 0x214f4 + +#define VCE_CMD_NO_OP 0x00000000 +#define VCE_CMD_END 0x00000001 +#define VCE_CMD_IB 0x00000002 +#define VCE_CMD_FENCE 0x00000003 +#define VCE_CMD_TRAP 0x00000004 +#define VCE_CMD_IB_AUTO 0x00000005 +#define VCE_CMD_SEMAPHORE 0x00000006 + #endif diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 024db37b1832..a58a38942c73 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -113,19 +113,16 @@ extern int radeon_hard_reset; #define RADEONFB_CONN_LIMIT 4 #define RADEON_BIOS_NUM_SCRATCH 8 -/* max number of rings */ -#define RADEON_NUM_RINGS 6 - /* fence seq are set to this number when signaled */ #define RADEON_FENCE_SIGNALED_SEQ 0LL /* internal ring indices */ /* r1xx+ has gfx CP ring */ -#define RADEON_RING_TYPE_GFX_INDEX 0 +#define RADEON_RING_TYPE_GFX_INDEX 0 /* cayman has 2 compute CP rings */ -#define CAYMAN_RING_TYPE_CP1_INDEX 1 -#define CAYMAN_RING_TYPE_CP2_INDEX 2 +#define CAYMAN_RING_TYPE_CP1_INDEX 1 +#define CAYMAN_RING_TYPE_CP2_INDEX 2 /* R600+ has an async dma ring */ #define R600_RING_TYPE_DMA_INDEX 3 @@ -133,7 +130,14 @@ extern int radeon_hard_reset; #define CAYMAN_RING_TYPE_DMA1_INDEX 4 /* R600+ */ -#define R600_RING_TYPE_UVD_INDEX 5 +#define R600_RING_TYPE_UVD_INDEX 5 + +/* TN+ */ +#define TN_RING_TYPE_VCE1_INDEX 6 +#define TN_RING_TYPE_VCE2_INDEX 7 + +/* max number of rings */ +#define RADEON_NUM_RINGS 8 /* number of hw syncs before falling back on blocking */ #define RADEON_NUM_SYNCS 4 @@ -1591,6 +1595,42 @@ int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev, int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev, unsigned cg_upll_func_cntl); +/* + * VCE + */ +#define RADEON_MAX_VCE_HANDLES 16 +#define RADEON_VCE_STACK_SIZE (1024*1024) +#define RADEON_VCE_HEAP_SIZE (4*1024*1024) + +struct radeon_vce { + struct radeon_bo *vcpu_bo; + void *cpu_addr; + uint64_t gpu_addr; + atomic_t handles[RADEON_MAX_VCE_HANDLES]; + struct drm_file *filp[RADEON_MAX_VCE_HANDLES]; +}; + +int radeon_vce_init(struct radeon_device *rdev); +void radeon_vce_fini(struct radeon_device *rdev); +int radeon_vce_suspend(struct radeon_device *rdev); +int radeon_vce_resume(struct radeon_device *rdev); +int radeon_vce_get_create_msg(struct radeon_device *rdev, int ring, + uint32_t handle, struct radeon_fence **fence); +int radeon_vce_get_destroy_msg(struct radeon_device *rdev, int ring, + uint32_t handle, struct radeon_fence **fence); +void radeon_vce_free_handles(struct radeon_device *rdev, struct drm_file *filp); +int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi); +int radeon_vce_cs_parse(struct radeon_cs_parser *p); +bool radeon_vce_semaphore_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait); +void radeon_vce_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); +void radeon_vce_fence_emit(struct radeon_device *rdev, + struct radeon_fence *fence); +int radeon_vce_ring_test(struct radeon_device *rdev, struct radeon_ring *ring); +int radeon_vce_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); + struct r600_audio_pin { int channels; int rate; @@ -2186,6 +2226,7 @@ struct radeon_device { struct radeon_gem gem; struct radeon_pm pm; struct radeon_uvd uvd; + struct radeon_vce vce; uint32_t bios_scratch[RADEON_BIOS_NUM_SCRATCH]; struct radeon_wb wb; struct radeon_dummy_page dummy_page; @@ -2205,6 +2246,7 @@ struct radeon_device { const struct firmware *sdma_fw; /* CIK SDMA firmware */ const struct firmware *smc_fw; /* SMC firmware */ const struct firmware *uvd_fw; /* UVD firmware */ + const struct firmware *vce_fw; /* VCE firmware */ struct r600_vram_scratch vram_scratch; int msi_enabled; /* msi enabled */ struct r600_ih ih; /* r6/700 interrupt ring */ diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index dda02bfc10a4..4f059b2c05fb 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1987,6 +1987,19 @@ static struct radeon_asic_ring ci_dma_ring = { .set_wptr = &cik_sdma_set_wptr, }; +static struct radeon_asic_ring ci_vce_ring = { + .ib_execute = &radeon_vce_ib_execute, + .emit_fence = &radeon_vce_fence_emit, + .emit_semaphore = &radeon_vce_semaphore_emit, + .cs_parse = &radeon_vce_cs_parse, + .ring_test = &radeon_vce_ring_test, + .ib_test = &radeon_vce_ib_test, + .is_lockup = &radeon_ring_test_lockup, + .get_rptr = &vce_v1_0_get_rptr, + .get_wptr = &vce_v1_0_get_wptr, + .set_wptr = &vce_v1_0_set_wptr, +}; + static struct radeon_asic ci_asic = { .init = &cik_init, .fini = &cik_fini, @@ -2015,6 +2028,8 @@ static struct radeon_asic ci_asic = { [R600_RING_TYPE_DMA_INDEX] = &ci_dma_ring, [CAYMAN_RING_TYPE_DMA1_INDEX] = &ci_dma_ring, [R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring, + [TN_RING_TYPE_VCE1_INDEX] = &ci_vce_ring, + [TN_RING_TYPE_VCE2_INDEX] = &ci_vce_ring, }, .irq = { .set = &cik_irq_set, @@ -2117,6 +2132,8 @@ static struct radeon_asic kv_asic = { [R600_RING_TYPE_DMA_INDEX] = &ci_dma_ring, [CAYMAN_RING_TYPE_DMA1_INDEX] = &ci_dma_ring, [R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring, + [TN_RING_TYPE_VCE1_INDEX] = &ci_vce_ring, + [TN_RING_TYPE_VCE2_INDEX] = &ci_vce_ring, }, .irq = { .set = &cik_irq_set, diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index ae637cfda783..13f87bf5254b 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -863,4 +863,17 @@ bool uvd_v3_1_semaphore_emit(struct radeon_device *rdev, /* uvd v4.2 */ int uvd_v4_2_resume(struct radeon_device *rdev); +/* vce v1.0 */ +uint32_t vce_v1_0_get_rptr(struct radeon_device *rdev, + struct radeon_ring *ring); +uint32_t vce_v1_0_get_wptr(struct radeon_device *rdev, + struct radeon_ring *ring); +void vce_v1_0_set_wptr(struct radeon_device *rdev, + struct radeon_ring *ring); +int vce_v1_0_init(struct radeon_device *rdev); +int vce_v1_0_start(struct radeon_device *rdev); + +/* vce v2.0 */ +int vce_v2_0_resume(struct radeon_device *rdev); + #endif diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index dfb5a1db87d4..701ee7981b5c 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -147,6 +147,10 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority case RADEON_CS_RING_UVD: p->ring = R600_RING_TYPE_UVD_INDEX; break; + case RADEON_CS_RING_VCE: + /* TODO: only use the low priority ring for now */ + p->ring = TN_RING_TYPE_VCE1_INDEX; + break; } return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 114d1672d616..0e078afc5db7 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -610,6 +610,7 @@ void radeon_driver_preclose_kms(struct drm_device *dev, if (rdev->cmask_filp == file_priv) rdev->cmask_filp = NULL; radeon_uvd_free_handles(rdev, file_priv); + radeon_vce_free_handles(rdev, file_priv); } /* diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 15e44a7281ab..d2980b03d1ad 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -814,6 +814,8 @@ static int cayman_cp2_index = CAYMAN_RING_TYPE_CP2_INDEX; static int radeon_dma1_index = R600_RING_TYPE_DMA_INDEX; static int radeon_dma2_index = CAYMAN_RING_TYPE_DMA1_INDEX; static int r600_uvd_index = R600_RING_TYPE_UVD_INDEX; +static int si_vce1_index = TN_RING_TYPE_VCE1_INDEX; +static int si_vce2_index = TN_RING_TYPE_VCE2_INDEX; static struct drm_info_list radeon_debugfs_ring_info_list[] = { {"radeon_ring_gfx", radeon_debugfs_ring_info, 0, &radeon_gfx_index}, @@ -822,6 +824,8 @@ static struct drm_info_list radeon_debugfs_ring_info_list[] = { {"radeon_ring_dma1", radeon_debugfs_ring_info, 0, &radeon_dma1_index}, {"radeon_ring_dma2", radeon_debugfs_ring_info, 0, &radeon_dma2_index}, {"radeon_ring_uvd", radeon_debugfs_ring_info, 0, &r600_uvd_index}, + {"radeon_ring_vce1", radeon_debugfs_ring_info, 0, &si_vce1_index}, + {"radeon_ring_vce2", radeon_debugfs_ring_info, 0, &si_vce2_index}, }; static int radeon_debugfs_sa_info(struct seq_file *m, void *data) diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c index 12e8099a0823..3a13e0d1055c 100644 --- a/drivers/gpu/drm/radeon/radeon_test.c +++ b/drivers/gpu/drm/radeon/radeon_test.c @@ -257,20 +257,36 @@ static int radeon_test_create_and_emit_fence(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_fence **fence) { + uint32_t handle = ring->idx ^ 0xdeafbeef; int r; if (ring->idx == R600_RING_TYPE_UVD_INDEX) { - r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL); + r = radeon_uvd_get_create_msg(rdev, ring->idx, handle, NULL); if (r) { DRM_ERROR("Failed to get dummy create msg\n"); return r; } - r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, fence); + r = radeon_uvd_get_destroy_msg(rdev, ring->idx, handle, fence); if (r) { DRM_ERROR("Failed to get dummy destroy msg\n"); return r; } + + } else if (ring->idx == TN_RING_TYPE_VCE1_INDEX || + ring->idx == TN_RING_TYPE_VCE2_INDEX) { + r = radeon_vce_get_create_msg(rdev, ring->idx, handle, NULL); + if (r) { + DRM_ERROR("Failed to get dummy create msg\n"); + return r; + } + + r = radeon_vce_get_destroy_msg(rdev, ring->idx, handle, fence); + if (r) { + DRM_ERROR("Failed to get dummy destroy msg\n"); + return r; + } + } else { r = radeon_ring_lock(rdev, ring, 64); if (r) { @@ -486,6 +502,16 @@ out_cleanup: printk(KERN_WARNING "Error while testing ring sync (%d).\n", r); } +static bool radeon_test_sync_possible(struct radeon_ring *ringA, + struct radeon_ring *ringB) +{ + if (ringA->idx == TN_RING_TYPE_VCE2_INDEX && + ringB->idx == TN_RING_TYPE_VCE1_INDEX) + return false; + + return true; +} + void radeon_test_syncing(struct radeon_device *rdev) { int i, j, k; @@ -500,6 +526,9 @@ void radeon_test_syncing(struct radeon_device *rdev) if (!ringB->ready) continue; + if (!radeon_test_sync_possible(ringA, ringB)) + continue; + DRM_INFO("Testing syncing between rings %d and %d...\n", i, j); radeon_test_ring_sync(rdev, ringA, ringB); @@ -511,6 +540,12 @@ void radeon_test_syncing(struct radeon_device *rdev) if (!ringC->ready) continue; + if (!radeon_test_sync_possible(ringA, ringC)) + continue; + + if (!radeon_test_sync_possible(ringB, ringC)) + continue; + DRM_INFO("Testing syncing between rings %d, %d and %d...\n", i, j, k); radeon_test_ring_sync2(rdev, ringA, ringB, ringC); diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c new file mode 100644 index 000000000000..2547d8ea347a --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -0,0 +1,588 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * Authors: Christian König + */ + +#include +#include +#include +#include + +#include "radeon.h" +#include "radeon_asic.h" +#include "sid.h" + +/* Firmware Names */ +#define FIRMWARE_BONAIRE "radeon/BONAIRE_vce.bin" + +MODULE_FIRMWARE(FIRMWARE_BONAIRE); + +/** + * radeon_vce_init - allocate memory, load vce firmware + * + * @rdev: radeon_device pointer + * + * First step to get VCE online, allocate memory and load the firmware + */ +int radeon_vce_init(struct radeon_device *rdev) +{ + unsigned long bo_size; + const char *fw_name; + int i, r; + + switch (rdev->family) { + case CHIP_BONAIRE: + case CHIP_KAVERI: + case CHIP_KABINI: + fw_name = FIRMWARE_BONAIRE; + break; + + default: + return -EINVAL; + } + + r = request_firmware(&rdev->vce_fw, fw_name, rdev->dev); + if (r) { + dev_err(rdev->dev, "radeon_vce: Can't load firmware \"%s\"\n", + fw_name); + return r; + } + + bo_size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size) + + RADEON_VCE_STACK_SIZE + RADEON_VCE_HEAP_SIZE; + r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true, + RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->vce.vcpu_bo); + if (r) { + dev_err(rdev->dev, "(%d) failed to allocate VCE bo\n", r); + return r; + } + + r = radeon_vce_resume(rdev); + if (r) + return r; + + memset(rdev->vce.cpu_addr, 0, bo_size); + memcpy(rdev->vce.cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size); + + r = radeon_vce_suspend(rdev); + if (r) + return r; + + for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) { + atomic_set(&rdev->vce.handles[i], 0); + rdev->vce.filp[i] = NULL; + } + + return 0; +} + +/** + * radeon_vce_fini - free memory + * + * @rdev: radeon_device pointer + * + * Last step on VCE teardown, free firmware memory + */ +void radeon_vce_fini(struct radeon_device *rdev) +{ + radeon_vce_suspend(rdev); + radeon_bo_unref(&rdev->vce.vcpu_bo); +} + +/** + * radeon_vce_suspend - unpin VCE fw memory + * + * @rdev: radeon_device pointer + * + * TODO: Test VCE suspend/resume + */ +int radeon_vce_suspend(struct radeon_device *rdev) +{ + int r; + + if (rdev->vce.vcpu_bo == NULL) + return 0; + + r = radeon_bo_reserve(rdev->vce.vcpu_bo, false); + if (!r) { + radeon_bo_kunmap(rdev->vce.vcpu_bo); + radeon_bo_unpin(rdev->vce.vcpu_bo); + radeon_bo_unreserve(rdev->vce.vcpu_bo); + } + return r; +} + +/** + * radeon_vce_resume - pin VCE fw memory + * + * @rdev: radeon_device pointer + * + * TODO: Test VCE suspend/resume + */ +int radeon_vce_resume(struct radeon_device *rdev) +{ + int r; + + if (rdev->vce.vcpu_bo == NULL) + return -EINVAL; + + r = radeon_bo_reserve(rdev->vce.vcpu_bo, false); + if (r) { + radeon_bo_unref(&rdev->vce.vcpu_bo); + dev_err(rdev->dev, "(%d) failed to reserve VCE bo\n", r); + return r; + } + + r = radeon_bo_pin(rdev->vce.vcpu_bo, RADEON_GEM_DOMAIN_VRAM, + &rdev->vce.gpu_addr); + if (r) { + radeon_bo_unreserve(rdev->vce.vcpu_bo); + radeon_bo_unref(&rdev->vce.vcpu_bo); + dev_err(rdev->dev, "(%d) VCE bo pin failed\n", r); + return r; + } + + r = radeon_bo_kmap(rdev->vce.vcpu_bo, &rdev->vce.cpu_addr); + if (r) { + dev_err(rdev->dev, "(%d) VCE map failed\n", r); + return r; + } + + radeon_bo_unreserve(rdev->vce.vcpu_bo); + + return 0; +} + +/** + * radeon_vce_free_handles - free still open VCE handles + * + * @rdev: radeon_device pointer + * @filp: drm file pointer + * + * Close all VCE handles still open by this file pointer + */ +void radeon_vce_free_handles(struct radeon_device *rdev, struct drm_file *filp) +{ + int i, r; + for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) { + uint32_t handle = atomic_read(&rdev->vce.handles[i]); + if (!handle || rdev->vce.filp[i] != filp) + continue; + + r = radeon_vce_get_destroy_msg(rdev, TN_RING_TYPE_VCE1_INDEX, + handle, NULL); + if (r) + DRM_ERROR("Error destroying VCE handle (%d)!\n", r); + + rdev->vce.filp[i] = NULL; + atomic_set(&rdev->vce.handles[i], 0); + } +} + +/** + * radeon_vce_get_create_msg - generate a VCE create msg + * + * @rdev: radeon_device pointer + * @ring: ring we should submit the msg to + * @handle: VCE session handle to use + * @fence: optional fence to return + * + * Open up a stream for HW test + */ +int radeon_vce_get_create_msg(struct radeon_device *rdev, int ring, + uint32_t handle, struct radeon_fence **fence) +{ + const unsigned ib_size_dw = 1024; + struct radeon_ib ib; + uint64_t dummy; + int i, r; + + r = radeon_ib_get(rdev, ring, &ib, NULL, ib_size_dw * 4); + if (r) { + DRM_ERROR("radeon: failed to get ib (%d).\n", r); + return r; + } + + dummy = ib.gpu_addr + 1024; + + /* stitch together an VCE create msg */ + ib.length_dw = 0; + ib.ptr[ib.length_dw++] = 0x0000000c; /* len */ + ib.ptr[ib.length_dw++] = 0x00000001; /* session cmd */ + ib.ptr[ib.length_dw++] = handle; + + ib.ptr[ib.length_dw++] = 0x00000030; /* len */ + ib.ptr[ib.length_dw++] = 0x01000001; /* create cmd */ + ib.ptr[ib.length_dw++] = 0x00000000; + ib.ptr[ib.length_dw++] = 0x00000042; + ib.ptr[ib.length_dw++] = 0x0000000a; + ib.ptr[ib.length_dw++] = 0x00000001; + ib.ptr[ib.length_dw++] = 0x00000080; + ib.ptr[ib.length_dw++] = 0x00000060; + ib.ptr[ib.length_dw++] = 0x00000100; + ib.ptr[ib.length_dw++] = 0x00000100; + ib.ptr[ib.length_dw++] = 0x0000000c; + ib.ptr[ib.length_dw++] = 0x00000000; + + ib.ptr[ib.length_dw++] = 0x00000014; /* len */ + ib.ptr[ib.length_dw++] = 0x05000005; /* feedback buffer */ + ib.ptr[ib.length_dw++] = upper_32_bits(dummy); + ib.ptr[ib.length_dw++] = dummy; + ib.ptr[ib.length_dw++] = 0x00000001; + + for (i = ib.length_dw; i < ib_size_dw; ++i) + ib.ptr[i] = 0x0; + + r = radeon_ib_schedule(rdev, &ib, NULL); + if (r) { + DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); + } + + if (fence) + *fence = radeon_fence_ref(ib.fence); + + radeon_ib_free(rdev, &ib); + + return r; +} + +/** + * radeon_vce_get_destroy_msg - generate a VCE destroy msg + * + * @rdev: radeon_device pointer + * @ring: ring we should submit the msg to + * @handle: VCE session handle to use + * @fence: optional fence to return + * + * Close up a stream for HW test or if userspace failed to do so + */ +int radeon_vce_get_destroy_msg(struct radeon_device *rdev, int ring, + uint32_t handle, struct radeon_fence **fence) +{ + const unsigned ib_size_dw = 1024; + struct radeon_ib ib; + uint64_t dummy; + int i, r; + + r = radeon_ib_get(rdev, ring, &ib, NULL, ib_size_dw * 4); + if (r) { + DRM_ERROR("radeon: failed to get ib (%d).\n", r); + return r; + } + + dummy = ib.gpu_addr + 1024; + + /* stitch together an VCE destroy msg */ + ib.length_dw = 0; + ib.ptr[ib.length_dw++] = 0x0000000c; /* len */ + ib.ptr[ib.length_dw++] = 0x00000001; /* session cmd */ + ib.ptr[ib.length_dw++] = handle; + + ib.ptr[ib.length_dw++] = 0x00000014; /* len */ + ib.ptr[ib.length_dw++] = 0x05000005; /* feedback buffer */ + ib.ptr[ib.length_dw++] = upper_32_bits(dummy); + ib.ptr[ib.length_dw++] = dummy; + ib.ptr[ib.length_dw++] = 0x00000001; + + ib.ptr[ib.length_dw++] = 0x00000008; /* len */ + ib.ptr[ib.length_dw++] = 0x02000001; /* destroy cmd */ + + for (i = ib.length_dw; i < ib_size_dw; ++i) + ib.ptr[i] = 0x0; + + r = radeon_ib_schedule(rdev, &ib, NULL); + if (r) { + DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); + } + + if (fence) + *fence = radeon_fence_ref(ib.fence); + + radeon_ib_free(rdev, &ib); + + return r; +} + +/** + * radeon_vce_cs_reloc - command submission relocation + * + * @p: parser context + * @lo: address of lower dword + * @hi: address of higher dword + * + * Patch relocation inside command stream with real buffer address + */ +int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi) +{ + struct radeon_cs_chunk *relocs_chunk; + uint64_t offset; + unsigned idx; + + relocs_chunk = &p->chunks[p->chunk_relocs_idx]; + offset = radeon_get_ib_value(p, lo); + idx = radeon_get_ib_value(p, hi); + + if (idx >= relocs_chunk->length_dw) { + DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", + idx, relocs_chunk->length_dw); + return -EINVAL; + } + + offset += p->relocs_ptr[(idx / 4)]->lobj.gpu_offset; + + p->ib.ptr[lo] = offset & 0xFFFFFFFF; + p->ib.ptr[hi] = offset >> 32; + + return 0; +} + +/** + * radeon_vce_cs_parse - parse and validate the command stream + * + * @p: parser context + * + */ +int radeon_vce_cs_parse(struct radeon_cs_parser *p) +{ + uint32_t handle = 0; + bool destroy = false; + int i, r; + + while (p->idx < p->chunks[p->chunk_ib_idx].length_dw) { + uint32_t len = radeon_get_ib_value(p, p->idx); + uint32_t cmd = radeon_get_ib_value(p, p->idx + 1); + + if ((len < 8) || (len & 3)) { + DRM_ERROR("invalid VCE command length (%d)!\n", len); + return -EINVAL; + } + + switch (cmd) { + case 0x00000001: // session + handle = radeon_get_ib_value(p, p->idx + 2); + break; + + case 0x00000002: // task info + case 0x01000001: // create + case 0x04000001: // config extension + case 0x04000002: // pic control + case 0x04000005: // rate control + case 0x04000007: // motion estimation + case 0x04000008: // rdo + break; + + case 0x03000001: // encode + r = radeon_vce_cs_reloc(p, p->idx + 10, p->idx + 9); + if (r) + return r; + + r = radeon_vce_cs_reloc(p, p->idx + 12, p->idx + 11); + if (r) + return r; + break; + + case 0x02000001: // destroy + destroy = true; + break; + + case 0x05000001: // context buffer + case 0x05000004: // video bitstream buffer + case 0x05000005: // feedback buffer + r = radeon_vce_cs_reloc(p, p->idx + 3, p->idx + 2); + if (r) + return r; + break; + + default: + DRM_ERROR("invalid VCE command (0x%x)!\n", cmd); + return -EINVAL; + } + + p->idx += len / 4; + } + + if (destroy) { + /* IB contains a destroy msg, free the handle */ + for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) + atomic_cmpxchg(&p->rdev->vce.handles[i], handle, 0); + + return 0; + } + + /* create or encode, validate the handle */ + for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) { + if (atomic_read(&p->rdev->vce.handles[i]) == handle) + return 0; + } + + /* handle not found try to alloc a new one */ + for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) { + if (!atomic_cmpxchg(&p->rdev->vce.handles[i], 0, handle)) { + p->rdev->vce.filp[i] = p->filp; + return 0; + } + } + + DRM_ERROR("No more free VCE handles!\n"); + return -EINVAL; +} + +/** + * radeon_vce_semaphore_emit - emit a semaphore command + * + * @rdev: radeon_device pointer + * @ring: engine to use + * @semaphore: address of semaphore + * @emit_wait: true=emit wait, false=emit signal + * + */ +bool radeon_vce_semaphore_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait) +{ + uint64_t addr = semaphore->gpu_addr; + + radeon_ring_write(ring, VCE_CMD_SEMAPHORE); + radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF); + radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF); + radeon_ring_write(ring, 0x01003000 | (emit_wait ? 1 : 0)); + if (!emit_wait) + radeon_ring_write(ring, VCE_CMD_END); + + return true; +} + +/** + * radeon_vce_ib_execute - execute indirect buffer + * + * @rdev: radeon_device pointer + * @ib: the IB to execute + * + */ +void radeon_vce_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) +{ + struct radeon_ring *ring = &rdev->ring[ib->ring]; + radeon_ring_write(ring, VCE_CMD_IB); + radeon_ring_write(ring, ib->gpu_addr); + radeon_ring_write(ring, upper_32_bits(ib->gpu_addr)); + radeon_ring_write(ring, ib->length_dw); +} + +/** + * radeon_vce_fence_emit - add a fence command to the ring + * + * @rdev: radeon_device pointer + * @fence: the fence + * + */ +void radeon_vce_fence_emit(struct radeon_device *rdev, + struct radeon_fence *fence) +{ + struct radeon_ring *ring = &rdev->ring[fence->ring]; + uint32_t addr = rdev->fence_drv[fence->ring].gpu_addr; + + radeon_ring_write(ring, VCE_CMD_FENCE); + radeon_ring_write(ring, addr); + radeon_ring_write(ring, upper_32_bits(addr)); + radeon_ring_write(ring, fence->seq); + radeon_ring_write(ring, VCE_CMD_TRAP); + radeon_ring_write(ring, VCE_CMD_END); +} + +/** + * radeon_vce_ring_test - test if VCE ring is working + * + * @rdev: radeon_device pointer + * @ring: the engine to test on + * + */ +int radeon_vce_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) +{ + uint32_t rptr = vce_v1_0_get_rptr(rdev, ring); + unsigned i; + int r; + + r = radeon_ring_lock(rdev, ring, 16); + if (r) { + DRM_ERROR("radeon: vce failed to lock ring %d (%d).\n", + ring->idx, r); + return r; + } + radeon_ring_write(ring, VCE_CMD_END); + radeon_ring_unlock_commit(rdev, ring); + + for (i = 0; i < rdev->usec_timeout; i++) { + if (vce_v1_0_get_rptr(rdev, ring) != rptr) + break; + DRM_UDELAY(1); + } + + if (i < rdev->usec_timeout) { + DRM_INFO("ring test on %d succeeded in %d usecs\n", + ring->idx, i); + } else { + DRM_ERROR("radeon: ring %d test failed\n", + ring->idx); + r = -ETIMEDOUT; + } + + return r; +} + +/** + * radeon_vce_ib_test - test if VCE IBs are working + * + * @rdev: radeon_device pointer + * @ring: the engine to test on + * + */ +int radeon_vce_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) +{ + struct radeon_fence *fence = NULL; + int r; + + r = radeon_vce_get_create_msg(rdev, ring->idx, 1, NULL); + if (r) { + DRM_ERROR("radeon: failed to get create msg (%d).\n", r); + goto error; + } + + r = radeon_vce_get_destroy_msg(rdev, ring->idx, 1, &fence); + if (r) { + DRM_ERROR("radeon: failed to get destroy ib (%d).\n", r); + goto error; + } + + r = radeon_fence_wait(fence, false); + if (r) { + DRM_ERROR("radeon: fence wait failed (%d).\n", r); + } else { + DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + } +error: + radeon_fence_unref(&fence); + return r; +} diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index 9239a6d29128..683532f84931 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h @@ -1798,4 +1798,51 @@ #define DMA_PACKET_CONSTANT_FILL 0xd #define DMA_PACKET_NOP 0xf +#define VCE_STATUS 0x20004 +#define VCE_VCPU_CNTL 0x20014 +#define VCE_CLK_EN (1 << 0) +#define VCE_VCPU_CACHE_OFFSET0 0x20024 +#define VCE_VCPU_CACHE_SIZE0 0x20028 +#define VCE_VCPU_CACHE_OFFSET1 0x2002c +#define VCE_VCPU_CACHE_SIZE1 0x20030 +#define VCE_VCPU_CACHE_OFFSET2 0x20034 +#define VCE_VCPU_CACHE_SIZE2 0x20038 +#define VCE_SOFT_RESET 0x20120 +#define VCE_ECPU_SOFT_RESET (1 << 0) +#define VCE_FME_SOFT_RESET (1 << 2) +#define VCE_RB_BASE_LO2 0x2016c +#define VCE_RB_BASE_HI2 0x20170 +#define VCE_RB_SIZE2 0x20174 +#define VCE_RB_RPTR2 0x20178 +#define VCE_RB_WPTR2 0x2017c +#define VCE_RB_BASE_LO 0x20180 +#define VCE_RB_BASE_HI 0x20184 +#define VCE_RB_SIZE 0x20188 +#define VCE_RB_RPTR 0x2018c +#define VCE_RB_WPTR 0x20190 +#define VCE_CLOCK_GATING_A 0x202f8 +#define VCE_CLOCK_GATING_B 0x202fc +#define VCE_UENC_CLOCK_GATING 0x205bc +#define VCE_UENC_REG_CLOCK_GATING 0x205c0 +#define VCE_FW_REG_STATUS 0x20e10 +# define VCE_FW_REG_STATUS_BUSY (1 << 0) +# define VCE_FW_REG_STATUS_PASS (1 << 3) +# define VCE_FW_REG_STATUS_DONE (1 << 11) +#define VCE_LMI_FW_START_KEYSEL 0x20e18 +#define VCE_LMI_FW_PERIODIC_CTRL 0x20e20 +#define VCE_LMI_CTRL2 0x20e74 +#define VCE_LMI_CTRL 0x20e98 +#define VCE_LMI_VM_CTRL 0x20ea0 +#define VCE_LMI_SWAP_CNTL 0x20eb4 +#define VCE_LMI_SWAP_CNTL1 0x20eb8 +#define VCE_LMI_CACHE_CTRL 0x20ef4 + +#define VCE_CMD_NO_OP 0x00000000 +#define VCE_CMD_END 0x00000001 +#define VCE_CMD_IB 0x00000002 +#define VCE_CMD_FENCE 0x00000003 +#define VCE_CMD_TRAP 0x00000004 +#define VCE_CMD_IB_AUTO 0x00000005 +#define VCE_CMD_SEMAPHORE 0x00000006 + #endif diff --git a/drivers/gpu/drm/radeon/vce_v1_0.c b/drivers/gpu/drm/radeon/vce_v1_0.c new file mode 100644 index 000000000000..e0c3534356a1 --- /dev/null +++ b/drivers/gpu/drm/radeon/vce_v1_0.c @@ -0,0 +1,187 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * Authors: Christian König + */ + +#include +#include +#include "radeon.h" +#include "radeon_asic.h" +#include "sid.h" + +/** + * vce_v1_0_get_rptr - get read pointer + * + * @rdev: radeon_device pointer + * @ring: radeon_ring pointer + * + * Returns the current hardware read pointer + */ +uint32_t vce_v1_0_get_rptr(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + if (ring->idx == TN_RING_TYPE_VCE1_INDEX) + return RREG32(VCE_RB_RPTR); + else + return RREG32(VCE_RB_RPTR2); +} + +/** + * vce_v1_0_get_wptr - get write pointer + * + * @rdev: radeon_device pointer + * @ring: radeon_ring pointer + * + * Returns the current hardware write pointer + */ +uint32_t vce_v1_0_get_wptr(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + if (ring->idx == TN_RING_TYPE_VCE1_INDEX) + return RREG32(VCE_RB_WPTR); + else + return RREG32(VCE_RB_WPTR2); +} + +/** + * vce_v1_0_set_wptr - set write pointer + * + * @rdev: radeon_device pointer + * @ring: radeon_ring pointer + * + * Commits the write pointer to the hardware + */ +void vce_v1_0_set_wptr(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + if (ring->idx == TN_RING_TYPE_VCE1_INDEX) + WREG32(VCE_RB_WPTR, ring->wptr); + else + WREG32(VCE_RB_WPTR2, ring->wptr); +} + +/** + * vce_v1_0_start - start VCE block + * + * @rdev: radeon_device pointer + * + * Setup and start the VCE block + */ +int vce_v1_0_start(struct radeon_device *rdev) +{ + struct radeon_ring *ring; + int i, j, r; + + /* set BUSY flag */ + WREG32_P(VCE_STATUS, 1, ~1); + + ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; + WREG32(VCE_RB_RPTR, ring->rptr); + WREG32(VCE_RB_WPTR, ring->wptr); + WREG32(VCE_RB_BASE_LO, ring->gpu_addr); + WREG32(VCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32(VCE_RB_SIZE, ring->ring_size / 4); + + ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; + WREG32(VCE_RB_RPTR2, ring->rptr); + WREG32(VCE_RB_WPTR2, ring->wptr); + WREG32(VCE_RB_BASE_LO2, ring->gpu_addr); + WREG32(VCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32(VCE_RB_SIZE2, ring->ring_size / 4); + + WREG32_P(VCE_VCPU_CNTL, VCE_CLK_EN, ~VCE_CLK_EN); + + WREG32_P(VCE_SOFT_RESET, + VCE_ECPU_SOFT_RESET | + VCE_FME_SOFT_RESET, ~( + VCE_ECPU_SOFT_RESET | + VCE_FME_SOFT_RESET)); + + mdelay(100); + + WREG32_P(VCE_SOFT_RESET, 0, ~( + VCE_ECPU_SOFT_RESET | + VCE_FME_SOFT_RESET)); + + for (i = 0; i < 10; ++i) { + uint32_t status; + for (j = 0; j < 100; ++j) { + status = RREG32(VCE_STATUS); + if (status & 2) + break; + mdelay(10); + } + r = 0; + if (status & 2) + break; + + DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n"); + WREG32_P(VCE_SOFT_RESET, VCE_ECPU_SOFT_RESET, ~VCE_ECPU_SOFT_RESET); + mdelay(10); + WREG32_P(VCE_SOFT_RESET, 0, ~VCE_ECPU_SOFT_RESET); + mdelay(10); + r = -1; + } + + /* clear BUSY flag */ + WREG32_P(VCE_STATUS, 0, ~1); + + if (r) { + DRM_ERROR("VCE not responding, giving up!!!\n"); + return r; + } + + return 0; +} + +int vce_v1_0_init(struct radeon_device *rdev) +{ + struct radeon_ring *ring; + int r; + + r = vce_v1_0_start(rdev); + if (r) + return r; + + ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; + ring->ready = true; + r = radeon_ring_test(rdev, TN_RING_TYPE_VCE1_INDEX, ring); + if (r) { + ring->ready = false; + return r; + } + + ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; + ring->ready = true; + r = radeon_ring_test(rdev, TN_RING_TYPE_VCE2_INDEX, ring); + if (r) { + ring->ready = false; + return r; + } + + DRM_INFO("VCE initialized successfully.\n"); + + return 0; +} diff --git a/drivers/gpu/drm/radeon/vce_v2_0.c b/drivers/gpu/drm/radeon/vce_v2_0.c new file mode 100644 index 000000000000..4911d1b00e3b --- /dev/null +++ b/drivers/gpu/drm/radeon/vce_v2_0.c @@ -0,0 +1,70 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * Authors: Christian König + */ + +#include +#include +#include "radeon.h" +#include "radeon_asic.h" +#include "cikd.h" + +int vce_v2_0_resume(struct radeon_device *rdev) +{ + uint64_t addr = rdev->vce.gpu_addr; + uint32_t size; + + WREG32_P(VCE_CLOCK_GATING_A, 0, ~(1 << 16)); + WREG32_P(VCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000); + WREG32_P(VCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F); + WREG32(VCE_CLOCK_GATING_B, 0xf7); + + WREG32(VCE_LMI_CTRL, 0x00398000); + WREG32_P(VCE_LMI_CACHE_CTRL, 0x0, ~0x1); + WREG32(VCE_LMI_SWAP_CNTL, 0); + WREG32(VCE_LMI_SWAP_CNTL1, 0); + WREG32(VCE_LMI_VM_CTRL, 0); + + size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size); + WREG32(VCE_VCPU_CACHE_OFFSET0, addr & 0x7fffffff); + WREG32(VCE_VCPU_CACHE_SIZE0, size); + + addr += size; + size = RADEON_VCE_STACK_SIZE; + WREG32(VCE_VCPU_CACHE_OFFSET1, addr & 0x7fffffff); + WREG32(VCE_VCPU_CACHE_SIZE1, size); + + addr += size; + size = RADEON_VCE_HEAP_SIZE; + WREG32(VCE_VCPU_CACHE_OFFSET2, addr & 0x7fffffff); + WREG32(VCE_VCPU_CACHE_SIZE2, size); + + WREG32_P(VCE_LMI_CTRL2, 0x0, ~0x100); + + WREG32_P(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, + ~VCE_SYS_INT_TRAP_INTERRUPT_EN); + + return 0; +} -- cgit From f7ba8b04b22d7c74898f53a0e118f31b9d40dcc2 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 27 Jan 2014 10:16:06 -0700 Subject: drm/radeon: add VCE ring query MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König --- drivers/gpu/drm/radeon/radeon_kms.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 0e078afc5db7..ea018d53a85e 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -433,6 +433,9 @@ static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file case RADEON_CS_RING_UVD: *value = rdev->ring[R600_RING_TYPE_UVD_INDEX].ready; break; + case RADEON_CS_RING_VCE: + *value = rdev->ring[TN_RING_TYPE_VCE1_INDEX].ready; + break; default: return -EINVAL; } -- cgit From 98ccc291ffdc34ccb9b13f0c29cc51d6eab24022 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 23 Jan 2014 09:50:49 -0700 Subject: drm/radeon: add VCE version parsing and checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also make the result available to userspace. Signed-off-by: Christian König --- drivers/gpu/drm/radeon/radeon.h | 2 ++ drivers/gpu/drm/radeon/radeon_kms.c | 6 ++++ drivers/gpu/drm/radeon/radeon_vce.c | 56 +++++++++++++++++++++++++++++++++---- 3 files changed, 58 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index a58a38942c73..d1491d41fc2d 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1606,6 +1606,8 @@ struct radeon_vce { struct radeon_bo *vcpu_bo; void *cpu_addr; uint64_t gpu_addr; + unsigned fw_version; + unsigned fb_version; atomic_t handles[RADEON_MAX_VCE_HANDLES]; struct drm_file *filp[RADEON_MAX_VCE_HANDLES]; }; diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index ea018d53a85e..baff98be65b1 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -480,6 +480,12 @@ static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file else *value = rdev->pm.default_sclk * 10; break; + case RADEON_INFO_VCE_FW_VERSION: + *value = rdev->vce.fw_version; + break; + case RADEON_INFO_VCE_FB_VERSION: + *value = rdev->vce.fb_version; + break; default: DRM_DEBUG_KMS("Invalid request %d\n", info->request); return -EINVAL; diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c index 2547d8ea347a..f46563b60921 100644 --- a/drivers/gpu/drm/radeon/radeon_vce.c +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -48,8 +48,11 @@ MODULE_FIRMWARE(FIRMWARE_BONAIRE); */ int radeon_vce_init(struct radeon_device *rdev) { - unsigned long bo_size; - const char *fw_name; + static const char *fw_version = "[ATI LIB=VCEFW,"; + static const char *fb_version = "[ATI LIB=VCEFWSTATS,"; + unsigned long size; + const char *fw_name, *c; + uint8_t start, mid, end; int i, r; switch (rdev->family) { @@ -70,9 +73,50 @@ int radeon_vce_init(struct radeon_device *rdev) return r; } - bo_size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size) + - RADEON_VCE_STACK_SIZE + RADEON_VCE_HEAP_SIZE; - r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true, + /* search for firmware version */ + + size = rdev->vce_fw->size - strlen(fw_version) - 9; + c = rdev->vce_fw->data; + for (;size > 0; --size, ++c) + if (strncmp(c, fw_version, strlen(fw_version)) == 0) + break; + + if (size == 0) + return -EINVAL; + + c += strlen(fw_version); + if (sscanf(c, "%2hhd.%2hhd.%2hhd]", &start, &mid, &end) != 3) + return -EINVAL; + + /* search for feedback version */ + + size = rdev->vce_fw->size - strlen(fb_version) - 3; + c = rdev->vce_fw->data; + for (;size > 0; --size, ++c) + if (strncmp(c, fb_version, strlen(fb_version)) == 0) + break; + + if (size == 0) + return -EINVAL; + + c += strlen(fb_version); + if (sscanf(c, "%2u]", &rdev->vce.fb_version) != 1) + return -EINVAL; + + DRM_INFO("Found VCE firmware/feedback version %hhd.%hhd.%hhd / %d!\n", + start, mid, end, rdev->vce.fb_version); + + rdev->vce.fw_version = (start << 24) | (mid << 16) | (end << 8); + + /* we can only work with this fw version for now */ + if (rdev->vce.fw_version != ((40 << 24) | (2 << 16) | (2 << 8))) + return -EINVAL; + + /* load firmware into VRAM */ + + size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size) + + RADEON_VCE_STACK_SIZE + RADEON_VCE_HEAP_SIZE; + r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->vce.vcpu_bo); if (r) { dev_err(rdev->dev, "(%d) failed to allocate VCE bo\n", r); @@ -83,7 +127,7 @@ int radeon_vce_init(struct radeon_device *rdev) if (r) return r; - memset(rdev->vce.cpu_addr, 0, bo_size); + memset(rdev->vce.cpu_addr, 0, size); memcpy(rdev->vce.cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size); r = radeon_vce_suspend(rdev); -- cgit From b59b733397cac70be5b04c60e8810077ac6ca48d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 20 Aug 2013 20:01:18 -0400 Subject: drm/radeon: add callback for setting vce clocks Similar to uvd clock setting. Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index d1491d41fc2d..2b26feb0a7c6 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1822,6 +1822,7 @@ struct radeon_asic { void (*set_pcie_lanes)(struct radeon_device *rdev, int lanes); void (*set_clock_gating)(struct radeon_device *rdev, int enable); int (*set_uvd_clocks)(struct radeon_device *rdev, u32 vclk, u32 dclk); + int (*set_vce_clocks)(struct radeon_device *rdev, u32 evclk, u32 ecclk); int (*get_temperature)(struct radeon_device *rdev); } pm; /* dynamic power management */ @@ -2683,6 +2684,7 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v); #define radeon_set_pcie_lanes(rdev, l) (rdev)->asic->pm.set_pcie_lanes((rdev), (l)) #define radeon_set_clock_gating(rdev, e) (rdev)->asic->pm.set_clock_gating((rdev), (e)) #define radeon_set_uvd_clocks(rdev, v, d) (rdev)->asic->pm.set_uvd_clocks((rdev), (v), (d)) +#define radeon_set_vce_clocks(rdev, ev, ec) (rdev)->asic->pm.set_vce_clocks((rdev), (ev), (ec)) #define radeon_get_temperature(rdev) (rdev)->asic->pm.get_temperature((rdev)) #define radeon_set_surface_reg(rdev, r, f, p, o, s) ((rdev)->asic->surface.set_reg((rdev), (r), (f), (p), (o), (s))) #define radeon_clear_surface_reg(rdev, r) ((rdev)->asic->surface.clear_reg((rdev), (r))) -- cgit From 82f79cc54b6a67c0b17aff4fb5ed43155ff3f0ea Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 21 Aug 2013 10:02:32 -0400 Subject: drm/radeon/dpm: move platform caps fetching to a separate function It's needed by by both the asic specific functions and the extended table parser. Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/btc_dpm.c | 4 ++++ drivers/gpu/drm/radeon/ci_dpm.c | 9 ++++++--- drivers/gpu/drm/radeon/cypress_dpm.c | 4 ++++ drivers/gpu/drm/radeon/kv_dpm.c | 7 ++++--- drivers/gpu/drm/radeon/ni_dpm.c | 7 ++++--- drivers/gpu/drm/radeon/r600_dpm.c | 20 ++++++++++++++++++++ drivers/gpu/drm/radeon/r600_dpm.h | 2 ++ drivers/gpu/drm/radeon/rs780_dpm.c | 7 ++++--- drivers/gpu/drm/radeon/rv6xx_dpm.c | 7 ++++--- drivers/gpu/drm/radeon/rv770_dpm.c | 7 ++++--- drivers/gpu/drm/radeon/si_dpm.c | 7 ++++--- drivers/gpu/drm/radeon/sumo_dpm.c | 7 ++++--- drivers/gpu/drm/radeon/trinity_dpm.c | 7 ++++--- 13 files changed, 68 insertions(+), 27 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/btc_dpm.c b/drivers/gpu/drm/radeon/btc_dpm.c index ea103ccdf4bd..f81d7ca134db 100644 --- a/drivers/gpu/drm/radeon/btc_dpm.c +++ b/drivers/gpu/drm/radeon/btc_dpm.c @@ -2601,6 +2601,10 @@ int btc_dpm_init(struct radeon_device *rdev) pi->min_vddc_in_table = 0; pi->max_vddc_in_table = 0; + ret = r600_get_platform_caps(rdev); + if (ret) + return ret; + ret = rv7xx_parse_power_table(rdev); if (ret) return ret; diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 8d49104ca6c2..4a0c40186046 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -4959,9 +4959,6 @@ static int ci_parse_power_table(struct radeon_device *rdev) if (!rdev->pm.dpm.ps) return -ENOMEM; power_state_offset = (u8 *)state_array->states; - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); for (i = 0; i < state_array->ucNumEntries; i++) { u8 *idx; power_state = (union pplib_power_state *)power_state_offset; @@ -5077,6 +5074,12 @@ int ci_dpm_init(struct radeon_device *rdev) ci_dpm_fini(rdev); return ret; } + + ret = r600_get_platform_caps(rdev); + if (ret) { + ci_dpm_fini(rdev); + return ret; + } ret = ci_parse_power_table(rdev); if (ret) { ci_dpm_fini(rdev); diff --git a/drivers/gpu/drm/radeon/cypress_dpm.c b/drivers/gpu/drm/radeon/cypress_dpm.c index cf783fc0ef21..5a9a5f4d7888 100644 --- a/drivers/gpu/drm/radeon/cypress_dpm.c +++ b/drivers/gpu/drm/radeon/cypress_dpm.c @@ -2036,6 +2036,10 @@ int cypress_dpm_init(struct radeon_device *rdev) pi->min_vddc_in_table = 0; pi->max_vddc_in_table = 0; + ret = r600_get_platform_caps(rdev); + if (ret) + return ret; + ret = rv7xx_parse_power_table(rdev); if (ret) return ret; diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c index 351db361239d..b5bb3a5654ce 100644 --- a/drivers/gpu/drm/radeon/kv_dpm.c +++ b/drivers/gpu/drm/radeon/kv_dpm.c @@ -2538,9 +2538,6 @@ static int kv_parse_power_table(struct radeon_device *rdev) if (!rdev->pm.dpm.ps) return -ENOMEM; power_state_offset = (u8 *)state_array->states; - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); for (i = 0; i < state_array->ucNumEntries; i++) { u8 *idx; power_state = (union pplib_power_state *)power_state_offset; @@ -2590,6 +2587,10 @@ int kv_dpm_init(struct radeon_device *rdev) return -ENOMEM; rdev->pm.dpm.priv = pi; + ret = r600_get_platform_caps(rdev); + if (ret) + return ret; + ret = r600_parse_extended_power_table(rdev); if (ret) return ret; diff --git a/drivers/gpu/drm/radeon/ni_dpm.c b/drivers/gpu/drm/radeon/ni_dpm.c index 1217fbcbdcca..89fc5b976d9b 100644 --- a/drivers/gpu/drm/radeon/ni_dpm.c +++ b/drivers/gpu/drm/radeon/ni_dpm.c @@ -4025,9 +4025,6 @@ static int ni_parse_power_table(struct radeon_device *rdev) power_info->pplib.ucNumStates, GFP_KERNEL); if (!rdev->pm.dpm.ps) return -ENOMEM; - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); for (i = 0; i < power_info->pplib.ucNumStates; i++) { power_state = (union pplib_power_state *) @@ -4089,6 +4086,10 @@ int ni_dpm_init(struct radeon_device *rdev) pi->min_vddc_in_table = 0; pi->max_vddc_in_table = 0; + ret = r600_get_platform_caps(rdev); + if (ret) + return ret; + ret = ni_parse_power_table(rdev); if (ret) return ret; diff --git a/drivers/gpu/drm/radeon/r600_dpm.c b/drivers/gpu/drm/radeon/r600_dpm.c index e4cc9b314ce9..e8b6e4ab312b 100644 --- a/drivers/gpu/drm/radeon/r600_dpm.c +++ b/drivers/gpu/drm/radeon/r600_dpm.c @@ -834,6 +834,26 @@ static int r600_parse_clk_voltage_dep_table(struct radeon_clock_voltage_dependen return 0; } +int r600_get_platform_caps(struct radeon_device *rdev) +{ + struct radeon_mode_info *mode_info = &rdev->mode_info; + union power_info *power_info; + int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo); + u16 data_offset; + u8 frev, crev; + + if (!atom_parse_data_header(mode_info->atom_context, index, NULL, + &frev, &crev, &data_offset)) + return -EINVAL; + power_info = (union power_info *)(mode_info->atom_context->bios + data_offset); + + rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); + rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); + rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); + + return 0; +} + /* sizeof(ATOM_PPLIB_EXTENDEDHEADER) */ #define SIZE_OF_ATOM_PPLIB_EXTENDEDHEADER_V2 12 #define SIZE_OF_ATOM_PPLIB_EXTENDEDHEADER_V3 14 diff --git a/drivers/gpu/drm/radeon/r600_dpm.h b/drivers/gpu/drm/radeon/r600_dpm.h index 07eab2b04e81..46b9d2a03018 100644 --- a/drivers/gpu/drm/radeon/r600_dpm.h +++ b/drivers/gpu/drm/radeon/r600_dpm.h @@ -215,6 +215,8 @@ void r600_stop_dpm(struct radeon_device *rdev); bool r600_is_internal_thermal_sensor(enum radeon_int_thermal_type sensor); +int r600_get_platform_caps(struct radeon_device *rdev); + int r600_parse_extended_power_table(struct radeon_device *rdev); void r600_free_extended_power_table(struct radeon_device *rdev); diff --git a/drivers/gpu/drm/radeon/rs780_dpm.c b/drivers/gpu/drm/radeon/rs780_dpm.c index 8512085b0aef..02f7710de470 100644 --- a/drivers/gpu/drm/radeon/rs780_dpm.c +++ b/drivers/gpu/drm/radeon/rs780_dpm.c @@ -807,9 +807,6 @@ static int rs780_parse_power_table(struct radeon_device *rdev) power_info->pplib.ucNumStates, GFP_KERNEL); if (!rdev->pm.dpm.ps) return -ENOMEM; - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); for (i = 0; i < power_info->pplib.ucNumStates; i++) { power_state = (union pplib_power_state *) @@ -859,6 +856,10 @@ int rs780_dpm_init(struct radeon_device *rdev) return -ENOMEM; rdev->pm.dpm.priv = pi; + ret = r600_get_platform_caps(rdev); + if (ret) + return ret; + ret = rs780_parse_power_table(rdev); if (ret) return ret; diff --git a/drivers/gpu/drm/radeon/rv6xx_dpm.c b/drivers/gpu/drm/radeon/rv6xx_dpm.c index bebf31c4d841..e7045b085715 100644 --- a/drivers/gpu/drm/radeon/rv6xx_dpm.c +++ b/drivers/gpu/drm/radeon/rv6xx_dpm.c @@ -1891,9 +1891,6 @@ static int rv6xx_parse_power_table(struct radeon_device *rdev) power_info->pplib.ucNumStates, GFP_KERNEL); if (!rdev->pm.dpm.ps) return -ENOMEM; - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); for (i = 0; i < power_info->pplib.ucNumStates; i++) { power_state = (union pplib_power_state *) @@ -1943,6 +1940,10 @@ int rv6xx_dpm_init(struct radeon_device *rdev) return -ENOMEM; rdev->pm.dpm.priv = pi; + ret = r600_get_platform_caps(rdev); + if (ret) + return ret; + ret = rv6xx_parse_power_table(rdev); if (ret) return ret; diff --git a/drivers/gpu/drm/radeon/rv770_dpm.c b/drivers/gpu/drm/radeon/rv770_dpm.c index 5b2ea8ac0731..9098c86ca006 100644 --- a/drivers/gpu/drm/radeon/rv770_dpm.c +++ b/drivers/gpu/drm/radeon/rv770_dpm.c @@ -2281,9 +2281,6 @@ int rv7xx_parse_power_table(struct radeon_device *rdev) power_info->pplib.ucNumStates, GFP_KERNEL); if (!rdev->pm.dpm.ps) return -ENOMEM; - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); for (i = 0; i < power_info->pplib.ucNumStates; i++) { power_state = (union pplib_power_state *) @@ -2361,6 +2358,10 @@ int rv770_dpm_init(struct radeon_device *rdev) pi->min_vddc_in_table = 0; pi->max_vddc_in_table = 0; + ret = r600_get_platform_caps(rdev); + if (ret) + return ret; + ret = rv7xx_parse_power_table(rdev); if (ret) return ret; diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index eafb0e6bc67e..d5024778cabe 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -6271,9 +6271,6 @@ static int si_parse_power_table(struct radeon_device *rdev) if (!rdev->pm.dpm.ps) return -ENOMEM; power_state_offset = (u8 *)state_array->states; - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); for (i = 0; i < state_array->ucNumEntries; i++) { u8 *idx; power_state = (union pplib_power_state *)power_state_offset; @@ -6350,6 +6347,10 @@ int si_dpm_init(struct radeon_device *rdev) pi->min_vddc_in_table = 0; pi->max_vddc_in_table = 0; + ret = r600_get_platform_caps(rdev); + if (ret) + return ret; + ret = si_parse_power_table(rdev); if (ret) return ret; diff --git a/drivers/gpu/drm/radeon/sumo_dpm.c b/drivers/gpu/drm/radeon/sumo_dpm.c index 8b47b3cd0357..3f0e8d7b8dbe 100644 --- a/drivers/gpu/drm/radeon/sumo_dpm.c +++ b/drivers/gpu/drm/radeon/sumo_dpm.c @@ -1484,9 +1484,6 @@ static int sumo_parse_power_table(struct radeon_device *rdev) if (!rdev->pm.dpm.ps) return -ENOMEM; power_state_offset = (u8 *)state_array->states; - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); for (i = 0; i < state_array->ucNumEntries; i++) { u8 *idx; power_state = (union pplib_power_state *)power_state_offset; @@ -1772,6 +1769,10 @@ int sumo_dpm_init(struct radeon_device *rdev) sumo_construct_boot_and_acpi_state(rdev); + ret = r600_get_platform_caps(rdev); + if (ret) + return ret; + ret = sumo_parse_power_table(rdev); if (ret) return ret; diff --git a/drivers/gpu/drm/radeon/trinity_dpm.c b/drivers/gpu/drm/radeon/trinity_dpm.c index 2da0e17eb960..2a2822c03329 100644 --- a/drivers/gpu/drm/radeon/trinity_dpm.c +++ b/drivers/gpu/drm/radeon/trinity_dpm.c @@ -1694,9 +1694,6 @@ static int trinity_parse_power_table(struct radeon_device *rdev) if (!rdev->pm.dpm.ps) return -ENOMEM; power_state_offset = (u8 *)state_array->states; - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); for (i = 0; i < state_array->ucNumEntries; i++) { u8 *idx; power_state = (union pplib_power_state *)power_state_offset; @@ -1895,6 +1892,10 @@ int trinity_dpm_init(struct radeon_device *rdev) trinity_construct_boot_state(rdev); + ret = r600_get_platform_caps(rdev); + if (ret) + return ret; + ret = trinity_parse_power_table(rdev); if (ret) return ret; -- cgit From b62d628bd63f61e9aea3b8fab2ec638680bf4aa4 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 20 Aug 2013 20:29:05 -0400 Subject: drm/radeon/dpm: fill in some initial vce infrastructure Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon.h | 12 ++++++++++++ drivers/gpu/drm/radeon/radeon_pm.c | 8 ++++++++ 2 files changed, 20 insertions(+) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 2b26feb0a7c6..60c171c60a64 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1259,6 +1259,15 @@ enum radeon_dpm_event_src { RADEON_DPM_EVENT_SRC_DIGIAL_OR_EXTERNAL = 4 }; +enum radeon_vce_level { + RADEON_VCE_LEVEL_AC_ALL = 0, /* AC, All cases */ + RADEON_VCE_LEVEL_DC_EE = 1, /* DC, entropy encoding */ + RADEON_VCE_LEVEL_DC_LL_LOW = 2, /* DC, low latency queue, res <= 720 */ + RADEON_VCE_LEVEL_DC_LL_HIGH = 3, /* DC, low latency queue, 1080 >= res > 720 */ + RADEON_VCE_LEVEL_DC_GP_LOW = 4, /* DC, general purpose queue, res <= 720 */ + RADEON_VCE_LEVEL_DC_GP_HIGH = 5, /* DC, general purpose queue, 1080 >= res > 720 */ +}; + struct radeon_ps { u32 caps; /* vbios flags */ u32 class; /* vbios flags */ @@ -1269,6 +1278,8 @@ struct radeon_ps { /* VCE clocks */ u32 evclk; u32 ecclk; + bool vce_active; + enum radeon_vce_level vce_level; /* asic priv */ void *ps_priv; }; @@ -1480,6 +1491,7 @@ struct radeon_dpm { /* special states active */ bool thermal_active; bool uvd_active; + bool vce_active; /* thermal handling */ struct radeon_dpm_thermal thermal; /* forced levels */ diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 8e8153e471c2..a4687e7b45f8 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -826,6 +826,9 @@ static void radeon_dpm_change_power_state_locked(struct radeon_device *rdev) /* no need to reprogram if nothing changed unless we are on BTC+ */ if (rdev->pm.dpm.current_ps == rdev->pm.dpm.requested_ps) { + /* vce just modifies an existing state so force a change */ + if (ps->vce_active != rdev->pm.dpm.vce_active) + goto force; if ((rdev->family < CHIP_BARTS) || (rdev->flags & RADEON_IS_IGP)) { /* for pre-BTC and APUs if the num crtcs changed but state is the same, * all we need to do is update the display configuration. @@ -862,16 +865,21 @@ static void radeon_dpm_change_power_state_locked(struct radeon_device *rdev) } } +force: if (radeon_dpm == 1) { printk("switching from power state:\n"); radeon_dpm_print_power_state(rdev, rdev->pm.dpm.current_ps); printk("switching to power state:\n"); radeon_dpm_print_power_state(rdev, rdev->pm.dpm.requested_ps); } + mutex_lock(&rdev->ddev->struct_mutex); down_write(&rdev->pm.mclk_lock); mutex_lock(&rdev->ring_lock); + /* update whether vce is active */ + ps->vce_active = rdev->pm.dpm.vce_active; + ret = radeon_dpm_pre_set_power_state(rdev); if (ret) goto done; -- cgit From 58bd2a88facbdf3c39db0f834111cd4294400814 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 4 Sep 2013 16:13:56 -0400 Subject: drm/radeon/dpm: fetch vce states from the vbios Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/r600_dpm.c | 28 +++++++++++++++++++++++++++- drivers/gpu/drm/radeon/radeon.h | 16 ++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/r600_dpm.c b/drivers/gpu/drm/radeon/r600_dpm.c index e8b6e4ab312b..cbf7e3269f84 100644 --- a/drivers/gpu/drm/radeon/r600_dpm.c +++ b/drivers/gpu/drm/radeon/r600_dpm.c @@ -1063,7 +1063,15 @@ int r600_parse_extended_power_table(struct radeon_device *rdev) (mode_info->atom_context->bios + data_offset + le16_to_cpu(ext_hdr->usVCETableOffset) + 1 + 1 + array->ucNumEntries * sizeof(VCEClockInfo)); + ATOM_PPLIB_VCE_State_Table *states = + (ATOM_PPLIB_VCE_State_Table *) + (mode_info->atom_context->bios + data_offset + + le16_to_cpu(ext_hdr->usVCETableOffset) + 1 + + 1 + (array->ucNumEntries * sizeof (VCEClockInfo)) + + 1 + (limits->numEntries * sizeof(ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record))); ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record *entry; + ATOM_PPLIB_VCE_State_Record *state_entry; + VCEClockInfo *vce_clk; u32 size = limits->numEntries * sizeof(struct radeon_vce_clock_voltage_dependency_entry); rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries = @@ -1075,8 +1083,9 @@ int r600_parse_extended_power_table(struct radeon_device *rdev) rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.count = limits->numEntries; entry = &limits->entries[0]; + state_entry = &states->entries[0]; for (i = 0; i < limits->numEntries; i++) { - VCEClockInfo *vce_clk = (VCEClockInfo *) + vce_clk = (VCEClockInfo *) ((u8 *)&array->entries[0] + (entry->ucVCEClockInfoIndex * sizeof(VCEClockInfo))); rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries[i].evclk = @@ -1088,6 +1097,23 @@ int r600_parse_extended_power_table(struct radeon_device *rdev) entry = (ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record *) ((u8 *)entry + sizeof(ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record)); } + for (i = 0; i < states->numEntries; i++) { + if (i >= RADEON_MAX_VCE_LEVELS) + break; + vce_clk = (VCEClockInfo *) + ((u8 *)&array->entries[0] + + (state_entry->ucVCEClockInfoIndex * sizeof(VCEClockInfo))); + rdev->pm.dpm.vce_states[i].evclk = + le16_to_cpu(vce_clk->usEVClkLow) | (vce_clk->ucEVClkHigh << 16); + rdev->pm.dpm.vce_states[i].ecclk = + le16_to_cpu(vce_clk->usECClkLow) | (vce_clk->ucECClkHigh << 16); + rdev->pm.dpm.vce_states[i].clk_idx = + state_entry->ucClockInfoIndex & 0x3f; + rdev->pm.dpm.vce_states[i].pstate = + (state_entry->ucClockInfoIndex & 0xc0) >> 6; + state_entry = (ATOM_PPLIB_VCE_State_Record *) + ((u8 *)state_entry + sizeof(ATOM_PPLIB_VCE_State_Record)); + } } if ((le16_to_cpu(ext_hdr->usSize) >= SIZE_OF_ATOM_PPLIB_EXTENDEDHEADER_V3) && ext_hdr->usUVDTableOffset) { diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 60c171c60a64..693a8fccd94d 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1259,6 +1259,8 @@ enum radeon_dpm_event_src { RADEON_DPM_EVENT_SRC_DIGIAL_OR_EXTERNAL = 4 }; +#define RADEON_MAX_VCE_LEVELS 6 + enum radeon_vce_level { RADEON_VCE_LEVEL_AC_ALL = 0, /* AC, All cases */ RADEON_VCE_LEVEL_DC_EE = 1, /* DC, entropy encoding */ @@ -1454,6 +1456,17 @@ enum radeon_dpm_forced_level { RADEON_DPM_FORCED_LEVEL_HIGH = 2, }; +struct radeon_vce_state { + /* vce clocks */ + u32 evclk; + u32 ecclk; + /* gpu clocks */ + u32 sclk; + u32 mclk; + u8 clk_idx; + u8 pstate; +}; + struct radeon_dpm { struct radeon_ps *ps; /* number of valid power states */ @@ -1466,6 +1479,9 @@ struct radeon_dpm { struct radeon_ps *boot_ps; /* default uvd power state */ struct radeon_ps *uvd_ps; + /* vce requirements */ + struct radeon_vce_state vce_states[RADEON_MAX_VCE_LEVELS]; + enum radeon_vce_level vce_level; enum radeon_pm_state_type state; enum radeon_pm_state_type user_state; u32 platform_caps; -- cgit From 5ad6bf91ef8fd265aee252982a7d6fcf78436153 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 22 Aug 2013 17:09:06 -0400 Subject: drm/radeon: fill in set_vce_clocks for CIK asics Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 35 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/cikd.h | 6 ++++++ drivers/gpu/drm/radeon/radeon_asic.c | 2 ++ drivers/gpu/drm/radeon/radeon_asic.h | 1 + 4 files changed, 44 insertions(+) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index be6eb4d91284..ecb16b14f049 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -8925,6 +8925,41 @@ int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk) return r; } +int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk) +{ + int r, i; + struct atom_clock_dividers dividers; + u32 tmp; + + r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, + ecclk, false, ÷rs); + if (r) + return r; + + for (i = 0; i < 100; i++) { + if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS) + break; + mdelay(10); + } + if (i == 100) + return -ETIMEDOUT; + + tmp = RREG32_SMC(CG_ECLK_CNTL); + tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK); + tmp |= dividers.post_divider; + WREG32_SMC(CG_ECLK_CNTL, tmp); + + for (i = 0; i < 100; i++) { + if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS) + break; + mdelay(10); + } + if (i == 100) + return -ETIMEDOUT; + + return 0; +} + static void cik_pcie_gen3_enable(struct radeon_device *rdev) { struct pci_dev *root = rdev->pdev->bus->self; diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index 459ae021d91c..ee16380ceba8 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -203,6 +203,12 @@ #define CTF_TEMP_MASK 0x0003fe00 #define CTF_TEMP_SHIFT 9 +#define CG_ECLK_CNTL 0xC05000AC +# define ECLK_DIVIDER_MASK 0x7f +# define ECLK_DIR_CNTL_EN (1 << 8) +#define CG_ECLK_STATUS 0xC05000B0 +# define ECLK_STATUS (1 << 0) + #define CG_SPLL_FUNC_CNTL 0xC0500140 #define SPLL_RESET (1 << 0) #define SPLL_PWRON (1 << 1) diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 4f059b2c05fb..b8a24a75d4ff 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -2076,6 +2076,7 @@ static struct radeon_asic ci_asic = { .set_pcie_lanes = NULL, .set_clock_gating = NULL, .set_uvd_clocks = &cik_set_uvd_clocks, + .set_vce_clocks = &cik_set_vce_clocks, .get_temperature = &ci_get_temp, }, .dpm = { @@ -2180,6 +2181,7 @@ static struct radeon_asic kv_asic = { .set_pcie_lanes = NULL, .set_clock_gating = NULL, .set_uvd_clocks = &cik_set_uvd_clocks, + .set_vce_clocks = &cik_set_vce_clocks, .get_temperature = &kv_get_temp, }, .dpm = { diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 13f87bf5254b..3d55a3a39e82 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -717,6 +717,7 @@ u32 cik_get_xclk(struct radeon_device *rdev); uint32_t cik_pciep_rreg(struct radeon_device *rdev, uint32_t reg); void cik_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v); int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk); +int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk); void cik_sdma_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); bool cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, -- cgit From 8cd366823e0045bfd450138204c7559ac06efcea Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 23 Aug 2013 11:05:24 -0400 Subject: drm/radeon: add vce dpm support for CI Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/ci_dpm.c | 50 ++++++++++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 4a0c40186046..c91d0ee9dfdc 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -746,6 +746,14 @@ static void ci_apply_state_adjust_rules(struct radeon_device *rdev, u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc; int i; + if (rps->vce_active) { + rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk; + rps->ecclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].ecclk; + } else { + rps->evclk = 0; + rps->ecclk = 0; + } + if ((rdev->pm.dpm.new_active_crtc_count > 1) || ci_dpm_vblank_too_short(rdev)) disable_mclk_switching = true; @@ -804,6 +812,13 @@ static void ci_apply_state_adjust_rules(struct radeon_device *rdev, sclk = ps->performance_levels[0].sclk; } + if (rps->vce_active) { + if (sclk < rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk) + sclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk; + if (mclk < rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].mclk) + mclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].mclk; + } + ps->performance_levels[0].sclk = sclk; ps->performance_levels[0].mclk = mclk; @@ -3468,7 +3483,6 @@ static int ci_enable_uvd_dpm(struct radeon_device *rdev, bool enable) 0 : -EINVAL; } -#if 0 static int ci_enable_vce_dpm(struct radeon_device *rdev, bool enable) { struct ci_power_info *pi = ci_get_pi(rdev); @@ -3501,6 +3515,7 @@ static int ci_enable_vce_dpm(struct radeon_device *rdev, bool enable) 0 : -EINVAL; } +#if 0 static int ci_enable_samu_dpm(struct radeon_device *rdev, bool enable) { struct ci_power_info *pi = ci_get_pi(rdev); @@ -3587,7 +3602,6 @@ static int ci_update_uvd_dpm(struct radeon_device *rdev, bool gate) return ci_enable_uvd_dpm(rdev, !gate); } -#if 0 static u8 ci_get_vce_boot_level(struct radeon_device *rdev) { u8 i; @@ -3608,13 +3622,11 @@ static int ci_update_vce_dpm(struct radeon_device *rdev, struct radeon_ps *radeon_current_state) { struct ci_power_info *pi = ci_get_pi(rdev); - bool new_vce_clock_non_zero = (radeon_new_state->evclk != 0); - bool old_vce_clock_non_zero = (radeon_current_state->evclk != 0); int ret = 0; u32 tmp; - if (new_vce_clock_non_zero != old_vce_clock_non_zero) { - if (new_vce_clock_non_zero) { + if (radeon_current_state->evclk != radeon_new_state->evclk) { + if (radeon_new_state->evclk) { pi->smc_state_table.VceBootLevel = ci_get_vce_boot_level(rdev); tmp = RREG32_SMC(DPM_TABLE_475); @@ -3630,6 +3642,7 @@ static int ci_update_vce_dpm(struct radeon_device *rdev, return ret; } +#if 0 static int ci_update_samu_dpm(struct radeon_device *rdev, bool gate) { return ci_enable_samu_dpm(rdev, gate); @@ -4752,13 +4765,13 @@ int ci_dpm_set_power_state(struct radeon_device *rdev) DRM_ERROR("ci_generate_dpm_level_enable_mask failed\n"); return ret; } -#if 0 + ret = ci_update_vce_dpm(rdev, new_ps, old_ps); if (ret) { DRM_ERROR("ci_update_vce_dpm failed\n"); return ret; } -#endif + ret = ci_update_sclk_t(rdev); if (ret) { DRM_ERROR("ci_update_sclk_t failed\n"); @@ -4995,6 +5008,21 @@ static int ci_parse_power_table(struct radeon_device *rdev) power_state_offset += 2 + power_state->v2.ucNumDPMLevels; } rdev->pm.dpm.num_ps = state_array->ucNumEntries; + + /* fill in the vce power states */ + for (i = 0; i < RADEON_MAX_VCE_LEVELS; i++) { + u32 sclk, mclk; + clock_array_index = rdev->pm.dpm.vce_states[i].clk_idx; + clock_info = (union pplib_clock_info *) + &clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize]; + sclk = le16_to_cpu(clock_info->ci.usEngineClockLow); + sclk |= clock_info->ci.ucEngineClockHigh << 16; + mclk = le16_to_cpu(clock_info->ci.usMemoryClockLow); + mclk |= clock_info->ci.ucMemoryClockHigh << 16; + rdev->pm.dpm.vce_states[i].sclk = sclk; + rdev->pm.dpm.vce_states[i].mclk = mclk; + } + return 0; } @@ -5080,12 +5108,14 @@ int ci_dpm_init(struct radeon_device *rdev) ci_dpm_fini(rdev); return ret; } - ret = ci_parse_power_table(rdev); + + ret = r600_parse_extended_power_table(rdev); if (ret) { ci_dpm_fini(rdev); return ret; } - ret = r600_parse_extended_power_table(rdev); + + ret = ci_parse_power_table(rdev); if (ret) { ci_dpm_fini(rdev); return ret; -- cgit From ee35b0024a9d85f9c8745e0481c09d65f2507bd3 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 23 Aug 2013 11:09:21 -0400 Subject: drm/radeon: enable vce dpm on CI VCE dpm dynamically adjusts the uvd clocks on demand. Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/ci_dpm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index c91d0ee9dfdc..6669d3252f57 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -5153,6 +5153,7 @@ int ci_dpm_init(struct radeon_device *rdev) pi->caps_sclk_throttle_low_notification = false; pi->caps_uvd_dpm = true; + pi->caps_vce_dpm = true; ci_get_leakage_voltages(rdev); ci_patch_dependency_tables_with_leakage(rdev); -- cgit From 4233290519c779e44a01816cf825f6df067a0886 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 4 Sep 2013 16:17:07 -0400 Subject: drm/radeon: add vce dpm support for KV/KB TODO: plug in cik_vce_suspend()/resume() so we can enable vce powergating. See XXX in code. Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/kv_dpm.c | 46 +++++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c index b5bb3a5654ce..e972b885e297 100644 --- a/drivers/gpu/drm/radeon/kv_dpm.c +++ b/drivers/gpu/drm/radeon/kv_dpm.c @@ -1338,13 +1338,11 @@ static int kv_enable_uvd_dpm(struct radeon_device *rdev, bool enable) PPSMC_MSG_UVDDPM_Enable : PPSMC_MSG_UVDDPM_Disable); } -#if 0 static int kv_enable_vce_dpm(struct radeon_device *rdev, bool enable) { return kv_notify_message_to_smu(rdev, enable ? PPSMC_MSG_VCEDPM_Enable : PPSMC_MSG_VCEDPM_Disable); } -#endif static int kv_enable_samu_dpm(struct radeon_device *rdev, bool enable) { @@ -1389,7 +1387,6 @@ static int kv_update_uvd_dpm(struct radeon_device *rdev, bool gate) return kv_enable_uvd_dpm(rdev, !gate); } -#if 0 static u8 kv_get_vce_boot_level(struct radeon_device *rdev) { u8 i; @@ -1414,6 +1411,8 @@ static int kv_update_vce_dpm(struct radeon_device *rdev, int ret; if (radeon_new_state->evclk > 0 && radeon_current_state->evclk == 0) { + kv_dpm_powergate_vce(rdev, false); + /* XXX cik_vce_resume(); */ if (pi->caps_stable_p_state) pi->vce_boot_level = table->count - 1; else @@ -1436,11 +1435,12 @@ static int kv_update_vce_dpm(struct radeon_device *rdev, kv_enable_vce_dpm(rdev, true); } else if (radeon_new_state->evclk == 0 && radeon_current_state->evclk > 0) { kv_enable_vce_dpm(rdev, false); + /* XXX cik_vce_suspend(); */ + kv_dpm_powergate_vce(rdev, true); } return 0; } -#endif static int kv_update_samu_dpm(struct radeon_device *rdev, bool gate) { @@ -1768,7 +1768,7 @@ int kv_dpm_set_power_state(struct radeon_device *rdev) { struct kv_power_info *pi = kv_get_pi(rdev); struct radeon_ps *new_ps = &pi->requested_rps; - /*struct radeon_ps *old_ps = &pi->current_rps;*/ + struct radeon_ps *old_ps = &pi->current_rps; int ret; if (pi->bapm_enable) { @@ -1798,13 +1798,12 @@ int kv_dpm_set_power_state(struct radeon_device *rdev) kv_set_enabled_levels(rdev); kv_force_lowest_valid(rdev); kv_unforce_levels(rdev); -#if 0 + ret = kv_update_vce_dpm(rdev, new_ps, old_ps); if (ret) { DRM_ERROR("kv_update_vce_dpm failed\n"); return ret; } -#endif kv_update_sclk_t(rdev); } } else { @@ -1823,13 +1822,11 @@ int kv_dpm_set_power_state(struct radeon_device *rdev) kv_program_nbps_index_settings(rdev, new_ps); kv_freeze_sclk_dpm(rdev, false); kv_set_enabled_levels(rdev); -#if 0 ret = kv_update_vce_dpm(rdev, new_ps, old_ps); if (ret) { DRM_ERROR("kv_update_vce_dpm failed\n"); return ret; } -#endif kv_update_acp_boot_level(rdev); kv_update_sclk_t(rdev); kv_enable_nb_dpm(rdev); @@ -2037,6 +2034,14 @@ static void kv_apply_state_adjust_rules(struct radeon_device *rdev, struct radeon_clock_and_voltage_limits *max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac; + if (new_rps->vce_active) { + new_rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk; + new_rps->ecclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].ecclk; + } else { + new_rps->evclk = 0; + new_rps->ecclk = 0; + } + mclk = max_limits->mclk; sclk = min_sclk; @@ -2056,6 +2061,11 @@ static void kv_apply_state_adjust_rules(struct radeon_device *rdev, sclk = stable_p_state_sclk; } + if (new_rps->vce_active) { + if (sclk < rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk) + sclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk; + } + ps->need_dfs_bypass = true; for (i = 0; i < ps->num_levels; i++) { @@ -2092,7 +2102,8 @@ static void kv_apply_state_adjust_rules(struct radeon_device *rdev, } } - pi->video_start = new_rps->dclk || new_rps->vclk; + pi->video_start = new_rps->dclk || new_rps->vclk || + new_rps->evclk || new_rps->ecclk; if ((new_rps->class & ATOM_PPLIB_CLASSIFICATION_UI_MASK) == ATOM_PPLIB_CLASSIFICATION_UI_BATTERY) @@ -2574,6 +2585,19 @@ static int kv_parse_power_table(struct radeon_device *rdev) power_state_offset += 2 + power_state->v2.ucNumDPMLevels; } rdev->pm.dpm.num_ps = state_array->ucNumEntries; + + /* fill in the vce power states */ + for (i = 0; i < RADEON_MAX_VCE_LEVELS; i++) { + u32 sclk; + clock_array_index = rdev->pm.dpm.vce_states[i].clk_idx; + clock_info = (union pplib_clock_info *) + &clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize]; + sclk = le16_to_cpu(clock_info->sumo.usEngineClockLow); + sclk |= clock_info->sumo.ucEngineClockHigh << 16; + rdev->pm.dpm.vce_states[i].sclk = sclk; + rdev->pm.dpm.vce_states[i].mclk = 0; + } + return 0; } @@ -2624,7 +2648,7 @@ int kv_dpm_init(struct radeon_device *rdev) pi->caps_fps = false; /* true? */ pi->caps_uvd_pg = true; pi->caps_uvd_dpm = true; - pi->caps_vce_pg = false; + pi->caps_vce_pg = false; /* XXX true */ pi->caps_samu_pg = false; pi->caps_acp_pg = false; pi->caps_stable_p_state = false; -- cgit From 03afe6f6480f2544d6cd18866556f1f76bb05f14 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 23 Aug 2013 11:56:26 -0400 Subject: drm/radeon/dpm: enable dynamic vce state switching v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit enable vce states when vce is active. When vce is active, it adjusts the currently selected state (performance, battery, uvd, etc.) v2: add code comments Signed-off-by: Alex Deucher Signed-off-by: Christian König --- drivers/gpu/drm/radeon/radeon.h | 3 ++ drivers/gpu/drm/radeon/radeon_cs.c | 3 ++ drivers/gpu/drm/radeon/radeon_pm.c | 17 ++++++++++ drivers/gpu/drm/radeon/radeon_vce.c | 62 +++++++++++++++++++++++++++++++++++++ 4 files changed, 85 insertions(+) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 693a8fccd94d..540624e7491c 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1518,6 +1518,7 @@ struct radeon_dpm { }; void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable); +void radeon_dpm_enable_vce(struct radeon_device *rdev, bool enable); struct radeon_pm { struct mutex mutex; @@ -1638,6 +1639,7 @@ struct radeon_vce { unsigned fb_version; atomic_t handles[RADEON_MAX_VCE_HANDLES]; struct drm_file *filp[RADEON_MAX_VCE_HANDLES]; + struct delayed_work idle_work; }; int radeon_vce_init(struct radeon_device *rdev); @@ -1649,6 +1651,7 @@ int radeon_vce_get_create_msg(struct radeon_device *rdev, int ring, int radeon_vce_get_destroy_msg(struct radeon_device *rdev, int ring, uint32_t handle, struct radeon_fence **fence); void radeon_vce_free_handles(struct radeon_device *rdev, struct drm_file *filp); +void radeon_vce_note_usage(struct radeon_device *rdev); int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi); int radeon_vce_cs_parse(struct radeon_cs_parser *p); bool radeon_vce_semaphore_emit(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 701ee7981b5c..f28a8d82fa19 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -347,6 +347,9 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev, if (parser->ring == R600_RING_TYPE_UVD_INDEX) radeon_uvd_note_usage(rdev); + else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) || + (parser->ring == TN_RING_TYPE_VCE2_INDEX)) + radeon_vce_note_usage(rdev); radeon_cs_sync_rings(parser); r = radeon_ib_schedule(rdev, &parser->ib, NULL); diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index a4687e7b45f8..4ad9af9fc517 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -968,6 +968,23 @@ void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable) } } +void radeon_dpm_enable_vce(struct radeon_device *rdev, bool enable) +{ + if (enable) { + mutex_lock(&rdev->pm.mutex); + rdev->pm.dpm.vce_active = true; + /* XXX select vce level based on ring/task */ + rdev->pm.dpm.vce_level = RADEON_VCE_LEVEL_AC_ALL; + mutex_unlock(&rdev->pm.mutex); + } else { + mutex_lock(&rdev->pm.mutex); + rdev->pm.dpm.vce_active = false; + mutex_unlock(&rdev->pm.mutex); + } + + radeon_pm_compute_clocks(rdev); +} + static void radeon_pm_suspend_old(struct radeon_device *rdev) { mutex_lock(&rdev->pm.mutex); diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c index f46563b60921..d130432e313a 100644 --- a/drivers/gpu/drm/radeon/radeon_vce.c +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -34,11 +34,16 @@ #include "radeon_asic.h" #include "sid.h" +/* 1 second timeout */ +#define VCE_IDLE_TIMEOUT_MS 1000 + /* Firmware Names */ #define FIRMWARE_BONAIRE "radeon/BONAIRE_vce.bin" MODULE_FIRMWARE(FIRMWARE_BONAIRE); +static void radeon_vce_idle_work_handler(struct work_struct *work); + /** * radeon_vce_init - allocate memory, load vce firmware * @@ -55,6 +60,8 @@ int radeon_vce_init(struct radeon_device *rdev) uint8_t start, mid, end; int i, r; + INIT_DELAYED_WORK(&rdev->vce.idle_work, radeon_vce_idle_work_handler); + switch (rdev->family) { case CHIP_BONAIRE: case CHIP_KAVERI: @@ -219,6 +226,59 @@ int radeon_vce_resume(struct radeon_device *rdev) return 0; } +/** + * radeon_vce_idle_work_handler - power off VCE + * + * @work: pointer to work structure + * + * power of VCE when it's not used any more + */ +static void radeon_vce_idle_work_handler(struct work_struct *work) +{ + struct radeon_device *rdev = + container_of(work, struct radeon_device, vce.idle_work.work); + + if ((radeon_fence_count_emitted(rdev, TN_RING_TYPE_VCE1_INDEX) == 0) && + (radeon_fence_count_emitted(rdev, TN_RING_TYPE_VCE2_INDEX) == 0)) { + if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { + radeon_dpm_enable_vce(rdev, false); + } else { + radeon_set_vce_clocks(rdev, 0, 0); + } + } else { + schedule_delayed_work(&rdev->vce.idle_work, + msecs_to_jiffies(VCE_IDLE_TIMEOUT_MS)); + } +} + +/** + * radeon_vce_note_usage - power up VCE + * + * @rdev: radeon_device pointer + * + * Make sure VCE is powerd up when we want to use it + */ +void radeon_vce_note_usage(struct radeon_device *rdev) +{ + bool streams_changed = false; + bool set_clocks = !cancel_delayed_work_sync(&rdev->vce.idle_work); + set_clocks &= schedule_delayed_work(&rdev->vce.idle_work, + msecs_to_jiffies(VCE_IDLE_TIMEOUT_MS)); + + if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { + /* XXX figure out if the streams changed */ + streams_changed = false; + } + + if (set_clocks || streams_changed) { + if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { + radeon_dpm_enable_vce(rdev, true); + } else { + radeon_set_vce_clocks(rdev, 53300, 40000); + } + } +} + /** * radeon_vce_free_handles - free still open VCE handles * @@ -235,6 +295,8 @@ void radeon_vce_free_handles(struct radeon_device *rdev, struct drm_file *filp) if (!handle || rdev->vce.filp[i] != filp) continue; + radeon_vce_note_usage(rdev); + r = radeon_vce_get_destroy_msg(rdev, TN_RING_TYPE_VCE1_INDEX, handle, NULL); if (r) -- cgit From 44493ba959cfaa7506498441397f83d180e4a509 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 28 Aug 2013 18:53:50 -0400 Subject: drm/radeon/dpm: properly enable/disable vce when vce pg is enabled The adds the appropriate function calls to properly re-init vce before it's used after it has been power gated. Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/kv_dpm.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c index e972b885e297..9ee1f28bbd85 100644 --- a/drivers/gpu/drm/radeon/kv_dpm.c +++ b/drivers/gpu/drm/radeon/kv_dpm.c @@ -1412,7 +1412,6 @@ static int kv_update_vce_dpm(struct radeon_device *rdev, if (radeon_new_state->evclk > 0 && radeon_current_state->evclk == 0) { kv_dpm_powergate_vce(rdev, false); - /* XXX cik_vce_resume(); */ if (pi->caps_stable_p_state) pi->vce_boot_level = table->count - 1; else @@ -1435,7 +1434,6 @@ static int kv_update_vce_dpm(struct radeon_device *rdev, kv_enable_vce_dpm(rdev, true); } else if (radeon_new_state->evclk == 0 && radeon_current_state->evclk > 0) { kv_enable_vce_dpm(rdev, false); - /* XXX cik_vce_suspend(); */ kv_dpm_powergate_vce(rdev, true); } @@ -1575,11 +1573,16 @@ static void kv_dpm_powergate_vce(struct radeon_device *rdev, bool gate) pi->vce_power_gated = gate; if (gate) { - if (pi->caps_vce_pg) + if (pi->caps_vce_pg) { + /* XXX do we need a vce_v1_0_stop() ? */ kv_notify_message_to_smu(rdev, PPSMC_MSG_VCEPowerOFF); + } } else { - if (pi->caps_vce_pg) + if (pi->caps_vce_pg) { kv_notify_message_to_smu(rdev, PPSMC_MSG_VCEPowerON); + vce_v2_0_resume(rdev); + vce_v1_0_start(rdev); + } } } -- cgit From b9fa18837610483b09a07f1419e6b9f333c46023 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 5 Sep 2013 15:14:28 -0400 Subject: drm/radeon: add support for vce 2.0 clock gating Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cikd.h | 10 ++++ drivers/gpu/drm/radeon/vce_v2_0.c | 111 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index ee16380ceba8..213873270d5f 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -2029,8 +2029,18 @@ #define VCE_RB_RPTR 0x2018c #define VCE_RB_WPTR 0x20190 #define VCE_CLOCK_GATING_A 0x202f8 +# define CGC_CLK_GATE_DLY_TIMER_MASK (0xf << 0) +# define CGC_CLK_GATE_DLY_TIMER(x) ((x) << 0) +# define CGC_CLK_GATER_OFF_DLY_TIMER_MASK (0xff << 4) +# define CGC_CLK_GATER_OFF_DLY_TIMER(x) ((x) << 4) +# define CGC_UENC_WAIT_AWAKE (1 << 18) #define VCE_CLOCK_GATING_B 0x202fc +#define VCE_CGTT_CLK_OVERRIDE 0x207a0 #define VCE_UENC_CLOCK_GATING 0x207bc +# define CLOCK_ON_DELAY_MASK (0xf << 0) +# define CLOCK_ON_DELAY(x) ((x) << 0) +# define CLOCK_OFF_DELAY_MASK (0xff << 4) +# define CLOCK_OFF_DELAY(x) ((x) << 4) #define VCE_UENC_REG_CLOCK_GATING 0x207c0 #define VCE_SYS_INT_EN 0x21300 # define VCE_SYS_INT_TRAP_INTERRUPT_EN (1 << 3) diff --git a/drivers/gpu/drm/radeon/vce_v2_0.c b/drivers/gpu/drm/radeon/vce_v2_0.c index 4911d1b00e3b..1ac7bb825a1b 100644 --- a/drivers/gpu/drm/radeon/vce_v2_0.c +++ b/drivers/gpu/drm/radeon/vce_v2_0.c @@ -31,6 +31,115 @@ #include "radeon_asic.h" #include "cikd.h" +static void vce_v2_0_set_sw_cg(struct radeon_device *rdev, bool gated) +{ + u32 tmp; + + if (gated) { + tmp = RREG32(VCE_CLOCK_GATING_B); + tmp |= 0xe70000; + WREG32(VCE_CLOCK_GATING_B, tmp); + + tmp = RREG32(VCE_UENC_CLOCK_GATING); + tmp |= 0xff000000; + WREG32(VCE_UENC_CLOCK_GATING, tmp); + + tmp = RREG32(VCE_UENC_REG_CLOCK_GATING); + tmp &= ~0x3fc; + WREG32(VCE_UENC_REG_CLOCK_GATING, tmp); + + WREG32(VCE_CGTT_CLK_OVERRIDE, 0); + } else { + tmp = RREG32(VCE_CLOCK_GATING_B); + tmp |= 0xe7; + tmp &= ~0xe70000; + WREG32(VCE_CLOCK_GATING_B, tmp); + + tmp = RREG32(VCE_UENC_CLOCK_GATING); + tmp |= 0x1fe000; + tmp &= ~0xff000000; + WREG32(VCE_UENC_CLOCK_GATING, tmp); + + tmp = RREG32(VCE_UENC_REG_CLOCK_GATING); + tmp |= 0x3fc; + WREG32(VCE_UENC_REG_CLOCK_GATING, tmp); + } +} + +static void vce_v2_0_set_dyn_cg(struct radeon_device *rdev, bool gated) +{ + u32 orig, tmp; + + tmp = RREG32(VCE_CLOCK_GATING_B); + tmp &= ~0x00060006; + if (gated) { + tmp |= 0xe10000; + } else { + tmp |= 0xe1; + tmp &= ~0xe10000; + } + WREG32(VCE_CLOCK_GATING_B, tmp); + + orig = tmp = RREG32(VCE_UENC_CLOCK_GATING); + tmp &= ~0x1fe000; + tmp &= ~0xff000000; + if (tmp != orig) + WREG32(VCE_UENC_CLOCK_GATING, tmp); + + orig = tmp = RREG32(VCE_UENC_REG_CLOCK_GATING); + tmp &= ~0x3fc; + if (tmp != orig) + WREG32(VCE_UENC_REG_CLOCK_GATING, tmp); + + if (gated) + WREG32(VCE_CGTT_CLK_OVERRIDE, 0); +} + +static void vce_v2_0_disable_cg(struct radeon_device *rdev) +{ + WREG32(VCE_CGTT_CLK_OVERRIDE, 7); +} + +void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable) +{ + bool sw_cg = false; + + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_VCE_MGCG)) { + if (sw_cg) + vce_v2_0_set_sw_cg(rdev, true); + else + vce_v2_0_set_dyn_cg(rdev, true); + } else { + vce_v2_0_disable_cg(rdev); + + if (sw_cg) + vce_v2_0_set_sw_cg(rdev, false); + else + vce_v2_0_set_dyn_cg(rdev, false); + } +} + +static void vce_v2_0_init_cg(struct radeon_device *rdev) +{ + u32 tmp; + + tmp = RREG32(VCE_CLOCK_GATING_A); + tmp &= ~(CGC_CLK_GATE_DLY_TIMER_MASK | CGC_CLK_GATER_OFF_DLY_TIMER_MASK); + tmp |= (CGC_CLK_GATE_DLY_TIMER(0) | CGC_CLK_GATER_OFF_DLY_TIMER(4)); + tmp |= CGC_UENC_WAIT_AWAKE; + WREG32(VCE_CLOCK_GATING_A, tmp); + + tmp = RREG32(VCE_UENC_CLOCK_GATING); + tmp &= ~(CLOCK_ON_DELAY_MASK | CLOCK_OFF_DELAY_MASK); + tmp |= (CLOCK_ON_DELAY(0) | CLOCK_OFF_DELAY(4)); + WREG32(VCE_UENC_CLOCK_GATING, tmp); + + tmp = RREG32(VCE_CLOCK_GATING_B); + tmp |= 0x10; + tmp &= ~0x100000; + WREG32(VCE_CLOCK_GATING_B, tmp); +} + int vce_v2_0_resume(struct radeon_device *rdev) { uint64_t addr = rdev->vce.gpu_addr; @@ -66,5 +175,7 @@ int vce_v2_0_resume(struct radeon_device *rdev) WREG32_P(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, ~VCE_SYS_INT_TRAP_INTERRUPT_EN); + vce_v2_0_init_cg(rdev); + return 0; } -- cgit From a1d6f97c8cfa7c3554d0391c0b16505d1d97f380 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 6 Sep 2013 12:33:04 -0400 Subject: drm/radeon/cik: enable/disable vce cg when encoding v2 Some of the vce clocks are automatic, others need to be manually enabled. For ease, just disable cg when vce is active. v2: rebased Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/ci_dpm.c | 9 ++++++++- drivers/gpu/drm/radeon/cik.c | 5 +++++ drivers/gpu/drm/radeon/kv_dpm.c | 4 ++++ 3 files changed, 17 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 6669d3252f57..cad89a977527 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -172,6 +172,8 @@ extern void si_trim_voltage_table_to_fit_state_table(struct radeon_device *rdev, extern void cik_enter_rlc_safe_mode(struct radeon_device *rdev); extern void cik_exit_rlc_safe_mode(struct radeon_device *rdev); extern int ci_mc_load_microcode(struct radeon_device *rdev); +extern void cik_update_cg(struct radeon_device *rdev, + u32 block, bool enable); static int ci_get_std_voltage_value_sidd(struct radeon_device *rdev, struct atom_voltage_table_entry *voltage_table, @@ -3627,8 +3629,10 @@ static int ci_update_vce_dpm(struct radeon_device *rdev, if (radeon_current_state->evclk != radeon_new_state->evclk) { if (radeon_new_state->evclk) { - pi->smc_state_table.VceBootLevel = ci_get_vce_boot_level(rdev); + /* turn the clocks on when encoding */ + cik_update_cg(rdev, RADEON_CG_BLOCK_VCE, false); + pi->smc_state_table.VceBootLevel = ci_get_vce_boot_level(rdev); tmp = RREG32_SMC(DPM_TABLE_475); tmp &= ~VceBootLevel_MASK; tmp |= VceBootLevel(pi->smc_state_table.VceBootLevel); @@ -3636,6 +3640,9 @@ static int ci_update_vce_dpm(struct radeon_device *rdev, ret = ci_enable_vce_dpm(rdev, true); } else { + /* turn the clocks off when not encoding */ + cik_update_cg(rdev, RADEON_CG_BLOCK_VCE, true); + ret = ci_enable_vce_dpm(rdev, false); } } diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index ecb16b14f049..2b31c3233a5e 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -75,6 +75,7 @@ extern void si_init_uvd_internal_cg(struct radeon_device *rdev); extern int cik_sdma_resume(struct radeon_device *rdev); extern void cik_sdma_enable(struct radeon_device *rdev, bool enable); extern void cik_sdma_fini(struct radeon_device *rdev); +extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable); static void cik_rlc_stop(struct radeon_device *rdev); static void cik_pcie_gen3_enable(struct radeon_device *rdev); static void cik_program_aspm(struct radeon_device *rdev); @@ -6141,6 +6142,10 @@ void cik_update_cg(struct radeon_device *rdev, cik_enable_hdp_mgcg(rdev, enable); cik_enable_hdp_ls(rdev, enable); } + + if (block & RADEON_CG_BLOCK_VCE) { + vce_v2_0_enable_mgcg(rdev, enable); + } } static void cik_init_cg(struct radeon_device *rdev) diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c index 9ee1f28bbd85..16ec9d56a234 100644 --- a/drivers/gpu/drm/radeon/kv_dpm.c +++ b/drivers/gpu/drm/radeon/kv_dpm.c @@ -1412,6 +1412,8 @@ static int kv_update_vce_dpm(struct radeon_device *rdev, if (radeon_new_state->evclk > 0 && radeon_current_state->evclk == 0) { kv_dpm_powergate_vce(rdev, false); + /* turn the clocks on when encoding */ + cik_update_cg(rdev, RADEON_CG_BLOCK_VCE, false); if (pi->caps_stable_p_state) pi->vce_boot_level = table->count - 1; else @@ -1434,6 +1436,8 @@ static int kv_update_vce_dpm(struct radeon_device *rdev, kv_enable_vce_dpm(rdev, true); } else if (radeon_new_state->evclk == 0 && radeon_current_state->evclk > 0) { kv_enable_vce_dpm(rdev, false); + /* turn the clocks off when not encoding */ + cik_update_cg(rdev, RADEON_CG_BLOCK_VCE, true); kv_dpm_powergate_vce(rdev, true); } -- cgit From ff212f25feb44a915ce9c0144faef7fae27a6e61 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 18 Feb 2014 14:52:33 +0100 Subject: drm/radeon: drop drivers copy of the rptr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In all cases where it really matters we are using the read functions anyway. Signed-off-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 7 ++----- drivers/gpu/drm/radeon/cik_sdma.c | 4 +--- drivers/gpu/drm/radeon/evergreen.c | 4 +--- drivers/gpu/drm/radeon/evergreen_dma.c | 2 +- drivers/gpu/drm/radeon/ni.c | 6 +++--- drivers/gpu/drm/radeon/ni_dma.c | 4 +--- drivers/gpu/drm/radeon/r100.c | 3 +-- drivers/gpu/drm/radeon/r600.c | 4 +--- drivers/gpu/drm/radeon/r600_dma.c | 4 +--- drivers/gpu/drm/radeon/radeon.h | 4 ++-- drivers/gpu/drm/radeon/radeon_ring.c | 27 +++++++++++++-------------- drivers/gpu/drm/radeon/si.c | 8 +------- drivers/gpu/drm/radeon/si_dma.c | 2 +- drivers/gpu/drm/radeon/uvd_v1_0.c | 2 +- drivers/gpu/drm/radeon/vce_v1_0.c | 4 ++-- 15 files changed, 32 insertions(+), 53 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 2b31c3233a5e..835dcfb78916 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -4031,8 +4031,6 @@ static int cik_cp_gfx_resume(struct radeon_device *rdev) WREG32(CP_RB0_BASE, rb_addr); WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr)); - ring->rptr = RREG32(CP_RB0_RPTR); - /* start the ring */ cik_cp_gfx_start(rdev); rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true; @@ -4587,8 +4585,7 @@ static int cik_cp_compute_resume(struct radeon_device *rdev) rdev->ring[idx].wptr = 0; mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr; WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); - rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR); - mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr; + mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR); /* set the vmid for the queue */ mqd->queue_state.cp_hqd_vmid = 0; @@ -5118,7 +5115,7 @@ bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) if (!(reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))) { - radeon_ring_lockup_update(ring); + radeon_ring_lockup_update(rdev, ring); return false; } /* force CP activities */ diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index 1ecb3f1070e3..e474760d714c 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -362,8 +362,6 @@ static int cik_sdma_gfx_resume(struct radeon_device *rdev) ring->wptr = 0; WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2); - ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2; - /* enable DMA RB */ WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE); @@ -713,7 +711,7 @@ bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) mask = RADEON_RESET_DMA1; if (!(reset_mask & mask)) { - radeon_ring_lockup_update(ring); + radeon_ring_lockup_update(rdev, ring); return false; } /* force ring activities */ diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index f2b9e21ce4da..d9156be5b9a6 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -2990,8 +2990,6 @@ static int evergreen_cp_resume(struct radeon_device *rdev) WREG32(CP_RB_BASE, ring->gpu_addr >> 8); WREG32(CP_DEBUG, (1 << 27) | (1 << 28)); - ring->rptr = RREG32(CP_RB_RPTR); - evergreen_cp_start(rdev); ring->ready = true; r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, ring); @@ -3952,7 +3950,7 @@ bool evergreen_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *rin if (!(reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))) { - radeon_ring_lockup_update(ring); + radeon_ring_lockup_update(rdev, ring); return false; } /* force CP activities */ diff --git a/drivers/gpu/drm/radeon/evergreen_dma.c b/drivers/gpu/drm/radeon/evergreen_dma.c index a37b54436382..d448961e9ab2 100644 --- a/drivers/gpu/drm/radeon/evergreen_dma.c +++ b/drivers/gpu/drm/radeon/evergreen_dma.c @@ -174,7 +174,7 @@ bool evergreen_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *rin u32 reset_mask = evergreen_gpu_check_soft_reset(rdev); if (!(reset_mask & RADEON_RESET_DMA)) { - radeon_ring_lockup_update(ring); + radeon_ring_lockup_update(rdev, ring); return false; } /* force ring activities */ diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index ea932ac66fc6..7601532b7372 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1642,8 +1642,8 @@ static int cayman_cp_resume(struct radeon_device *rdev) ring = &rdev->ring[ridx[i]]; WREG32_P(cp_rb_cntl[i], RB_RPTR_WR_ENA, ~RB_RPTR_WR_ENA); - ring->rptr = ring->wptr = 0; - WREG32(cp_rb_rptr[i], ring->rptr); + ring->wptr = 0; + WREG32(cp_rb_rptr[i], 0); WREG32(cp_rb_wptr[i], ring->wptr); mdelay(1); @@ -1917,7 +1917,7 @@ bool cayman_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) if (!(reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))) { - radeon_ring_lockup_update(ring); + radeon_ring_lockup_update(rdev, ring); return false; } /* force CP activities */ diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c index 7cf96b15377f..95e533c61f83 100644 --- a/drivers/gpu/drm/radeon/ni_dma.c +++ b/drivers/gpu/drm/radeon/ni_dma.c @@ -248,8 +248,6 @@ int cayman_dma_resume(struct radeon_device *rdev) ring->wptr = 0; WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2); - ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2; - WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE); ring->ready = true; @@ -302,7 +300,7 @@ bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) mask = RADEON_RESET_DMA1; if (!(reset_mask & mask)) { - radeon_ring_lockup_update(ring); + radeon_ring_lockup_update(rdev, ring); return false; } /* force ring activities */ diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index ef024ce3f7cc..3a7438163d06 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -1193,7 +1193,6 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size) WREG32(RADEON_CP_RB_CNTL, tmp); udelay(10); - ring->rptr = RREG32(RADEON_CP_RB_RPTR); /* Set cp mode to bus mastering & enable cp*/ WREG32(RADEON_CP_CSQ_MODE, REG_SET(RADEON_INDIRECT2_START, indirect2_start) | @@ -2523,7 +2522,7 @@ bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) rbbm_status = RREG32(R_000E40_RBBM_STATUS); if (!G_000E40_GUI_ACTIVE(rbbm_status)) { - radeon_ring_lockup_update(ring); + radeon_ring_lockup_update(rdev, ring); return false; } /* force CP activities */ diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index cdbc4171fe73..085e02590dcf 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -1748,7 +1748,7 @@ bool r600_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) if (!(reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))) { - radeon_ring_lockup_update(ring); + radeon_ring_lockup_update(rdev, ring); return false; } /* force CP activities */ @@ -2604,8 +2604,6 @@ int r600_cp_resume(struct radeon_device *rdev) WREG32(CP_RB_BASE, ring->gpu_addr >> 8); WREG32(CP_DEBUG, (1 << 27) | (1 << 28)); - ring->rptr = RREG32(CP_RB_RPTR); - r600_cp_start(rdev); ring->ready = true; r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, ring); diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c index b2d4c91e6272..6944e1988426 100644 --- a/drivers/gpu/drm/radeon/r600_dma.c +++ b/drivers/gpu/drm/radeon/r600_dma.c @@ -176,8 +176,6 @@ int r600_dma_resume(struct radeon_device *rdev) ring->wptr = 0; WREG32(DMA_RB_WPTR, ring->wptr << 2); - ring->rptr = RREG32(DMA_RB_RPTR) >> 2; - WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE); ring->ready = true; @@ -221,7 +219,7 @@ bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) u32 reset_mask = r600_gpu_check_soft_reset(rdev); if (!(reset_mask & RADEON_RESET_DMA)) { - radeon_ring_lockup_update(ring); + radeon_ring_lockup_update(rdev, ring); return false; } /* force ring activities */ diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 540624e7491c..e1c4f9c6772b 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -793,7 +793,6 @@ struct radeon_ib { struct radeon_ring { struct radeon_bo *ring_obj; volatile uint32_t *ring; - unsigned rptr; unsigned rptr_offs; unsigned rptr_save_reg; u64 next_rptr_gpu_addr; @@ -958,7 +957,8 @@ void radeon_ring_undo(struct radeon_ring *ring); void radeon_ring_unlock_undo(struct radeon_device *rdev, struct radeon_ring *cp); int radeon_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); void radeon_ring_force_activity(struct radeon_device *rdev, struct radeon_ring *ring); -void radeon_ring_lockup_update(struct radeon_ring *ring); +void radeon_ring_lockup_update(struct radeon_device *rdev, + struct radeon_ring *ring); bool radeon_ring_test_lockup(struct radeon_device *rdev, struct radeon_ring *ring); unsigned radeon_ring_backup(struct radeon_device *rdev, struct radeon_ring *ring, uint32_t **data); diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index d2980b03d1ad..0f78789d085a 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -342,9 +342,10 @@ bool radeon_ring_supports_scratch_reg(struct radeon_device *rdev, */ void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *ring) { - ring->rptr = radeon_ring_get_rptr(rdev, ring); + uint32_t rptr = radeon_ring_get_rptr(rdev, ring); + /* This works because ring_size is a power of 2 */ - ring->ring_free_dw = (ring->rptr + (ring->ring_size / 4)); + ring->ring_free_dw = rptr + (ring->ring_size / 4); ring->ring_free_dw -= ring->wptr; ring->ring_free_dw &= ring->ptr_mask; if (!ring->ring_free_dw) { @@ -376,7 +377,7 @@ int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *ring, unsi /* This is an empty ring update lockup info to avoid * false positive. */ - radeon_ring_lockup_update(ring); + radeon_ring_lockup_update(rdev, ring); } ndw = (ndw + ring->align_mask) & ~ring->align_mask; while (ndw > (ring->ring_free_dw - 1)) { @@ -490,8 +491,7 @@ void radeon_ring_force_activity(struct radeon_device *rdev, struct radeon_ring * { int r; - radeon_ring_free_size(rdev, ring); - if (ring->rptr == ring->wptr) { + if (radeon_ring_get_rptr(rdev, ring) == ring->wptr) { r = radeon_ring_alloc(rdev, ring, 1); if (!r) { radeon_ring_write(ring, ring->nop); @@ -507,9 +507,10 @@ void radeon_ring_force_activity(struct radeon_device *rdev, struct radeon_ring * * * Update the last rptr value and timestamp (all asics). */ -void radeon_ring_lockup_update(struct radeon_ring *ring) +void radeon_ring_lockup_update(struct radeon_device *rdev, + struct radeon_ring *ring) { - ring->last_rptr = ring->rptr; + ring->last_rptr = radeon_ring_get_rptr(rdev, ring); ring->last_activity = jiffies; } @@ -535,18 +536,18 @@ void radeon_ring_lockup_update(struct radeon_ring *ring) **/ bool radeon_ring_test_lockup(struct radeon_device *rdev, struct radeon_ring *ring) { + uint32_t rptr = radeon_ring_get_rptr(rdev, ring); unsigned long cjiffies, elapsed; cjiffies = jiffies; if (!time_after(cjiffies, ring->last_activity)) { /* likely a wrap around */ - radeon_ring_lockup_update(ring); + radeon_ring_lockup_update(rdev, ring); return false; } - ring->rptr = radeon_ring_get_rptr(rdev, ring); - if (ring->rptr != ring->last_rptr) { + if (rptr != ring->last_rptr) { /* CP is still working no lockup */ - radeon_ring_lockup_update(ring); + radeon_ring_lockup_update(rdev, ring); return false; } elapsed = jiffies_to_msecs(cjiffies - ring->last_activity); @@ -709,7 +710,7 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig if (radeon_debugfs_ring_init(rdev, ring)) { DRM_ERROR("Failed to register debugfs file for rings !\n"); } - radeon_ring_lockup_update(ring); + radeon_ring_lockup_update(rdev, ring); return 0; } @@ -780,8 +781,6 @@ static int radeon_debugfs_ring_info(struct seq_file *m, void *data) seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n", ring->wptr, ring->wptr); - seq_printf(m, "driver's copy of the rptr: 0x%08x [%5d]\n", - ring->rptr, ring->rptr); seq_printf(m, "last semaphore signal addr : 0x%016llx\n", ring->last_semaphore_signal_addr); seq_printf(m, "last semaphore wait addr : 0x%016llx\n", diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 83578324e5d1..b406a48ef202 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -3434,8 +3434,6 @@ static int si_cp_resume(struct radeon_device *rdev) WREG32(CP_RB0_BASE, ring->gpu_addr >> 8); - ring->rptr = RREG32(CP_RB0_RPTR); - /* ring1 - compute only */ /* Set ring buffer size */ ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; @@ -3460,8 +3458,6 @@ static int si_cp_resume(struct radeon_device *rdev) WREG32(CP_RB1_BASE, ring->gpu_addr >> 8); - ring->rptr = RREG32(CP_RB1_RPTR); - /* ring2 - compute only */ /* Set ring buffer size */ ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; @@ -3486,8 +3482,6 @@ static int si_cp_resume(struct radeon_device *rdev) WREG32(CP_RB2_BASE, ring->gpu_addr >> 8); - ring->rptr = RREG32(CP_RB2_RPTR); - /* start the rings */ si_cp_start(rdev); rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true; @@ -3872,7 +3866,7 @@ bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) if (!(reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))) { - radeon_ring_lockup_update(ring); + radeon_ring_lockup_update(rdev, ring); return false; } /* force CP activities */ diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c index 59be2cfcbb47..c75f5337f462 100644 --- a/drivers/gpu/drm/radeon/si_dma.c +++ b/drivers/gpu/drm/radeon/si_dma.c @@ -49,7 +49,7 @@ bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) mask = RADEON_RESET_DMA1; if (!(reset_mask & mask)) { - radeon_ring_lockup_update(ring); + radeon_ring_lockup_update(rdev, ring); return false; } /* force ring activities */ diff --git a/drivers/gpu/drm/radeon/uvd_v1_0.c b/drivers/gpu/drm/radeon/uvd_v1_0.c index d4a68af1a279..0a243f0e5d68 100644 --- a/drivers/gpu/drm/radeon/uvd_v1_0.c +++ b/drivers/gpu/drm/radeon/uvd_v1_0.c @@ -262,7 +262,7 @@ int uvd_v1_0_start(struct radeon_device *rdev) /* Initialize the ring buffer's read and write pointers */ WREG32(UVD_RBC_RB_RPTR, 0x0); - ring->wptr = ring->rptr = RREG32(UVD_RBC_RB_RPTR); + ring->wptr = RREG32(UVD_RBC_RB_RPTR); WREG32(UVD_RBC_RB_WPTR, ring->wptr); /* set the ring address */ diff --git a/drivers/gpu/drm/radeon/vce_v1_0.c b/drivers/gpu/drm/radeon/vce_v1_0.c index e0c3534356a1..b44d9c842f7b 100644 --- a/drivers/gpu/drm/radeon/vce_v1_0.c +++ b/drivers/gpu/drm/radeon/vce_v1_0.c @@ -98,14 +98,14 @@ int vce_v1_0_start(struct radeon_device *rdev) WREG32_P(VCE_STATUS, 1, ~1); ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; - WREG32(VCE_RB_RPTR, ring->rptr); + WREG32(VCE_RB_RPTR, ring->wptr); WREG32(VCE_RB_WPTR, ring->wptr); WREG32(VCE_RB_BASE_LO, ring->gpu_addr); WREG32(VCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32(VCE_RB_SIZE, ring->ring_size / 4); ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; - WREG32(VCE_RB_RPTR2, ring->rptr); + WREG32(VCE_RB_RPTR2, ring->wptr); WREG32(VCE_RB_WPTR2, ring->wptr); WREG32(VCE_RB_BASE_LO2, ring->gpu_addr); WREG32(VCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); -- cgit From 2d2fe3f9b60fd3cc9a19dcc3ae892a23825da07f Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 18 Feb 2014 12:37:50 +0100 Subject: drm/radeon: drop radeon_ring_force_activity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The reason for the false positives was fixed quite some time ago and since most engines can still execute NOPs while being locked up it leads to false negatives. Signed-off-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 2 -- drivers/gpu/drm/radeon/cik_sdma.c | 2 -- drivers/gpu/drm/radeon/evergreen.c | 2 -- drivers/gpu/drm/radeon/evergreen_dma.c | 2 -- drivers/gpu/drm/radeon/ni.c | 2 -- drivers/gpu/drm/radeon/ni_dma.c | 2 -- drivers/gpu/drm/radeon/r100.c | 2 -- drivers/gpu/drm/radeon/r600.c | 2 -- drivers/gpu/drm/radeon/r600_dma.c | 2 -- drivers/gpu/drm/radeon/radeon.h | 1 - drivers/gpu/drm/radeon/radeon_ring.c | 38 +--------------------------------- drivers/gpu/drm/radeon/si.c | 2 -- drivers/gpu/drm/radeon/si_dma.c | 2 -- 13 files changed, 1 insertion(+), 60 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 835dcfb78916..92e38b54efb9 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -5118,8 +5118,6 @@ bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) radeon_ring_lockup_update(rdev, ring); return false; } - /* force CP activities */ - radeon_ring_force_activity(rdev, ring); return radeon_ring_test_lockup(rdev, ring); } diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index e474760d714c..00150ac49cd2 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -714,8 +714,6 @@ bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) radeon_ring_lockup_update(rdev, ring); return false; } - /* force ring activities */ - radeon_ring_force_activity(rdev, ring); return radeon_ring_test_lockup(rdev, ring); } diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index d9156be5b9a6..c78d8ece9504 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -3953,8 +3953,6 @@ bool evergreen_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *rin radeon_ring_lockup_update(rdev, ring); return false; } - /* force CP activities */ - radeon_ring_force_activity(rdev, ring); return radeon_ring_test_lockup(rdev, ring); } diff --git a/drivers/gpu/drm/radeon/evergreen_dma.c b/drivers/gpu/drm/radeon/evergreen_dma.c index d448961e9ab2..287fe966d7de 100644 --- a/drivers/gpu/drm/radeon/evergreen_dma.c +++ b/drivers/gpu/drm/radeon/evergreen_dma.c @@ -177,8 +177,6 @@ bool evergreen_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *rin radeon_ring_lockup_update(rdev, ring); return false; } - /* force ring activities */ - radeon_ring_force_activity(rdev, ring); return radeon_ring_test_lockup(rdev, ring); } diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 7601532b7372..85168ecd216b 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1920,8 +1920,6 @@ bool cayman_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) radeon_ring_lockup_update(rdev, ring); return false; } - /* force CP activities */ - radeon_ring_force_activity(rdev, ring); return radeon_ring_test_lockup(rdev, ring); } diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c index 95e533c61f83..6378e0276691 100644 --- a/drivers/gpu/drm/radeon/ni_dma.c +++ b/drivers/gpu/drm/radeon/ni_dma.c @@ -303,8 +303,6 @@ bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) radeon_ring_lockup_update(rdev, ring); return false; } - /* force ring activities */ - radeon_ring_force_activity(rdev, ring); return radeon_ring_test_lockup(rdev, ring); } diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 3a7438163d06..1690a2dc0721 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -2525,8 +2525,6 @@ bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) radeon_ring_lockup_update(rdev, ring); return false; } - /* force CP activities */ - radeon_ring_force_activity(rdev, ring); return radeon_ring_test_lockup(rdev, ring); } diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 085e02590dcf..0f4ab928a15a 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -1751,8 +1751,6 @@ bool r600_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) radeon_ring_lockup_update(rdev, ring); return false; } - /* force CP activities */ - radeon_ring_force_activity(rdev, ring); return radeon_ring_test_lockup(rdev, ring); } diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c index 6944e1988426..53fcb28f5578 100644 --- a/drivers/gpu/drm/radeon/r600_dma.c +++ b/drivers/gpu/drm/radeon/r600_dma.c @@ -222,8 +222,6 @@ bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) radeon_ring_lockup_update(rdev, ring); return false; } - /* force ring activities */ - radeon_ring_force_activity(rdev, ring); return radeon_ring_test_lockup(rdev, ring); } diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index e1c4f9c6772b..a415f8e9d972 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -956,7 +956,6 @@ void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *c void radeon_ring_undo(struct radeon_ring *ring); void radeon_ring_unlock_undo(struct radeon_device *rdev, struct radeon_ring *cp); int radeon_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); -void radeon_ring_force_activity(struct radeon_device *rdev, struct radeon_ring *ring); void radeon_ring_lockup_update(struct radeon_device *rdev, struct radeon_ring *ring); bool radeon_ring_test_lockup(struct radeon_device *rdev, struct radeon_ring *ring); diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 0f78789d085a..668097abb424 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -478,28 +478,6 @@ void radeon_ring_unlock_undo(struct radeon_device *rdev, struct radeon_ring *rin mutex_unlock(&rdev->ring_lock); } -/** - * radeon_ring_force_activity - add some nop packets to the ring - * - * @rdev: radeon_device pointer - * @ring: radeon_ring structure holding ring information - * - * Add some nop packets to the ring to force activity (all asics). - * Used for lockup detection to see if the rptr is advancing. - */ -void radeon_ring_force_activity(struct radeon_device *rdev, struct radeon_ring *ring) -{ - int r; - - if (radeon_ring_get_rptr(rdev, ring) == ring->wptr) { - r = radeon_ring_alloc(rdev, ring, 1); - if (!r) { - radeon_ring_write(ring, ring->nop); - radeon_ring_commit(rdev, ring); - } - } -} - /** * radeon_ring_lockup_update - update lockup variables * @@ -519,21 +497,7 @@ void radeon_ring_lockup_update(struct radeon_device *rdev, * @rdev: radeon device structure * @ring: radeon_ring structure holding ring information * - * We don't need to initialize the lockup tracking information as we will either - * have CP rptr to a different value of jiffies wrap around which will force - * initialization of the lockup tracking informations. - * - * A possible false positivie is if we get call after while and last_cp_rptr == - * the current CP rptr, even if it's unlikely it might happen. To avoid this - * if the elapsed time since last call is bigger than 2 second than we return - * false and update the tracking information. Due to this the caller must call - * radeon_ring_test_lockup several time in less than 2sec for lockup to be reported - * the fencing code should be cautious about that. - * - * Caller should write to the ring to force CP to do something so we don't get - * false positive when CP is just gived nothing to do. - * - **/ + */ bool radeon_ring_test_lockup(struct radeon_device *rdev, struct radeon_ring *ring) { uint32_t rptr = radeon_ring_get_rptr(rdev, ring); diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index b406a48ef202..8008cb8d5324 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -3869,8 +3869,6 @@ bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) radeon_ring_lockup_update(rdev, ring); return false; } - /* force CP activities */ - radeon_ring_force_activity(rdev, ring); return radeon_ring_test_lockup(rdev, ring); } diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c index c75f5337f462..cf0fdad8c278 100644 --- a/drivers/gpu/drm/radeon/si_dma.c +++ b/drivers/gpu/drm/radeon/si_dma.c @@ -52,8 +52,6 @@ bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) radeon_ring_lockup_update(rdev, ring); return false; } - /* force ring activities */ - radeon_ring_force_activity(rdev, ring); return radeon_ring_test_lockup(rdev, ring); } -- cgit From 82dc62a31ce3ed7b4eeea9c65a3b69e81e2ea688 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 18 Feb 2014 15:03:22 +0100 Subject: drm/radeon: cleanup false positive lockup handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check always when we calculate the free dw, not just the first time. Signed-off-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_ring.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 668097abb424..b14c86d57607 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -349,7 +349,10 @@ void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *ring) ring->ring_free_dw -= ring->wptr; ring->ring_free_dw &= ring->ptr_mask; if (!ring->ring_free_dw) { + /* this is an empty ring */ ring->ring_free_dw = ring->ring_size / 4; + /* update lockup info to avoid false positive */ + radeon_ring_lockup_update(rdev, ring); } } @@ -373,12 +376,6 @@ int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *ring, unsi /* Align requested size with padding so unlock_commit can * pad safely */ radeon_ring_free_size(rdev, ring); - if (ring->ring_free_dw == (ring->ring_size / 4)) { - /* This is an empty ring update lockup info to avoid - * false positive. - */ - radeon_ring_lockup_update(rdev, ring); - } ndw = (ndw + ring->align_mask) & ~ring->align_mask; while (ndw > (ring->ring_free_dw - 1)) { radeon_ring_free_size(rdev, ring); -- cgit From aee4aa73a1af3176cc3eea5833cae596b4b7dd22 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 18 Feb 2014 15:24:06 +0100 Subject: drm/radeon: improve ring lockup detection code v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use atomics and jiffies_64, so that we don't need to have the ring mutex locked any more and avoid wrap arounds. v2: fix some checkpatch warnings Signed-off-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon.h | 4 ++-- drivers/gpu/drm/radeon/radeon_ring.c | 23 ++++++++++------------- 2 files changed, 12 insertions(+), 15 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 4581df193932..e98fe5c7f77b 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -805,8 +805,8 @@ struct radeon_ring { unsigned ring_size; unsigned ring_free_dw; int count_dw; - unsigned long last_activity; - unsigned last_rptr; + atomic_t last_rptr; + atomic64_t last_activity; uint64_t gpu_addr; uint32_t align_mask; uint32_t ptr_mask; diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index b14c86d57607..4c4810272371 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -485,8 +485,8 @@ void radeon_ring_unlock_undo(struct radeon_device *rdev, struct radeon_ring *rin void radeon_ring_lockup_update(struct radeon_device *rdev, struct radeon_ring *ring) { - ring->last_rptr = radeon_ring_get_rptr(rdev, ring); - ring->last_activity = jiffies; + atomic_set(&ring->last_rptr, radeon_ring_get_rptr(rdev, ring)); + atomic64_set(&ring->last_activity, jiffies_64); } /** @@ -498,22 +498,19 @@ void radeon_ring_lockup_update(struct radeon_device *rdev, bool radeon_ring_test_lockup(struct radeon_device *rdev, struct radeon_ring *ring) { uint32_t rptr = radeon_ring_get_rptr(rdev, ring); - unsigned long cjiffies, elapsed; + uint64_t last = atomic64_read(&ring->last_activity); + uint64_t elapsed; - cjiffies = jiffies; - if (!time_after(cjiffies, ring->last_activity)) { - /* likely a wrap around */ + if (rptr != atomic_read(&ring->last_rptr)) { + /* ring is still working, no lockup */ radeon_ring_lockup_update(rdev, ring); return false; } - if (rptr != ring->last_rptr) { - /* CP is still working no lockup */ - radeon_ring_lockup_update(rdev, ring); - return false; - } - elapsed = jiffies_to_msecs(cjiffies - ring->last_activity); + + elapsed = jiffies_to_msecs(jiffies_64 - last); if (radeon_lockup_timeout && elapsed >= radeon_lockup_timeout) { - dev_err(rdev->dev, "GPU lockup CP stall for more than %lumsec\n", elapsed); + dev_err(rdev->dev, "ring %d stalled for more than %llumsec\n", + ring->idx, elapsed); return true; } /* give a chance to the GPU ... */ -- cgit From 37615527c5669f0c332534a797e5aaa175b6f3cb Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 18 Feb 2014 15:58:31 +0100 Subject: drm/radeon: cleanup the fence ring locking code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We no longer need to take the ring lock while checking for a gpu lockup, so just cleanup the code. Signed-off-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon.h | 5 ++- drivers/gpu/drm/radeon/radeon_device.c | 4 +-- drivers/gpu/drm/radeon/radeon_fence.c | 59 ++++++------------------------- drivers/gpu/drm/radeon/radeon_pm.c | 4 +-- drivers/gpu/drm/radeon/radeon_ring.c | 2 +- drivers/gpu/drm/radeon/radeon_semaphore.c | 12 +++++-- 6 files changed, 25 insertions(+), 61 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index e98fe5c7f77b..1ac3393bab66 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -363,9 +363,8 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, i void radeon_fence_process(struct radeon_device *rdev, int ring); bool radeon_fence_signaled(struct radeon_fence *fence); int radeon_fence_wait(struct radeon_fence *fence, bool interruptible); -int radeon_fence_wait_locked(struct radeon_fence *fence); -int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring); -int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring); +int radeon_fence_wait_next(struct radeon_device *rdev, int ring); +int radeon_fence_wait_empty(struct radeon_device *rdev, int ring); int radeon_fence_wait_any(struct radeon_device *rdev, struct radeon_fence **fences, bool intr); diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index b012cbbc3ed5..fa7841b9d7b6 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1445,10 +1445,9 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon) /* evict vram memory */ radeon_bo_evict_vram(rdev); - mutex_lock(&rdev->ring_lock); /* wait for gpu to finish processing current batch */ for (i = 0; i < RADEON_NUM_RINGS; i++) { - r = radeon_fence_wait_empty_locked(rdev, i); + r = radeon_fence_wait_empty(rdev, i); if (r) { /* delay GPU reset to resume */ force_completion = true; @@ -1457,7 +1456,6 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon) if (force_completion) { radeon_fence_driver_force_completion(rdev); } - mutex_unlock(&rdev->ring_lock); radeon_save_bios_scratch_regs(rdev); diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index c37cb79a9489..a77b1c13ea43 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -288,7 +288,6 @@ static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq) * @rdev: radeon device pointer * @target_seq: sequence number(s) we want to wait for * @intr: use interruptable sleep - * @lock_ring: whether the ring should be locked or not * * Wait for the requested sequence number(s) to be written by any ring * (all asics). Sequnce number array is indexed by ring id. @@ -299,7 +298,7 @@ static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq) * -EDEADLK is returned when a GPU lockup has been detected. */ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq, - bool intr, bool lock_ring) + bool intr) { uint64_t last_seq[RADEON_NUM_RINGS]; bool signaled; @@ -358,9 +357,6 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq, if (i != RADEON_NUM_RINGS) continue; - if (lock_ring) - mutex_lock(&rdev->ring_lock); - for (i = 0; i < RADEON_NUM_RINGS; ++i) { if (!target_seq[i]) continue; @@ -378,14 +374,9 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq, /* remember that we need an reset */ rdev->needs_reset = true; - if (lock_ring) - mutex_unlock(&rdev->ring_lock); wake_up_all(&rdev->fence_queue); return -EDEADLK; } - - if (lock_ring) - mutex_unlock(&rdev->ring_lock); } } return 0; @@ -416,7 +407,7 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr) if (seq[fence->ring] == RADEON_FENCE_SIGNALED_SEQ) return 0; - r = radeon_fence_wait_seq(fence->rdev, seq, intr, true); + r = radeon_fence_wait_seq(fence->rdev, seq, intr); if (r) return r; @@ -464,7 +455,7 @@ int radeon_fence_wait_any(struct radeon_device *rdev, if (num_rings == 0) return -ENOENT; - r = radeon_fence_wait_seq(rdev, seq, intr, true); + r = radeon_fence_wait_seq(rdev, seq, intr); if (r) { return r; } @@ -472,37 +463,7 @@ int radeon_fence_wait_any(struct radeon_device *rdev, } /** - * radeon_fence_wait_locked - wait for a fence to signal - * - * @fence: radeon fence object - * - * Wait for the requested fence to signal (all asics). - * Returns 0 if the fence has passed, error for all other cases. - */ -int radeon_fence_wait_locked(struct radeon_fence *fence) -{ - uint64_t seq[RADEON_NUM_RINGS] = {}; - int r; - - if (fence == NULL) { - WARN(1, "Querying an invalid fence : %p !\n", fence); - return -EINVAL; - } - - seq[fence->ring] = fence->seq; - if (seq[fence->ring] == RADEON_FENCE_SIGNALED_SEQ) - return 0; - - r = radeon_fence_wait_seq(fence->rdev, seq, false, false); - if (r) - return r; - - fence->seq = RADEON_FENCE_SIGNALED_SEQ; - return 0; -} - -/** - * radeon_fence_wait_next_locked - wait for the next fence to signal + * radeon_fence_wait_next - wait for the next fence to signal * * @rdev: radeon device pointer * @ring: ring index the fence is associated with @@ -511,7 +472,7 @@ int radeon_fence_wait_locked(struct radeon_fence *fence) * Returns 0 if the next fence has passed, error for all other cases. * Caller must hold ring lock. */ -int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring) +int radeon_fence_wait_next(struct radeon_device *rdev, int ring) { uint64_t seq[RADEON_NUM_RINGS] = {}; @@ -521,11 +482,11 @@ int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring) already the last emited fence */ return -ENOENT; } - return radeon_fence_wait_seq(rdev, seq, false, false); + return radeon_fence_wait_seq(rdev, seq, false); } /** - * radeon_fence_wait_empty_locked - wait for all fences to signal + * radeon_fence_wait_empty - wait for all fences to signal * * @rdev: radeon device pointer * @ring: ring index the fence is associated with @@ -534,7 +495,7 @@ int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring) * Returns 0 if the fences have passed, error for all other cases. * Caller must hold ring lock. */ -int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring) +int radeon_fence_wait_empty(struct radeon_device *rdev, int ring) { uint64_t seq[RADEON_NUM_RINGS] = {}; int r; @@ -543,7 +504,7 @@ int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring) if (!seq[ring]) return 0; - r = radeon_fence_wait_seq(rdev, seq, false, false); + r = radeon_fence_wait_seq(rdev, seq, false); if (r) { if (r == -EDEADLK) return -EDEADLK; @@ -794,7 +755,7 @@ void radeon_fence_driver_fini(struct radeon_device *rdev) for (ring = 0; ring < RADEON_NUM_RINGS; ring++) { if (!rdev->fence_drv[ring].initialized) continue; - r = radeon_fence_wait_empty_locked(rdev, ring); + r = radeon_fence_wait_empty(rdev, ring); if (r) { /* no need to trigger GPU reset as we are unloading */ radeon_fence_driver_force_completion(rdev); diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 4ad9af9fc517..0119af46ede3 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -260,7 +260,7 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev) if (!ring->ready) { continue; } - r = radeon_fence_wait_empty_locked(rdev, i); + r = radeon_fence_wait_empty(rdev, i); if (r) { /* needs a GPU reset dont reset here */ mutex_unlock(&rdev->ring_lock); @@ -896,7 +896,7 @@ force: for (i = 0; i < RADEON_NUM_RINGS; i++) { struct radeon_ring *ring = &rdev->ring[i]; if (ring->ready) - radeon_fence_wait_empty_locked(rdev, i); + radeon_fence_wait_empty(rdev, i); } /* program the new power state */ diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 4c4810272371..fa140119cdb6 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -382,7 +382,7 @@ int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *ring, unsi if (ndw < ring->ring_free_dw) { break; } - r = radeon_fence_wait_next_locked(rdev, ring->idx); + r = radeon_fence_wait_next(rdev, ring->idx); if (r) return r; } diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c index 9006b32d5eed..6140af6772c7 100644 --- a/drivers/gpu/drm/radeon/radeon_semaphore.c +++ b/drivers/gpu/drm/radeon/radeon_semaphore.c @@ -147,7 +147,9 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev, if (++count > RADEON_NUM_SYNCS) { /* not enough room, wait manually */ - radeon_fence_wait_locked(fence); + r = radeon_fence_wait(fence, false); + if (r) + return r; continue; } @@ -161,7 +163,9 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev, if (!radeon_semaphore_emit_signal(rdev, i, semaphore)) { /* signaling wasn't successful wait manually */ radeon_ring_undo(&rdev->ring[i]); - radeon_fence_wait_locked(fence); + r = radeon_fence_wait(fence, false); + if (r) + return r; continue; } @@ -169,7 +173,9 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev, if (!radeon_semaphore_emit_wait(rdev, ring, semaphore)) { /* waiting wasn't successful wait manually */ radeon_ring_undo(&rdev->ring[i]); - radeon_fence_wait_locked(fence); + r = radeon_fence_wait(fence, false); + if (r) + return r; continue; } -- cgit From 14a9579ddbf15dd1992a9481a4ec80b0b91656d5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 21 Feb 2014 11:34:35 -0500 Subject: drm/radeon: use variable UVD clocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that Christian fixed the performance problems with the feedback buffer in mesa, we can enable variable UVD clocks. There are multiple UVD power states associated with different types and numbers of streams. This uses the appropriate state based on that information rather than always using the fastest UVD clocks which saves some power. One possible downside is that this may adversely affect decode benchmarks since these power states target specific playback requirements rather than maximum performance. If that becomes an issue, we can add a sysfs attribute to force the max UVD state. Signed-off-by: Alex Deucher Reviewed-by: Christian König --- drivers/gpu/drm/radeon/radeon_pm.c | 3 --- drivers/gpu/drm/radeon/radeon_uvd.c | 3 +-- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 0119af46ede3..ee738a524639 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -943,8 +943,6 @@ void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable) if (enable) { mutex_lock(&rdev->pm.mutex); rdev->pm.dpm.uvd_active = true; - /* disable this for now */ -#if 0 if ((rdev->pm.dpm.sd == 1) && (rdev->pm.dpm.hd == 0)) dpm_state = POWER_STATE_TYPE_INTERNAL_UVD_SD; else if ((rdev->pm.dpm.sd == 2) && (rdev->pm.dpm.hd == 0)) @@ -954,7 +952,6 @@ void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable) else if ((rdev->pm.dpm.sd == 0) && (rdev->pm.dpm.hd == 2)) dpm_state = POWER_STATE_TYPE_INTERNAL_UVD_HD2; else -#endif dpm_state = POWER_STATE_TYPE_INTERNAL_UVD; rdev->pm.dpm.state = dpm_state; mutex_unlock(&rdev->pm.mutex); diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index 6781fee1eaad..ceb7b289b745 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -805,8 +805,7 @@ void radeon_uvd_note_usage(struct radeon_device *rdev) (rdev->pm.dpm.hd != hd)) { rdev->pm.dpm.sd = sd; rdev->pm.dpm.hd = hd; - /* disable this for now */ - /*streams_changed = true;*/ + streams_changed = true; } } -- cgit From bda72d58a20120aee1f78eb17d7eddb955d6696b Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 2 Mar 2014 00:56:17 +0100 Subject: drm/radeon: add a way to get and set initial buffer domains v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When passing buffers between processes, the receiving process needs to know the original buffer domain, so that it doesn't accidentally move the buffer. v2: reserve the buffer Signed-off-by: Marek Olšák Reviewed-by: Christian König --- drivers/gpu/drm/radeon/radeon.h | 3 +++ drivers/gpu/drm/radeon/radeon_drv.c | 3 ++- drivers/gpu/drm/radeon/radeon_gem.c | 36 ++++++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/radeon_kms.c | 1 + drivers/gpu/drm/radeon/radeon_object.c | 3 +++ 5 files changed, 45 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 1ac3393bab66..c20d88c93940 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -456,6 +456,7 @@ struct radeon_bo { /* Protected by gem.mutex */ struct list_head list; /* Protected by tbo.reserved */ + u32 initial_domain; u32 placements[3]; struct ttm_placement placement; struct ttm_buffer_object tbo; @@ -2116,6 +2117,8 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int radeon_gem_va_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int radeon_gem_op_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 84a1bbb75f91..4392b7c95ee6 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -79,9 +79,10 @@ * 2.35.0 - Add CIK macrotile mode array query * 2.36.0 - Fix CIK DCE tiling setup * 2.37.0 - allow GS ring setup on r6xx/r7xx + * 2.38.0 - RADEON_GEM_OP (GET_INITIAL_DOMAIN, SET_INITIAL_DOMAIN) */ #define KMS_DRIVER_MAJOR 2 -#define KMS_DRIVER_MINOR 37 +#define KMS_DRIVER_MINOR 38 #define KMS_DRIVER_PATCHLEVEL 0 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_unload_kms(struct drm_device *dev); diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index b96c819024b3..9863ca742494 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -533,6 +533,42 @@ out: return r; } +int radeon_gem_op_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + struct drm_radeon_gem_op *args = data; + struct drm_gem_object *gobj; + struct radeon_bo *robj; + int r; + + gobj = drm_gem_object_lookup(dev, filp, args->handle); + if (gobj == NULL) { + return -ENOENT; + } + robj = gem_to_radeon_bo(gobj); + r = radeon_bo_reserve(robj, false); + if (unlikely(r)) + goto out; + + switch (args->op) { + case RADEON_GEM_OP_GET_INITIAL_DOMAIN: + args->value = robj->initial_domain; + break; + case RADEON_GEM_OP_SET_INITIAL_DOMAIN: + robj->initial_domain = args->value & (RADEON_GEM_DOMAIN_VRAM | + RADEON_GEM_DOMAIN_GTT | + RADEON_GEM_DOMAIN_CPU); + break; + default: + r = -EINVAL; + } + + radeon_bo_unreserve(robj); +out: + drm_gem_object_unreference_unlocked(gobj); + return r; +} + int radeon_mode_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args) diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index baff98be65b1..0b631ebeeb18 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -814,5 +814,6 @@ const struct drm_ioctl_desc radeon_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(RADEON_GEM_OP, radeon_gem_op_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), }; int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms); diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 08595cf90b01..dd12bb4025ac 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -145,6 +145,9 @@ int radeon_bo_create(struct radeon_device *rdev, bo->surface_reg = -1; INIT_LIST_HEAD(&bo->list); INIT_LIST_HEAD(&bo->va); + bo->initial_domain = domain & (RADEON_GEM_DOMAIN_VRAM | + RADEON_GEM_DOMAIN_GTT | + RADEON_GEM_DOMAIN_CPU); radeon_ttm_placement_from_domain(bo, domain); /* Kernel allocation are uninterruptible */ down_read(&rdev->pm.mclk_lock); -- cgit From 67e8e3f970ad747d3c854fb40f8ec0cecedd9089 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 2 Mar 2014 00:56:18 +0100 Subject: drm/radeon: track memory statistics about VRAM and GTT usage and buffer moves v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The statistics are: - VRAM usage in bytes - GTT usage in bytes - number of bytes moved by TTM The last one is actually a counter, so you need to sample it before and after command submission and take the difference. This is useful for finding performance bottlenecks. Userspace queries are also added. v2: use atomic64_t Signed-off-by: Marek Olšák Reviewed-by: Christian König --- drivers/gpu/drm/radeon/radeon.h | 4 ++++ drivers/gpu/drm/radeon/radeon_kms.c | 15 ++++++++++++++ drivers/gpu/drm/radeon/radeon_object.c | 36 +++++++++++++++++++++++++++++++++- drivers/gpu/drm/radeon/radeon_object.h | 2 +- drivers/gpu/drm/radeon/radeon_ttm.c | 8 +++++++- 5 files changed, 62 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index c20d88c93940..7bb8fd96f3ce 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2309,6 +2309,10 @@ struct radeon_device { /* virtual memory */ struct radeon_vm_manager vm_manager; struct mutex gpu_clock_mutex; + /* memory stats */ + atomic64_t vram_usage; + atomic64_t gtt_usage; + atomic64_t num_bytes_moved; /* ACPI interface */ struct radeon_atif atif; struct radeon_atcs atcs; diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 0b631ebeeb18..806506c03a79 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -486,6 +486,21 @@ static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file case RADEON_INFO_VCE_FB_VERSION: *value = rdev->vce.fb_version; break; + case RADEON_INFO_NUM_BYTES_MOVED: + value = (uint32_t*)&value64; + value_size = sizeof(uint64_t); + value64 = atomic64_read(&rdev->num_bytes_moved); + break; + case RADEON_INFO_VRAM_USAGE: + value = (uint32_t*)&value64; + value_size = sizeof(uint64_t); + value64 = atomic64_read(&rdev->vram_usage); + break; + case RADEON_INFO_GTT_USAGE: + value = (uint32_t*)&value64; + value_size = sizeof(uint64_t); + value64 = atomic64_read(&rdev->gtt_usage); + break; default: DRM_DEBUG_KMS("Invalid request %d\n", info->request); return -EINVAL; diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index dd12bb4025ac..282d6a248396 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -56,11 +56,36 @@ static void radeon_bo_clear_va(struct radeon_bo *bo) } } +static void radeon_update_memory_usage(struct radeon_bo *bo, + unsigned mem_type, int sign) +{ + struct radeon_device *rdev = bo->rdev; + u64 size = (u64)bo->tbo.num_pages << PAGE_SHIFT; + + switch (mem_type) { + case TTM_PL_TT: + if (sign > 0) + atomic64_add(size, &rdev->gtt_usage); + else + atomic64_sub(size, &rdev->gtt_usage); + break; + case TTM_PL_VRAM: + if (sign > 0) + atomic64_add(size, &rdev->vram_usage); + else + atomic64_sub(size, &rdev->vram_usage); + break; + } +} + static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo) { struct radeon_bo *bo; bo = container_of(tbo, struct radeon_bo, tbo); + + radeon_update_memory_usage(bo, bo->tbo.mem.mem_type, -1); + mutex_lock(&bo->rdev->gem.mutex); list_del_init(&bo->list); mutex_unlock(&bo->rdev->gem.mutex); @@ -567,14 +592,23 @@ int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved, } void radeon_bo_move_notify(struct ttm_buffer_object *bo, - struct ttm_mem_reg *mem) + struct ttm_mem_reg *new_mem) { struct radeon_bo *rbo; + if (!radeon_ttm_bo_is_radeon_bo(bo)) return; + rbo = container_of(bo, struct radeon_bo, tbo); radeon_bo_check_tiling(rbo, 0, 1); radeon_vm_bo_invalidate(rbo->rdev, rbo); + + /* update statistics */ + if (!new_mem) + return; + + radeon_update_memory_usage(rbo, bo->mem.mem_type, -1); + radeon_update_memory_usage(rbo, new_mem->mem_type, 1); } int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo) diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index 209b11150263..a9a8c11bd80d 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -151,7 +151,7 @@ extern void radeon_bo_get_tiling_flags(struct radeon_bo *bo, extern int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved, bool force_drop); extern void radeon_bo_move_notify(struct ttm_buffer_object *bo, - struct ttm_mem_reg *mem); + struct ttm_mem_reg *new_mem); extern int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo); extern int radeon_bo_get_surface_reg(struct radeon_bo *bo); diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 77f5b0c3edb8..60dfce889ecf 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -406,8 +406,14 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, if (r) { memcpy: r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem); + if (r) { + return r; + } } - return r; + + /* update statistics */ + atomic64_add((u64)bo->num_pages << PAGE_SHIFT, &rdev->num_bytes_moved); + return 0; } static int radeon_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem) -- cgit From 0bc490a8d9e0f2f54ec8f9d09a367db66605ff40 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 2 Mar 2014 00:56:19 +0100 Subject: drm/radeon: deduplicate code in radeon_gem_busy_ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Marek Olšák Reviewed-by: Christian König --- drivers/gpu/drm/radeon/radeon_gem.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 9863ca742494..d09650c1d720 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -344,18 +344,7 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, } robj = gem_to_radeon_bo(gobj); r = radeon_bo_wait(robj, &cur_placement, true); - switch (cur_placement) { - case TTM_PL_VRAM: - args->domain = RADEON_GEM_DOMAIN_VRAM; - break; - case TTM_PL_TT: - args->domain = RADEON_GEM_DOMAIN_GTT; - break; - case TTM_PL_SYSTEM: - args->domain = RADEON_GEM_DOMAIN_CPU; - default: - break; - } + args->domain = radeon_mem_type_to_domain(cur_placement); drm_gem_object_unreference_unlocked(gobj); r = radeon_gem_handle_lockup(rdev, r); return r; -- cgit From 4330441a745ea0f1fd881438a0bbdfedda65f74a Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 2 Mar 2014 00:56:20 +0100 Subject: drm/radeon: add buffers to the LRU list from smallest to largest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Marek Olšák Reviewed-by: Christian König --- drivers/gpu/drm/radeon/radeon_cs.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index f28a8d82fa19..d49a3f705e49 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -24,6 +24,7 @@ * Authors: * Jerome Glisse */ +#include #include #include #include "radeon_reg.h" @@ -290,6 +291,16 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) return 0; } +static int cmp_size_smaller_first(void *priv, struct list_head *a, + struct list_head *b) +{ + struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, tv.head); + struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, tv.head); + + /* Sort A before B if A is smaller. */ + return (int)la->bo->tbo.num_pages - (int)lb->bo->tbo.num_pages; +} + /** * cs_parser_fini() - clean parser states * @parser: parser structure holding parsing context. @@ -303,6 +314,18 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo unsigned i; if (!error) { + /* Sort the buffer list from the smallest to largest buffer, + * which affects the order of buffers in the LRU list. + * This assures that the smallest buffers are added first + * to the LRU list, so they are likely to be later evicted + * first, instead of large buffers whose eviction is more + * expensive. + * + * This slightly lowers the number of bytes moved by TTM + * per frame under memory pressure. + */ + list_sort(NULL, &parser->validated, cmp_size_smaller_first); + ttm_eu_fence_buffer_objects(&parser->ticket, &parser->validated, parser->ib.fence); -- cgit From c9b76548899cde2e729e3bca015d7e78ec5baad7 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 2 Mar 2014 00:56:21 +0100 Subject: drm/radeon: validate relocations in the order determined by userspace v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Userspace should set the first 4 bits of drm_radeon_cs_reloc::flags to a number from 0 to 15. The higher the number, the higher the priority, which means a buffer with a higher number will be validated sooner. The old behavior is preserved: Buffers used for write are prioritized over read-only buffers if the userspace doesn't set the number. v2: add buffers to buckets directly, then concatenate them v3: use a stable sort Signed-off-by: Marek Olšák Reviewed-by: Christian König --- drivers/gpu/drm/radeon/radeon.h | 1 - drivers/gpu/drm/radeon/radeon_cs.c | 64 ++++++++++++++++++++++++++++++++-- drivers/gpu/drm/radeon/radeon_object.c | 10 ------ drivers/gpu/drm/radeon/radeon_object.h | 2 -- 4 files changed, 61 insertions(+), 16 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 7bb8fd96f3ce..efad56705e32 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -483,7 +483,6 @@ struct radeon_bo_list { struct ttm_validate_buffer tv; struct radeon_bo *bo; uint64_t gpu_offset; - bool written; unsigned domain; unsigned alt_domain; u32 tiling_flags; diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index d49a3f705e49..07e165128dbf 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -31,10 +31,52 @@ #include "radeon.h" #include "radeon_trace.h" +#define RADEON_CS_MAX_PRIORITY 32u +#define RADEON_CS_NUM_BUCKETS (RADEON_CS_MAX_PRIORITY + 1) + +/* This is based on the bucket sort with O(n) time complexity. + * An item with priority "i" is added to bucket[i]. The lists are then + * concatenated in descending order. + */ +struct radeon_cs_buckets { + struct list_head bucket[RADEON_CS_NUM_BUCKETS]; +}; + +static void radeon_cs_buckets_init(struct radeon_cs_buckets *b) +{ + unsigned i; + + for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) + INIT_LIST_HEAD(&b->bucket[i]); +} + +static void radeon_cs_buckets_add(struct radeon_cs_buckets *b, + struct list_head *item, unsigned priority) +{ + /* Since buffers which appear sooner in the relocation list are + * likely to be used more often than buffers which appear later + * in the list, the sort mustn't change the ordering of buffers + * with the same priority, i.e. it must be stable. + */ + list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]); +} + +static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b, + struct list_head *out_list) +{ + unsigned i; + + /* Connect the sorted buckets in the output list. */ + for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) { + list_splice(&b->bucket[i], out_list); + } +} + static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) { struct drm_device *ddev = p->rdev->ddev; struct radeon_cs_chunk *chunk; + struct radeon_cs_buckets buckets; unsigned i, j; bool duplicate; @@ -53,8 +95,12 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) if (p->relocs == NULL) { return -ENOMEM; } + + radeon_cs_buckets_init(&buckets); + for (i = 0; i < p->nrelocs; i++) { struct drm_radeon_cs_reloc *r; + unsigned priority; duplicate = false; r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4]; @@ -80,7 +126,14 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) p->relocs_ptr[i] = &p->relocs[i]; p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj); p->relocs[i].lobj.bo = p->relocs[i].robj; - p->relocs[i].lobj.written = !!r->write_domain; + + /* The userspace buffer priorities are from 0 to 15. A higher + * number means the buffer is more important. + * Also, the buffers used for write have a higher priority than + * the buffers used for read only, which doubles the range + * to 0 to 31. 32 is reserved for the kernel driver. + */ + priority = (r->flags & 0xf) * 2 + !!r->write_domain; /* the first reloc of an UVD job is the msg and that must be in VRAM, also but everything into VRAM on AGP cards to avoid @@ -94,6 +147,8 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) p->relocs[i].lobj.alt_domain = RADEON_GEM_DOMAIN_VRAM; + /* prioritize this over any other relocation */ + priority = RADEON_CS_MAX_PRIORITY; } else { uint32_t domain = r->write_domain ? r->write_domain : r->read_domains; @@ -107,9 +162,12 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo; p->relocs[i].handle = r->handle; - radeon_bo_list_add_object(&p->relocs[i].lobj, - &p->validated); + radeon_cs_buckets_add(&buckets, &p->relocs[i].lobj.tv.head, + priority); } + + radeon_cs_buckets_get_list(&buckets, &p->validated); + return radeon_bo_list_validate(&p->ticket, &p->validated, p->ring); } diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 282d6a248396..8399fe021769 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -366,16 +366,6 @@ void radeon_bo_fini(struct radeon_device *rdev) arch_phys_wc_del(rdev->mc.vram_mtrr); } -void radeon_bo_list_add_object(struct radeon_bo_list *lobj, - struct list_head *head) -{ - if (lobj->written) { - list_add(&lobj->tv.head, head); - } else { - list_add_tail(&lobj->tv.head, head); - } -} - int radeon_bo_list_validate(struct ww_acquire_ctx *ticket, struct list_head *head, int ring) { diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index a9a8c11bd80d..6c3ca9edc2f4 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -138,8 +138,6 @@ extern int radeon_bo_evict_vram(struct radeon_device *rdev); extern void radeon_bo_force_delete(struct radeon_device *rdev); extern int radeon_bo_init(struct radeon_device *rdev); extern void radeon_bo_fini(struct radeon_device *rdev); -extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj, - struct list_head *head); extern int radeon_bo_list_validate(struct ww_acquire_ctx *ticket, struct list_head *head, int ring); extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo, -- cgit From 19dff56a5f4ba1f3a6e28282415a95a48c27bccf Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 2 Mar 2014 00:56:22 +0100 Subject: drm/radeon: limit how much memory TTM can move per IB according to VRAM usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Marek Olšák Reviewed-by: Christian König --- drivers/gpu/drm/radeon/radeon_cs.c | 2 +- drivers/gpu/drm/radeon/radeon_object.c | 88 +++++++++++++++++++++++++++++++--- drivers/gpu/drm/radeon/radeon_object.h | 3 +- 3 files changed, 85 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 07e165128dbf..5abae403ea4f 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -168,7 +168,7 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) radeon_cs_buckets_get_list(&buckets, &p->validated); - return radeon_bo_list_validate(&p->ticket, &p->validated, p->ring); + return radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring); } static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority) diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 8399fe021769..ed03f2d15853 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -366,29 +366,105 @@ void radeon_bo_fini(struct radeon_device *rdev) arch_phys_wc_del(rdev->mc.vram_mtrr); } -int radeon_bo_list_validate(struct ww_acquire_ctx *ticket, +/* Returns how many bytes TTM can move per IB. + */ +static u64 radeon_bo_get_threshold_for_moves(struct radeon_device *rdev) +{ + u64 real_vram_size = rdev->mc.real_vram_size; + u64 vram_usage = atomic64_read(&rdev->vram_usage); + + /* This function is based on the current VRAM usage. + * + * - If all of VRAM is free, allow relocating the number of bytes that + * is equal to 1/4 of the size of VRAM for this IB. + + * - If more than one half of VRAM is occupied, only allow relocating + * 1 MB of data for this IB. + * + * - From 0 to one half of used VRAM, the threshold decreases + * linearly. + * __________________ + * 1/4 of -|\ | + * VRAM | \ | + * | \ | + * | \ | + * | \ | + * | \ | + * | \ | + * | \________|1 MB + * |----------------| + * VRAM 0 % 100 % + * used used + * + * Note: It's a threshold, not a limit. The threshold must be crossed + * for buffer relocations to stop, so any buffer of an arbitrary size + * can be moved as long as the threshold isn't crossed before + * the relocation takes place. We don't want to disable buffer + * relocations completely. + * + * The idea is that buffers should be placed in VRAM at creation time + * and TTM should only do a minimum number of relocations during + * command submission. In practice, you need to submit at least + * a dozen IBs to move all buffers to VRAM if they are in GTT. + * + * Also, things can get pretty crazy under memory pressure and actual + * VRAM usage can change a lot, so playing safe even at 50% does + * consistently increase performance. + */ + + u64 half_vram = real_vram_size >> 1; + u64 half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage; + u64 bytes_moved_threshold = half_free_vram >> 1; + return max(bytes_moved_threshold, 1024*1024ull); +} + +int radeon_bo_list_validate(struct radeon_device *rdev, + struct ww_acquire_ctx *ticket, struct list_head *head, int ring) { struct radeon_bo_list *lobj; struct radeon_bo *bo; - u32 domain; int r; + u64 bytes_moved = 0, initial_bytes_moved; + u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev); r = ttm_eu_reserve_buffers(ticket, head); if (unlikely(r != 0)) { return r; } + list_for_each_entry(lobj, head, tv.head) { bo = lobj->bo; if (!bo->pin_count) { - domain = lobj->domain; - + u32 domain = lobj->domain; + u32 current_domain = + radeon_mem_type_to_domain(bo->tbo.mem.mem_type); + + /* Check if this buffer will be moved and don't move it + * if we have moved too many buffers for this IB already. + * + * Note that this allows moving at least one buffer of + * any size, because it doesn't take the current "bo" + * into account. We don't want to disallow buffer moves + * completely. + */ + if (current_domain != RADEON_GEM_DOMAIN_CPU && + (domain & current_domain) == 0 && /* will be moved */ + bytes_moved > bytes_moved_threshold) { + /* don't move it */ + domain = current_domain; + } + retry: radeon_ttm_placement_from_domain(bo, domain); if (ring == R600_RING_TYPE_UVD_INDEX) radeon_uvd_force_into_uvd_segment(bo); - r = ttm_bo_validate(&bo->tbo, &bo->placement, - true, false); + + initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved); + r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); + bytes_moved += atomic64_read(&rdev->num_bytes_moved) - + initial_bytes_moved; + if (unlikely(r)) { if (r != -ERESTARTSYS && domain != lobj->alt_domain) { domain = lobj->alt_domain; diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index 6c3ca9edc2f4..7dff64d1f592 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -138,7 +138,8 @@ extern int radeon_bo_evict_vram(struct radeon_device *rdev); extern void radeon_bo_force_delete(struct radeon_device *rdev); extern int radeon_bo_init(struct radeon_device *rdev); extern void radeon_bo_fini(struct radeon_device *rdev); -extern int radeon_bo_list_validate(struct ww_acquire_ctx *ticket, +extern int radeon_bo_list_validate(struct radeon_device *rdev, + struct ww_acquire_ctx *ticket, struct list_head *head, int ring); extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo, struct vm_area_struct *vma); -- cgit From f1e3dc708aaadb960b15ee40029f611475f14027 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 20 Feb 2014 17:34:06 +0100 Subject: drm/radeon: fix missing bo reservation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König --- drivers/gpu/drm/radeon/radeon_kms.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 806506c03a79..7a810d0796a0 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -561,6 +561,10 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) radeon_vm_init(rdev, &fpriv->vm); + r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); + if (r) + return r; + /* map the ib pool buffer read only into * virtual address space */ bo_va = radeon_vm_bo_add(rdev, &fpriv->vm, @@ -568,6 +572,8 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) r = radeon_vm_bo_set_addr(rdev, bo_va, RADEON_VA_IB_OFFSET, RADEON_VM_PAGE_READABLE | RADEON_VM_PAGE_SNOOPED); + + radeon_bo_unreserve(rdev->ring_tmp_bo.bo); if (r) { radeon_vm_fini(rdev, &fpriv->vm); kfree(fpriv); -- cgit From b03b4e4b6eb0563f2dc83c482b57b90b637ab81c Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 28 Feb 2014 13:16:32 +0100 Subject: drm/radeon: fix VCE suspend/resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König --- drivers/gpu/drm/radeon/radeon.h | 1 - drivers/gpu/drm/radeon/radeon_vce.c | 63 ++++++++++++++++++++----------------- 2 files changed, 34 insertions(+), 30 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index efad56705e32..40ab8a28c998 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1634,7 +1634,6 @@ int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev, struct radeon_vce { struct radeon_bo *vcpu_bo; - void *cpu_addr; uint64_t gpu_addr; unsigned fw_version; unsigned fb_version; diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c index d130432e313a..39ec7d8f33ab 100644 --- a/drivers/gpu/drm/radeon/radeon_vce.c +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -119,7 +119,7 @@ int radeon_vce_init(struct radeon_device *rdev) if (rdev->vce.fw_version != ((40 << 24) | (2 << 16) | (2 << 8))) return -EINVAL; - /* load firmware into VRAM */ + /* allocate firmware, stack and heap BO */ size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size) + RADEON_VCE_STACK_SIZE + RADEON_VCE_HEAP_SIZE; @@ -130,16 +130,21 @@ int radeon_vce_init(struct radeon_device *rdev) return r; } - r = radeon_vce_resume(rdev); - if (r) + r = radeon_bo_reserve(rdev->vce.vcpu_bo, false); + if (r) { + radeon_bo_unref(&rdev->vce.vcpu_bo); + dev_err(rdev->dev, "(%d) failed to reserve VCE bo\n", r); return r; + } - memset(rdev->vce.cpu_addr, 0, size); - memcpy(rdev->vce.cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size); - - r = radeon_vce_suspend(rdev); - if (r) + r = radeon_bo_pin(rdev->vce.vcpu_bo, RADEON_GEM_DOMAIN_VRAM, + &rdev->vce.gpu_addr); + radeon_bo_unreserve(rdev->vce.vcpu_bo); + if (r) { + radeon_bo_unref(&rdev->vce.vcpu_bo); + dev_err(rdev->dev, "(%d) VCE bo pin failed\n", r); return r; + } for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) { atomic_set(&rdev->vce.handles[i], 0); @@ -158,8 +163,12 @@ int radeon_vce_init(struct radeon_device *rdev) */ void radeon_vce_fini(struct radeon_device *rdev) { - radeon_vce_suspend(rdev); + if (rdev->vce.vcpu_bo == NULL) + return; + radeon_bo_unref(&rdev->vce.vcpu_bo); + + release_firmware(rdev->vce_fw); } /** @@ -167,22 +176,23 @@ void radeon_vce_fini(struct radeon_device *rdev) * * @rdev: radeon_device pointer * - * TODO: Test VCE suspend/resume */ int radeon_vce_suspend(struct radeon_device *rdev) { - int r; + int i; if (rdev->vce.vcpu_bo == NULL) return 0; - r = radeon_bo_reserve(rdev->vce.vcpu_bo, false); - if (!r) { - radeon_bo_kunmap(rdev->vce.vcpu_bo); - radeon_bo_unpin(rdev->vce.vcpu_bo); - radeon_bo_unreserve(rdev->vce.vcpu_bo); - } - return r; + for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) + if (atomic_read(&rdev->vce.handles[i])) + break; + + if (i == RADEON_MAX_VCE_HANDLES) + return 0; + + /* TODO: suspending running encoding sessions isn't supported */ + return -EINVAL; } /** @@ -190,10 +200,10 @@ int radeon_vce_suspend(struct radeon_device *rdev) * * @rdev: radeon_device pointer * - * TODO: Test VCE suspend/resume */ int radeon_vce_resume(struct radeon_device *rdev) { + void *cpu_addr; int r; if (rdev->vce.vcpu_bo == NULL) @@ -201,26 +211,21 @@ int radeon_vce_resume(struct radeon_device *rdev) r = radeon_bo_reserve(rdev->vce.vcpu_bo, false); if (r) { - radeon_bo_unref(&rdev->vce.vcpu_bo); dev_err(rdev->dev, "(%d) failed to reserve VCE bo\n", r); return r; } - r = radeon_bo_pin(rdev->vce.vcpu_bo, RADEON_GEM_DOMAIN_VRAM, - &rdev->vce.gpu_addr); + r = radeon_bo_kmap(rdev->vce.vcpu_bo, &cpu_addr); if (r) { radeon_bo_unreserve(rdev->vce.vcpu_bo); - radeon_bo_unref(&rdev->vce.vcpu_bo); - dev_err(rdev->dev, "(%d) VCE bo pin failed\n", r); - return r; - } - - r = radeon_bo_kmap(rdev->vce.vcpu_bo, &rdev->vce.cpu_addr); - if (r) { dev_err(rdev->dev, "(%d) VCE map failed\n", r); return r; } + memcpy(cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size); + + radeon_bo_kunmap(rdev->vce.vcpu_bo); + radeon_bo_unreserve(rdev->vce.vcpu_bo); return 0; -- cgit From 2280ab57b6edc8581497d5e101c4694faf839c3e Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 20 Feb 2014 10:25:15 +0100 Subject: drm/radeon: separate gart and vm functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both are complex enough on their own. Signed-off-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/radeon/Makefile | 2 +- drivers/gpu/drm/radeon/radeon_gart.c | 958 ---------------------------------- drivers/gpu/drm/radeon/radeon_vm.c | 981 +++++++++++++++++++++++++++++++++++ 3 files changed, 982 insertions(+), 959 deletions(-) create mode 100644 drivers/gpu/drm/radeon/radeon_vm.c (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index ed60caa32518..09433534dc47 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -80,7 +80,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \ r600_dpm.o rs780_dpm.o rv6xx_dpm.o rv770_dpm.o rv730_dpm.o rv740_dpm.o \ rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \ trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \ - ci_dpm.o dce6_afmt.o + ci_dpm.o dce6_afmt.o radeon_vm.o # add async DMA block radeon-y += \ diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index a8f9b463bf2a..2e723651069b 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -28,8 +28,6 @@ #include #include #include "radeon.h" -#include "radeon_reg.h" -#include "radeon_trace.h" /* * GART @@ -394,959 +392,3 @@ void radeon_gart_fini(struct radeon_device *rdev) radeon_dummy_page_fini(rdev); } - -/* - * GPUVM - * GPUVM is similar to the legacy gart on older asics, however - * rather than there being a single global gart table - * for the entire GPU, there are multiple VM page tables active - * at any given time. The VM page tables can contain a mix - * vram pages and system memory pages and system memory pages - * can be mapped as snooped (cached system pages) or unsnooped - * (uncached system pages). - * Each VM has an ID associated with it and there is a page table - * associated with each VMID. When execting a command buffer, - * the kernel tells the the ring what VMID to use for that command - * buffer. VMIDs are allocated dynamically as commands are submitted. - * The userspace drivers maintain their own address space and the kernel - * sets up their pages tables accordingly when they submit their - * command buffers and a VMID is assigned. - * Cayman/Trinity support up to 8 active VMs at any given time; - * SI supports 16. - */ - -/* - * vm helpers - * - * TODO bind a default page at vm initialization for default address - */ - -/** - * radeon_vm_num_pde - return the number of page directory entries - * - * @rdev: radeon_device pointer - * - * Calculate the number of page directory entries (cayman+). - */ -static unsigned radeon_vm_num_pdes(struct radeon_device *rdev) -{ - return rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE; -} - -/** - * radeon_vm_directory_size - returns the size of the page directory in bytes - * - * @rdev: radeon_device pointer - * - * Calculate the size of the page directory in bytes (cayman+). - */ -static unsigned radeon_vm_directory_size(struct radeon_device *rdev) -{ - return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8); -} - -/** - * radeon_vm_manager_init - init the vm manager - * - * @rdev: radeon_device pointer - * - * Init the vm manager (cayman+). - * Returns 0 for success, error for failure. - */ -int radeon_vm_manager_init(struct radeon_device *rdev) -{ - struct radeon_vm *vm; - struct radeon_bo_va *bo_va; - int r; - unsigned size; - - if (!rdev->vm_manager.enabled) { - /* allocate enough for 2 full VM pts */ - size = radeon_vm_directory_size(rdev); - size += rdev->vm_manager.max_pfn * 8; - size *= 2; - r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager, - RADEON_GPU_PAGE_ALIGN(size), - RADEON_VM_PTB_ALIGN_SIZE, - RADEON_GEM_DOMAIN_VRAM); - if (r) { - dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n", - (rdev->vm_manager.max_pfn * 8) >> 10); - return r; - } - - r = radeon_asic_vm_init(rdev); - if (r) - return r; - - rdev->vm_manager.enabled = true; - - r = radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager); - if (r) - return r; - } - - /* restore page table */ - list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) { - if (vm->page_directory == NULL) - continue; - - list_for_each_entry(bo_va, &vm->va, vm_list) { - bo_va->valid = false; - } - } - return 0; -} - -/** - * radeon_vm_free_pt - free the page table for a specific vm - * - * @rdev: radeon_device pointer - * @vm: vm to unbind - * - * Free the page table of a specific vm (cayman+). - * - * Global and local mutex must be lock! - */ -static void radeon_vm_free_pt(struct radeon_device *rdev, - struct radeon_vm *vm) -{ - struct radeon_bo_va *bo_va; - int i; - - if (!vm->page_directory) - return; - - list_del_init(&vm->list); - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); - - list_for_each_entry(bo_va, &vm->va, vm_list) { - bo_va->valid = false; - } - - if (vm->page_tables == NULL) - return; - - for (i = 0; i < radeon_vm_num_pdes(rdev); i++) - radeon_sa_bo_free(rdev, &vm->page_tables[i], vm->fence); - - kfree(vm->page_tables); -} - -/** - * radeon_vm_manager_fini - tear down the vm manager - * - * @rdev: radeon_device pointer - * - * Tear down the VM manager (cayman+). - */ -void radeon_vm_manager_fini(struct radeon_device *rdev) -{ - struct radeon_vm *vm, *tmp; - int i; - - if (!rdev->vm_manager.enabled) - return; - - mutex_lock(&rdev->vm_manager.lock); - /* free all allocated page tables */ - list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) { - mutex_lock(&vm->mutex); - radeon_vm_free_pt(rdev, vm); - mutex_unlock(&vm->mutex); - } - for (i = 0; i < RADEON_NUM_VM; ++i) { - radeon_fence_unref(&rdev->vm_manager.active[i]); - } - radeon_asic_vm_fini(rdev); - mutex_unlock(&rdev->vm_manager.lock); - - radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager); - radeon_sa_bo_manager_fini(rdev, &rdev->vm_manager.sa_manager); - rdev->vm_manager.enabled = false; -} - -/** - * radeon_vm_evict - evict page table to make room for new one - * - * @rdev: radeon_device pointer - * @vm: VM we want to allocate something for - * - * Evict a VM from the lru, making sure that it isn't @vm. (cayman+). - * Returns 0 for success, -ENOMEM for failure. - * - * Global and local mutex must be locked! - */ -static int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm) -{ - struct radeon_vm *vm_evict; - - if (list_empty(&rdev->vm_manager.lru_vm)) - return -ENOMEM; - - vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, - struct radeon_vm, list); - if (vm_evict == vm) - return -ENOMEM; - - mutex_lock(&vm_evict->mutex); - radeon_vm_free_pt(rdev, vm_evict); - mutex_unlock(&vm_evict->mutex); - return 0; -} - -/** - * radeon_vm_alloc_pt - allocates a page table for a VM - * - * @rdev: radeon_device pointer - * @vm: vm to bind - * - * Allocate a page table for the requested vm (cayman+). - * Returns 0 for success, error for failure. - * - * Global and local mutex must be locked! - */ -int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm) -{ - unsigned pd_size, pd_entries, pts_size; - struct radeon_ib ib; - int r; - - if (vm == NULL) { - return -EINVAL; - } - - if (vm->page_directory != NULL) { - return 0; - } - - pd_size = radeon_vm_directory_size(rdev); - pd_entries = radeon_vm_num_pdes(rdev); - -retry: - r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, - &vm->page_directory, pd_size, - RADEON_VM_PTB_ALIGN_SIZE, false); - if (r == -ENOMEM) { - r = radeon_vm_evict(rdev, vm); - if (r) - return r; - goto retry; - - } else if (r) { - return r; - } - - vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->page_directory); - - /* Initially clear the page directory */ - r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, - NULL, pd_entries * 2 + 64); - if (r) { - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); - return r; - } - - ib.length_dw = 0; - - radeon_asic_vm_set_page(rdev, &ib, vm->pd_gpu_addr, - 0, pd_entries, 0, 0); - - radeon_semaphore_sync_to(ib.semaphore, vm->fence); - r = radeon_ib_schedule(rdev, &ib, NULL); - if (r) { - radeon_ib_free(rdev, &ib); - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); - return r; - } - radeon_fence_unref(&vm->fence); - vm->fence = radeon_fence_ref(ib.fence); - radeon_ib_free(rdev, &ib); - radeon_fence_unref(&vm->last_flush); - - /* allocate page table array */ - pts_size = radeon_vm_num_pdes(rdev) * sizeof(struct radeon_sa_bo *); - vm->page_tables = kzalloc(pts_size, GFP_KERNEL); - - if (vm->page_tables == NULL) { - DRM_ERROR("Cannot allocate memory for page table array\n"); - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); - return -ENOMEM; - } - - return 0; -} - -/** - * radeon_vm_add_to_lru - add VMs page table to LRU list - * - * @rdev: radeon_device pointer - * @vm: vm to add to LRU - * - * Add the allocated page table to the LRU list (cayman+). - * - * Global mutex must be locked! - */ -void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm) -{ - list_del_init(&vm->list); - list_add_tail(&vm->list, &rdev->vm_manager.lru_vm); -} - -/** - * radeon_vm_grab_id - allocate the next free VMID - * - * @rdev: radeon_device pointer - * @vm: vm to allocate id for - * @ring: ring we want to submit job to - * - * Allocate an id for the vm (cayman+). - * Returns the fence we need to sync to (if any). - * - * Global and local mutex must be locked! - */ -struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, - struct radeon_vm *vm, int ring) -{ - struct radeon_fence *best[RADEON_NUM_RINGS] = {}; - unsigned choices[2] = {}; - unsigned i; - - /* check if the id is still valid */ - if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id]) - return NULL; - - /* we definately need to flush */ - radeon_fence_unref(&vm->last_flush); - - /* skip over VMID 0, since it is the system VM */ - for (i = 1; i < rdev->vm_manager.nvm; ++i) { - struct radeon_fence *fence = rdev->vm_manager.active[i]; - - if (fence == NULL) { - /* found a free one */ - vm->id = i; - trace_radeon_vm_grab_id(vm->id, ring); - return NULL; - } - - if (radeon_fence_is_earlier(fence, best[fence->ring])) { - best[fence->ring] = fence; - choices[fence->ring == ring ? 0 : 1] = i; - } - } - - for (i = 0; i < 2; ++i) { - if (choices[i]) { - vm->id = choices[i]; - trace_radeon_vm_grab_id(vm->id, ring); - return rdev->vm_manager.active[choices[i]]; - } - } - - /* should never happen */ - BUG(); - return NULL; -} - -/** - * radeon_vm_fence - remember fence for vm - * - * @rdev: radeon_device pointer - * @vm: vm we want to fence - * @fence: fence to remember - * - * Fence the vm (cayman+). - * Set the fence used to protect page table and id. - * - * Global and local mutex must be locked! - */ -void radeon_vm_fence(struct radeon_device *rdev, - struct radeon_vm *vm, - struct radeon_fence *fence) -{ - radeon_fence_unref(&rdev->vm_manager.active[vm->id]); - rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence); - - radeon_fence_unref(&vm->fence); - vm->fence = radeon_fence_ref(fence); - - radeon_fence_unref(&vm->last_id_use); - vm->last_id_use = radeon_fence_ref(fence); -} - -/** - * radeon_vm_bo_find - find the bo_va for a specific vm & bo - * - * @vm: requested vm - * @bo: requested buffer object - * - * Find @bo inside the requested vm (cayman+). - * Search inside the @bos vm list for the requested vm - * Returns the found bo_va or NULL if none is found - * - * Object has to be reserved! - */ -struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm, - struct radeon_bo *bo) -{ - struct radeon_bo_va *bo_va; - - list_for_each_entry(bo_va, &bo->va, bo_list) { - if (bo_va->vm == vm) { - return bo_va; - } - } - return NULL; -} - -/** - * radeon_vm_bo_add - add a bo to a specific vm - * - * @rdev: radeon_device pointer - * @vm: requested vm - * @bo: radeon buffer object - * - * Add @bo into the requested vm (cayman+). - * Add @bo to the list of bos associated with the vm - * Returns newly added bo_va or NULL for failure - * - * Object has to be reserved! - */ -struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev, - struct radeon_vm *vm, - struct radeon_bo *bo) -{ - struct radeon_bo_va *bo_va; - - bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); - if (bo_va == NULL) { - return NULL; - } - bo_va->vm = vm; - bo_va->bo = bo; - bo_va->soffset = 0; - bo_va->eoffset = 0; - bo_va->flags = 0; - bo_va->valid = false; - bo_va->ref_count = 1; - INIT_LIST_HEAD(&bo_va->bo_list); - INIT_LIST_HEAD(&bo_va->vm_list); - - mutex_lock(&vm->mutex); - list_add(&bo_va->vm_list, &vm->va); - list_add_tail(&bo_va->bo_list, &bo->va); - mutex_unlock(&vm->mutex); - - return bo_va; -} - -/** - * radeon_vm_bo_set_addr - set bos virtual address inside a vm - * - * @rdev: radeon_device pointer - * @bo_va: bo_va to store the address - * @soffset: requested offset of the buffer in the VM address space - * @flags: attributes of pages (read/write/valid/etc.) - * - * Set offset of @bo_va (cayman+). - * Validate and set the offset requested within the vm address space. - * Returns 0 for success, error for failure. - * - * Object has to be reserved! - */ -int radeon_vm_bo_set_addr(struct radeon_device *rdev, - struct radeon_bo_va *bo_va, - uint64_t soffset, - uint32_t flags) -{ - uint64_t size = radeon_bo_size(bo_va->bo); - uint64_t eoffset, last_offset = 0; - struct radeon_vm *vm = bo_va->vm; - struct radeon_bo_va *tmp; - struct list_head *head; - unsigned last_pfn; - - if (soffset) { - /* make sure object fit at this offset */ - eoffset = soffset + size; - if (soffset >= eoffset) { - return -EINVAL; - } - - last_pfn = eoffset / RADEON_GPU_PAGE_SIZE; - if (last_pfn > rdev->vm_manager.max_pfn) { - dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n", - last_pfn, rdev->vm_manager.max_pfn); - return -EINVAL; - } - - } else { - eoffset = last_pfn = 0; - } - - mutex_lock(&vm->mutex); - head = &vm->va; - last_offset = 0; - list_for_each_entry(tmp, &vm->va, vm_list) { - if (bo_va == tmp) { - /* skip over currently modified bo */ - continue; - } - - if (soffset >= last_offset && eoffset <= tmp->soffset) { - /* bo can be added before this one */ - break; - } - if (eoffset > tmp->soffset && soffset < tmp->eoffset) { - /* bo and tmp overlap, invalid offset */ - dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n", - bo_va->bo, (unsigned)bo_va->soffset, tmp->bo, - (unsigned)tmp->soffset, (unsigned)tmp->eoffset); - mutex_unlock(&vm->mutex); - return -EINVAL; - } - last_offset = tmp->eoffset; - head = &tmp->vm_list; - } - - bo_va->soffset = soffset; - bo_va->eoffset = eoffset; - bo_va->flags = flags; - bo_va->valid = false; - list_move(&bo_va->vm_list, head); - - mutex_unlock(&vm->mutex); - return 0; -} - -/** - * radeon_vm_map_gart - get the physical address of a gart page - * - * @rdev: radeon_device pointer - * @addr: the unmapped addr - * - * Look up the physical address of the page that the pte resolves - * to (cayman+). - * Returns the physical address of the page. - */ -uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr) -{ - uint64_t result; - - /* page table offset */ - result = rdev->gart.pages_addr[addr >> PAGE_SHIFT]; - - /* in case cpu page size != gpu page size*/ - result |= addr & (~PAGE_MASK); - - return result; -} - -/** - * radeon_vm_page_flags - translate page flags to what the hw uses - * - * @flags: flags comming from userspace - * - * Translate the flags the userspace ABI uses to hw flags. - */ -static uint32_t radeon_vm_page_flags(uint32_t flags) -{ - uint32_t hw_flags = 0; - hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0; - hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0; - hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0; - if (flags & RADEON_VM_PAGE_SYSTEM) { - hw_flags |= R600_PTE_SYSTEM; - hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0; - } - return hw_flags; -} - -/** - * radeon_vm_update_pdes - make sure that page directory is valid - * - * @rdev: radeon_device pointer - * @vm: requested vm - * @start: start of GPU address range - * @end: end of GPU address range - * - * Allocates new page tables if necessary - * and updates the page directory (cayman+). - * Returns 0 for success, error for failure. - * - * Global and local mutex must be locked! - */ -static int radeon_vm_update_pdes(struct radeon_device *rdev, - struct radeon_vm *vm, - struct radeon_ib *ib, - uint64_t start, uint64_t end) -{ - static const uint32_t incr = RADEON_VM_PTE_COUNT * 8; - - uint64_t last_pde = ~0, last_pt = ~0; - unsigned count = 0; - uint64_t pt_idx; - int r; - - start = (start / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; - end = (end / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; - - /* walk over the address space and update the page directory */ - for (pt_idx = start; pt_idx <= end; ++pt_idx) { - uint64_t pde, pt; - - if (vm->page_tables[pt_idx]) - continue; - -retry: - r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, - &vm->page_tables[pt_idx], - RADEON_VM_PTE_COUNT * 8, - RADEON_GPU_PAGE_SIZE, false); - - if (r == -ENOMEM) { - r = radeon_vm_evict(rdev, vm); - if (r) - return r; - goto retry; - } else if (r) { - return r; - } - - pde = vm->pd_gpu_addr + pt_idx * 8; - - pt = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]); - - if (((last_pde + 8 * count) != pde) || - ((last_pt + incr * count) != pt)) { - - if (count) { - radeon_asic_vm_set_page(rdev, ib, last_pde, - last_pt, count, incr, - R600_PTE_VALID); - - count *= RADEON_VM_PTE_COUNT; - radeon_asic_vm_set_page(rdev, ib, last_pt, 0, - count, 0, 0); - } - - count = 1; - last_pde = pde; - last_pt = pt; - } else { - ++count; - } - } - - if (count) { - radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count, - incr, R600_PTE_VALID); - - count *= RADEON_VM_PTE_COUNT; - radeon_asic_vm_set_page(rdev, ib, last_pt, 0, - count, 0, 0); - } - - return 0; -} - -/** - * radeon_vm_update_ptes - make sure that page tables are valid - * - * @rdev: radeon_device pointer - * @vm: requested vm - * @start: start of GPU address range - * @end: end of GPU address range - * @dst: destination address to map to - * @flags: mapping flags - * - * Update the page tables in the range @start - @end (cayman+). - * - * Global and local mutex must be locked! - */ -static void radeon_vm_update_ptes(struct radeon_device *rdev, - struct radeon_vm *vm, - struct radeon_ib *ib, - uint64_t start, uint64_t end, - uint64_t dst, uint32_t flags) -{ - static const uint64_t mask = RADEON_VM_PTE_COUNT - 1; - - uint64_t last_pte = ~0, last_dst = ~0; - unsigned count = 0; - uint64_t addr; - - start = start / RADEON_GPU_PAGE_SIZE; - end = end / RADEON_GPU_PAGE_SIZE; - - /* walk over the address space and update the page tables */ - for (addr = start; addr < end; ) { - uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE; - unsigned nptes; - uint64_t pte; - - if ((addr & ~mask) == (end & ~mask)) - nptes = end - addr; - else - nptes = RADEON_VM_PTE_COUNT - (addr & mask); - - pte = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]); - pte += (addr & mask) * 8; - - if ((last_pte + 8 * count) != pte) { - - if (count) { - radeon_asic_vm_set_page(rdev, ib, last_pte, - last_dst, count, - RADEON_GPU_PAGE_SIZE, - flags); - } - - count = nptes; - last_pte = pte; - last_dst = dst; - } else { - count += nptes; - } - - addr += nptes; - dst += nptes * RADEON_GPU_PAGE_SIZE; - } - - if (count) { - radeon_asic_vm_set_page(rdev, ib, last_pte, - last_dst, count, - RADEON_GPU_PAGE_SIZE, flags); - } -} - -/** - * radeon_vm_bo_update - map a bo into the vm page table - * - * @rdev: radeon_device pointer - * @vm: requested vm - * @bo: radeon buffer object - * @mem: ttm mem - * - * Fill in the page table entries for @bo (cayman+). - * Returns 0 for success, -EINVAL for failure. - * - * Object have to be reserved & global and local mutex must be locked! - */ -int radeon_vm_bo_update(struct radeon_device *rdev, - struct radeon_vm *vm, - struct radeon_bo *bo, - struct ttm_mem_reg *mem) -{ - struct radeon_ib ib; - struct radeon_bo_va *bo_va; - unsigned nptes, npdes, ndw; - uint64_t addr; - int r; - - /* nothing to do if vm isn't bound */ - if (vm->page_directory == NULL) - return 0; - - bo_va = radeon_vm_bo_find(vm, bo); - if (bo_va == NULL) { - dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm); - return -EINVAL; - } - - if (!bo_va->soffset) { - dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n", - bo, vm); - return -EINVAL; - } - - if ((bo_va->valid && mem) || (!bo_va->valid && mem == NULL)) - return 0; - - bo_va->flags &= ~RADEON_VM_PAGE_VALID; - bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM; - if (mem) { - addr = mem->start << PAGE_SHIFT; - if (mem->mem_type != TTM_PL_SYSTEM) { - bo_va->flags |= RADEON_VM_PAGE_VALID; - bo_va->valid = true; - } - if (mem->mem_type == TTM_PL_TT) { - bo_va->flags |= RADEON_VM_PAGE_SYSTEM; - } else { - addr += rdev->vm_manager.vram_base_offset; - } - } else { - addr = 0; - bo_va->valid = false; - } - - trace_radeon_vm_bo_update(bo_va); - - nptes = radeon_bo_ngpu_pages(bo); - - /* assume two extra pdes in case the mapping overlaps the borders */ - npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2; - - /* padding, etc. */ - ndw = 64; - - if (RADEON_VM_BLOCK_SIZE > 11) - /* reserve space for one header for every 2k dwords */ - ndw += (nptes >> 11) * 4; - else - /* reserve space for one header for - every (1 << BLOCK_SIZE) entries */ - ndw += (nptes >> RADEON_VM_BLOCK_SIZE) * 4; - - /* reserve space for pte addresses */ - ndw += nptes * 2; - - /* reserve space for one header for every 2k dwords */ - ndw += (npdes >> 11) * 4; - - /* reserve space for pde addresses */ - ndw += npdes * 2; - - /* reserve space for clearing new page tables */ - ndw += npdes * 2 * RADEON_VM_PTE_COUNT; - - /* update too big for an IB */ - if (ndw > 0xfffff) - return -ENOMEM; - - r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4); - if (r) - return r; - ib.length_dw = 0; - - r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset); - if (r) { - radeon_ib_free(rdev, &ib); - return r; - } - - radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset, - addr, radeon_vm_page_flags(bo_va->flags)); - - radeon_semaphore_sync_to(ib.semaphore, vm->fence); - r = radeon_ib_schedule(rdev, &ib, NULL); - if (r) { - radeon_ib_free(rdev, &ib); - return r; - } - radeon_fence_unref(&vm->fence); - vm->fence = radeon_fence_ref(ib.fence); - radeon_ib_free(rdev, &ib); - radeon_fence_unref(&vm->last_flush); - - return 0; -} - -/** - * radeon_vm_bo_rmv - remove a bo to a specific vm - * - * @rdev: radeon_device pointer - * @bo_va: requested bo_va - * - * Remove @bo_va->bo from the requested vm (cayman+). - * Remove @bo_va->bo from the list of bos associated with the bo_va->vm and - * remove the ptes for @bo_va in the page table. - * Returns 0 for success. - * - * Object have to be reserved! - */ -int radeon_vm_bo_rmv(struct radeon_device *rdev, - struct radeon_bo_va *bo_va) -{ - int r = 0; - - mutex_lock(&rdev->vm_manager.lock); - mutex_lock(&bo_va->vm->mutex); - if (bo_va->soffset) { - r = radeon_vm_bo_update(rdev, bo_va->vm, bo_va->bo, NULL); - } - mutex_unlock(&rdev->vm_manager.lock); - list_del(&bo_va->vm_list); - mutex_unlock(&bo_va->vm->mutex); - list_del(&bo_va->bo_list); - - kfree(bo_va); - return r; -} - -/** - * radeon_vm_bo_invalidate - mark the bo as invalid - * - * @rdev: radeon_device pointer - * @vm: requested vm - * @bo: radeon buffer object - * - * Mark @bo as invalid (cayman+). - */ -void radeon_vm_bo_invalidate(struct radeon_device *rdev, - struct radeon_bo *bo) -{ - struct radeon_bo_va *bo_va; - - list_for_each_entry(bo_va, &bo->va, bo_list) { - bo_va->valid = false; - } -} - -/** - * radeon_vm_init - initialize a vm instance - * - * @rdev: radeon_device pointer - * @vm: requested vm - * - * Init @vm fields (cayman+). - */ -void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) -{ - vm->id = 0; - vm->fence = NULL; - vm->last_flush = NULL; - vm->last_id_use = NULL; - mutex_init(&vm->mutex); - INIT_LIST_HEAD(&vm->list); - INIT_LIST_HEAD(&vm->va); -} - -/** - * radeon_vm_fini - tear down a vm instance - * - * @rdev: radeon_device pointer - * @vm: requested vm - * - * Tear down @vm (cayman+). - * Unbind the VM and remove all bos from the vm bo list - */ -void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) -{ - struct radeon_bo_va *bo_va, *tmp; - int r; - - mutex_lock(&rdev->vm_manager.lock); - mutex_lock(&vm->mutex); - radeon_vm_free_pt(rdev, vm); - mutex_unlock(&rdev->vm_manager.lock); - - if (!list_empty(&vm->va)) { - dev_err(rdev->dev, "still active bo inside vm\n"); - } - list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) { - list_del_init(&bo_va->vm_list); - r = radeon_bo_reserve(bo_va->bo, false); - if (!r) { - list_del_init(&bo_va->bo_list); - radeon_bo_unreserve(bo_va->bo); - kfree(bo_va); - } - } - radeon_fence_unref(&vm->fence); - radeon_fence_unref(&vm->last_flush); - radeon_fence_unref(&vm->last_id_use); - mutex_unlock(&vm->mutex); -} diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c new file mode 100644 index 000000000000..433b1ebd07ea --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -0,0 +1,981 @@ +/* + * Copyright 2008 Advanced Micro Devices, Inc. + * Copyright 2008 Red Hat Inc. + * Copyright 2009 Jerome Glisse. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Dave Airlie + * Alex Deucher + * Jerome Glisse + */ +#include +#include +#include "radeon.h" +#include "radeon_trace.h" + +/* + * GPUVM + * GPUVM is similar to the legacy gart on older asics, however + * rather than there being a single global gart table + * for the entire GPU, there are multiple VM page tables active + * at any given time. The VM page tables can contain a mix + * vram pages and system memory pages and system memory pages + * can be mapped as snooped (cached system pages) or unsnooped + * (uncached system pages). + * Each VM has an ID associated with it and there is a page table + * associated with each VMID. When execting a command buffer, + * the kernel tells the the ring what VMID to use for that command + * buffer. VMIDs are allocated dynamically as commands are submitted. + * The userspace drivers maintain their own address space and the kernel + * sets up their pages tables accordingly when they submit their + * command buffers and a VMID is assigned. + * Cayman/Trinity support up to 8 active VMs at any given time; + * SI supports 16. + */ + +/** + * radeon_vm_num_pde - return the number of page directory entries + * + * @rdev: radeon_device pointer + * + * Calculate the number of page directory entries (cayman+). + */ +static unsigned radeon_vm_num_pdes(struct radeon_device *rdev) +{ + return rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE; +} + +/** + * radeon_vm_directory_size - returns the size of the page directory in bytes + * + * @rdev: radeon_device pointer + * + * Calculate the size of the page directory in bytes (cayman+). + */ +static unsigned radeon_vm_directory_size(struct radeon_device *rdev) +{ + return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8); +} + +/** + * radeon_vm_manager_init - init the vm manager + * + * @rdev: radeon_device pointer + * + * Init the vm manager (cayman+). + * Returns 0 for success, error for failure. + */ +int radeon_vm_manager_init(struct radeon_device *rdev) +{ + struct radeon_vm *vm; + struct radeon_bo_va *bo_va; + int r; + unsigned size; + + if (!rdev->vm_manager.enabled) { + /* allocate enough for 2 full VM pts */ + size = radeon_vm_directory_size(rdev); + size += rdev->vm_manager.max_pfn * 8; + size *= 2; + r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager, + RADEON_GPU_PAGE_ALIGN(size), + RADEON_VM_PTB_ALIGN_SIZE, + RADEON_GEM_DOMAIN_VRAM); + if (r) { + dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n", + (rdev->vm_manager.max_pfn * 8) >> 10); + return r; + } + + r = radeon_asic_vm_init(rdev); + if (r) + return r; + + rdev->vm_manager.enabled = true; + + r = radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager); + if (r) + return r; + } + + /* restore page table */ + list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) { + if (vm->page_directory == NULL) + continue; + + list_for_each_entry(bo_va, &vm->va, vm_list) { + bo_va->valid = false; + } + } + return 0; +} + +/** + * radeon_vm_free_pt - free the page table for a specific vm + * + * @rdev: radeon_device pointer + * @vm: vm to unbind + * + * Free the page table of a specific vm (cayman+). + * + * Global and local mutex must be lock! + */ +static void radeon_vm_free_pt(struct radeon_device *rdev, + struct radeon_vm *vm) +{ + struct radeon_bo_va *bo_va; + int i; + + if (!vm->page_directory) + return; + + list_del_init(&vm->list); + radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); + + list_for_each_entry(bo_va, &vm->va, vm_list) { + bo_va->valid = false; + } + + if (vm->page_tables == NULL) + return; + + for (i = 0; i < radeon_vm_num_pdes(rdev); i++) + radeon_sa_bo_free(rdev, &vm->page_tables[i], vm->fence); + + kfree(vm->page_tables); +} + +/** + * radeon_vm_manager_fini - tear down the vm manager + * + * @rdev: radeon_device pointer + * + * Tear down the VM manager (cayman+). + */ +void radeon_vm_manager_fini(struct radeon_device *rdev) +{ + struct radeon_vm *vm, *tmp; + int i; + + if (!rdev->vm_manager.enabled) + return; + + mutex_lock(&rdev->vm_manager.lock); + /* free all allocated page tables */ + list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) { + mutex_lock(&vm->mutex); + radeon_vm_free_pt(rdev, vm); + mutex_unlock(&vm->mutex); + } + for (i = 0; i < RADEON_NUM_VM; ++i) { + radeon_fence_unref(&rdev->vm_manager.active[i]); + } + radeon_asic_vm_fini(rdev); + mutex_unlock(&rdev->vm_manager.lock); + + radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager); + radeon_sa_bo_manager_fini(rdev, &rdev->vm_manager.sa_manager); + rdev->vm_manager.enabled = false; +} + +/** + * radeon_vm_evict - evict page table to make room for new one + * + * @rdev: radeon_device pointer + * @vm: VM we want to allocate something for + * + * Evict a VM from the lru, making sure that it isn't @vm. (cayman+). + * Returns 0 for success, -ENOMEM for failure. + * + * Global and local mutex must be locked! + */ +static int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm) +{ + struct radeon_vm *vm_evict; + + if (list_empty(&rdev->vm_manager.lru_vm)) + return -ENOMEM; + + vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, + struct radeon_vm, list); + if (vm_evict == vm) + return -ENOMEM; + + mutex_lock(&vm_evict->mutex); + radeon_vm_free_pt(rdev, vm_evict); + mutex_unlock(&vm_evict->mutex); + return 0; +} + +/** + * radeon_vm_alloc_pt - allocates a page table for a VM + * + * @rdev: radeon_device pointer + * @vm: vm to bind + * + * Allocate a page table for the requested vm (cayman+). + * Returns 0 for success, error for failure. + * + * Global and local mutex must be locked! + */ +int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm) +{ + unsigned pd_size, pd_entries, pts_size; + struct radeon_ib ib; + int r; + + if (vm == NULL) { + return -EINVAL; + } + + if (vm->page_directory != NULL) { + return 0; + } + + pd_size = radeon_vm_directory_size(rdev); + pd_entries = radeon_vm_num_pdes(rdev); + +retry: + r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, + &vm->page_directory, pd_size, + RADEON_VM_PTB_ALIGN_SIZE, false); + if (r == -ENOMEM) { + r = radeon_vm_evict(rdev, vm); + if (r) + return r; + goto retry; + + } else if (r) { + return r; + } + + vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->page_directory); + + /* Initially clear the page directory */ + r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, + NULL, pd_entries * 2 + 64); + if (r) { + radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); + return r; + } + + ib.length_dw = 0; + + radeon_asic_vm_set_page(rdev, &ib, vm->pd_gpu_addr, + 0, pd_entries, 0, 0); + + radeon_semaphore_sync_to(ib.semaphore, vm->fence); + r = radeon_ib_schedule(rdev, &ib, NULL); + if (r) { + radeon_ib_free(rdev, &ib); + radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); + return r; + } + radeon_fence_unref(&vm->fence); + vm->fence = radeon_fence_ref(ib.fence); + radeon_ib_free(rdev, &ib); + radeon_fence_unref(&vm->last_flush); + + /* allocate page table array */ + pts_size = radeon_vm_num_pdes(rdev) * sizeof(struct radeon_sa_bo *); + vm->page_tables = kzalloc(pts_size, GFP_KERNEL); + + if (vm->page_tables == NULL) { + DRM_ERROR("Cannot allocate memory for page table array\n"); + radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); + return -ENOMEM; + } + + return 0; +} + +/** + * radeon_vm_add_to_lru - add VMs page table to LRU list + * + * @rdev: radeon_device pointer + * @vm: vm to add to LRU + * + * Add the allocated page table to the LRU list (cayman+). + * + * Global mutex must be locked! + */ +void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm) +{ + list_del_init(&vm->list); + list_add_tail(&vm->list, &rdev->vm_manager.lru_vm); +} + +/** + * radeon_vm_grab_id - allocate the next free VMID + * + * @rdev: radeon_device pointer + * @vm: vm to allocate id for + * @ring: ring we want to submit job to + * + * Allocate an id for the vm (cayman+). + * Returns the fence we need to sync to (if any). + * + * Global and local mutex must be locked! + */ +struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, + struct radeon_vm *vm, int ring) +{ + struct radeon_fence *best[RADEON_NUM_RINGS] = {}; + unsigned choices[2] = {}; + unsigned i; + + /* check if the id is still valid */ + if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id]) + return NULL; + + /* we definately need to flush */ + radeon_fence_unref(&vm->last_flush); + + /* skip over VMID 0, since it is the system VM */ + for (i = 1; i < rdev->vm_manager.nvm; ++i) { + struct radeon_fence *fence = rdev->vm_manager.active[i]; + + if (fence == NULL) { + /* found a free one */ + vm->id = i; + trace_radeon_vm_grab_id(vm->id, ring); + return NULL; + } + + if (radeon_fence_is_earlier(fence, best[fence->ring])) { + best[fence->ring] = fence; + choices[fence->ring == ring ? 0 : 1] = i; + } + } + + for (i = 0; i < 2; ++i) { + if (choices[i]) { + vm->id = choices[i]; + trace_radeon_vm_grab_id(vm->id, ring); + return rdev->vm_manager.active[choices[i]]; + } + } + + /* should never happen */ + BUG(); + return NULL; +} + +/** + * radeon_vm_fence - remember fence for vm + * + * @rdev: radeon_device pointer + * @vm: vm we want to fence + * @fence: fence to remember + * + * Fence the vm (cayman+). + * Set the fence used to protect page table and id. + * + * Global and local mutex must be locked! + */ +void radeon_vm_fence(struct radeon_device *rdev, + struct radeon_vm *vm, + struct radeon_fence *fence) +{ + radeon_fence_unref(&rdev->vm_manager.active[vm->id]); + rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence); + + radeon_fence_unref(&vm->fence); + vm->fence = radeon_fence_ref(fence); + + radeon_fence_unref(&vm->last_id_use); + vm->last_id_use = radeon_fence_ref(fence); +} + +/** + * radeon_vm_bo_find - find the bo_va for a specific vm & bo + * + * @vm: requested vm + * @bo: requested buffer object + * + * Find @bo inside the requested vm (cayman+). + * Search inside the @bos vm list for the requested vm + * Returns the found bo_va or NULL if none is found + * + * Object has to be reserved! + */ +struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm, + struct radeon_bo *bo) +{ + struct radeon_bo_va *bo_va; + + list_for_each_entry(bo_va, &bo->va, bo_list) { + if (bo_va->vm == vm) { + return bo_va; + } + } + return NULL; +} + +/** + * radeon_vm_bo_add - add a bo to a specific vm + * + * @rdev: radeon_device pointer + * @vm: requested vm + * @bo: radeon buffer object + * + * Add @bo into the requested vm (cayman+). + * Add @bo to the list of bos associated with the vm + * Returns newly added bo_va or NULL for failure + * + * Object has to be reserved! + */ +struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev, + struct radeon_vm *vm, + struct radeon_bo *bo) +{ + struct radeon_bo_va *bo_va; + + bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); + if (bo_va == NULL) { + return NULL; + } + bo_va->vm = vm; + bo_va->bo = bo; + bo_va->soffset = 0; + bo_va->eoffset = 0; + bo_va->flags = 0; + bo_va->valid = false; + bo_va->ref_count = 1; + INIT_LIST_HEAD(&bo_va->bo_list); + INIT_LIST_HEAD(&bo_va->vm_list); + + mutex_lock(&vm->mutex); + list_add(&bo_va->vm_list, &vm->va); + list_add_tail(&bo_va->bo_list, &bo->va); + mutex_unlock(&vm->mutex); + + return bo_va; +} + +/** + * radeon_vm_bo_set_addr - set bos virtual address inside a vm + * + * @rdev: radeon_device pointer + * @bo_va: bo_va to store the address + * @soffset: requested offset of the buffer in the VM address space + * @flags: attributes of pages (read/write/valid/etc.) + * + * Set offset of @bo_va (cayman+). + * Validate and set the offset requested within the vm address space. + * Returns 0 for success, error for failure. + * + * Object has to be reserved! + */ +int radeon_vm_bo_set_addr(struct radeon_device *rdev, + struct radeon_bo_va *bo_va, + uint64_t soffset, + uint32_t flags) +{ + uint64_t size = radeon_bo_size(bo_va->bo); + uint64_t eoffset, last_offset = 0; + struct radeon_vm *vm = bo_va->vm; + struct radeon_bo_va *tmp; + struct list_head *head; + unsigned last_pfn; + + if (soffset) { + /* make sure object fit at this offset */ + eoffset = soffset + size; + if (soffset >= eoffset) { + return -EINVAL; + } + + last_pfn = eoffset / RADEON_GPU_PAGE_SIZE; + if (last_pfn > rdev->vm_manager.max_pfn) { + dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n", + last_pfn, rdev->vm_manager.max_pfn); + return -EINVAL; + } + + } else { + eoffset = last_pfn = 0; + } + + mutex_lock(&vm->mutex); + head = &vm->va; + last_offset = 0; + list_for_each_entry(tmp, &vm->va, vm_list) { + if (bo_va == tmp) { + /* skip over currently modified bo */ + continue; + } + + if (soffset >= last_offset && eoffset <= tmp->soffset) { + /* bo can be added before this one */ + break; + } + if (eoffset > tmp->soffset && soffset < tmp->eoffset) { + /* bo and tmp overlap, invalid offset */ + dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n", + bo_va->bo, (unsigned)bo_va->soffset, tmp->bo, + (unsigned)tmp->soffset, (unsigned)tmp->eoffset); + mutex_unlock(&vm->mutex); + return -EINVAL; + } + last_offset = tmp->eoffset; + head = &tmp->vm_list; + } + + bo_va->soffset = soffset; + bo_va->eoffset = eoffset; + bo_va->flags = flags; + bo_va->valid = false; + list_move(&bo_va->vm_list, head); + + mutex_unlock(&vm->mutex); + return 0; +} + +/** + * radeon_vm_map_gart - get the physical address of a gart page + * + * @rdev: radeon_device pointer + * @addr: the unmapped addr + * + * Look up the physical address of the page that the pte resolves + * to (cayman+). + * Returns the physical address of the page. + */ +uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr) +{ + uint64_t result; + + /* page table offset */ + result = rdev->gart.pages_addr[addr >> PAGE_SHIFT]; + + /* in case cpu page size != gpu page size*/ + result |= addr & (~PAGE_MASK); + + return result; +} + +/** + * radeon_vm_page_flags - translate page flags to what the hw uses + * + * @flags: flags comming from userspace + * + * Translate the flags the userspace ABI uses to hw flags. + */ +static uint32_t radeon_vm_page_flags(uint32_t flags) +{ + uint32_t hw_flags = 0; + hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0; + hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0; + hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0; + if (flags & RADEON_VM_PAGE_SYSTEM) { + hw_flags |= R600_PTE_SYSTEM; + hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0; + } + return hw_flags; +} + +/** + * radeon_vm_update_pdes - make sure that page directory is valid + * + * @rdev: radeon_device pointer + * @vm: requested vm + * @start: start of GPU address range + * @end: end of GPU address range + * + * Allocates new page tables if necessary + * and updates the page directory (cayman+). + * Returns 0 for success, error for failure. + * + * Global and local mutex must be locked! + */ +static int radeon_vm_update_pdes(struct radeon_device *rdev, + struct radeon_vm *vm, + struct radeon_ib *ib, + uint64_t start, uint64_t end) +{ + static const uint32_t incr = RADEON_VM_PTE_COUNT * 8; + + uint64_t last_pde = ~0, last_pt = ~0; + unsigned count = 0; + uint64_t pt_idx; + int r; + + start = (start / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; + end = (end / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; + + /* walk over the address space and update the page directory */ + for (pt_idx = start; pt_idx <= end; ++pt_idx) { + uint64_t pde, pt; + + if (vm->page_tables[pt_idx]) + continue; + +retry: + r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, + &vm->page_tables[pt_idx], + RADEON_VM_PTE_COUNT * 8, + RADEON_GPU_PAGE_SIZE, false); + + if (r == -ENOMEM) { + r = radeon_vm_evict(rdev, vm); + if (r) + return r; + goto retry; + } else if (r) { + return r; + } + + pde = vm->pd_gpu_addr + pt_idx * 8; + + pt = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]); + + if (((last_pde + 8 * count) != pde) || + ((last_pt + incr * count) != pt)) { + + if (count) { + radeon_asic_vm_set_page(rdev, ib, last_pde, + last_pt, count, incr, + R600_PTE_VALID); + + count *= RADEON_VM_PTE_COUNT; + radeon_asic_vm_set_page(rdev, ib, last_pt, 0, + count, 0, 0); + } + + count = 1; + last_pde = pde; + last_pt = pt; + } else { + ++count; + } + } + + if (count) { + radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count, + incr, R600_PTE_VALID); + + count *= RADEON_VM_PTE_COUNT; + radeon_asic_vm_set_page(rdev, ib, last_pt, 0, + count, 0, 0); + } + + return 0; +} + +/** + * radeon_vm_update_ptes - make sure that page tables are valid + * + * @rdev: radeon_device pointer + * @vm: requested vm + * @start: start of GPU address range + * @end: end of GPU address range + * @dst: destination address to map to + * @flags: mapping flags + * + * Update the page tables in the range @start - @end (cayman+). + * + * Global and local mutex must be locked! + */ +static void radeon_vm_update_ptes(struct radeon_device *rdev, + struct radeon_vm *vm, + struct radeon_ib *ib, + uint64_t start, uint64_t end, + uint64_t dst, uint32_t flags) +{ + static const uint64_t mask = RADEON_VM_PTE_COUNT - 1; + + uint64_t last_pte = ~0, last_dst = ~0; + unsigned count = 0; + uint64_t addr; + + start = start / RADEON_GPU_PAGE_SIZE; + end = end / RADEON_GPU_PAGE_SIZE; + + /* walk over the address space and update the page tables */ + for (addr = start; addr < end; ) { + uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE; + unsigned nptes; + uint64_t pte; + + if ((addr & ~mask) == (end & ~mask)) + nptes = end - addr; + else + nptes = RADEON_VM_PTE_COUNT - (addr & mask); + + pte = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]); + pte += (addr & mask) * 8; + + if ((last_pte + 8 * count) != pte) { + + if (count) { + radeon_asic_vm_set_page(rdev, ib, last_pte, + last_dst, count, + RADEON_GPU_PAGE_SIZE, + flags); + } + + count = nptes; + last_pte = pte; + last_dst = dst; + } else { + count += nptes; + } + + addr += nptes; + dst += nptes * RADEON_GPU_PAGE_SIZE; + } + + if (count) { + radeon_asic_vm_set_page(rdev, ib, last_pte, + last_dst, count, + RADEON_GPU_PAGE_SIZE, flags); + } +} + +/** + * radeon_vm_bo_update - map a bo into the vm page table + * + * @rdev: radeon_device pointer + * @vm: requested vm + * @bo: radeon buffer object + * @mem: ttm mem + * + * Fill in the page table entries for @bo (cayman+). + * Returns 0 for success, -EINVAL for failure. + * + * Object have to be reserved & global and local mutex must be locked! + */ +int radeon_vm_bo_update(struct radeon_device *rdev, + struct radeon_vm *vm, + struct radeon_bo *bo, + struct ttm_mem_reg *mem) +{ + struct radeon_ib ib; + struct radeon_bo_va *bo_va; + unsigned nptes, npdes, ndw; + uint64_t addr; + int r; + + /* nothing to do if vm isn't bound */ + if (vm->page_directory == NULL) + return 0; + + bo_va = radeon_vm_bo_find(vm, bo); + if (bo_va == NULL) { + dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm); + return -EINVAL; + } + + if (!bo_va->soffset) { + dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n", + bo, vm); + return -EINVAL; + } + + if ((bo_va->valid && mem) || (!bo_va->valid && mem == NULL)) + return 0; + + bo_va->flags &= ~RADEON_VM_PAGE_VALID; + bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM; + if (mem) { + addr = mem->start << PAGE_SHIFT; + if (mem->mem_type != TTM_PL_SYSTEM) { + bo_va->flags |= RADEON_VM_PAGE_VALID; + bo_va->valid = true; + } + if (mem->mem_type == TTM_PL_TT) { + bo_va->flags |= RADEON_VM_PAGE_SYSTEM; + } else { + addr += rdev->vm_manager.vram_base_offset; + } + } else { + addr = 0; + bo_va->valid = false; + } + + trace_radeon_vm_bo_update(bo_va); + + nptes = radeon_bo_ngpu_pages(bo); + + /* assume two extra pdes in case the mapping overlaps the borders */ + npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2; + + /* padding, etc. */ + ndw = 64; + + if (RADEON_VM_BLOCK_SIZE > 11) + /* reserve space for one header for every 2k dwords */ + ndw += (nptes >> 11) * 4; + else + /* reserve space for one header for + every (1 << BLOCK_SIZE) entries */ + ndw += (nptes >> RADEON_VM_BLOCK_SIZE) * 4; + + /* reserve space for pte addresses */ + ndw += nptes * 2; + + /* reserve space for one header for every 2k dwords */ + ndw += (npdes >> 11) * 4; + + /* reserve space for pde addresses */ + ndw += npdes * 2; + + /* reserve space for clearing new page tables */ + ndw += npdes * 2 * RADEON_VM_PTE_COUNT; + + /* update too big for an IB */ + if (ndw > 0xfffff) + return -ENOMEM; + + r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4); + if (r) + return r; + ib.length_dw = 0; + + r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset); + if (r) { + radeon_ib_free(rdev, &ib); + return r; + } + + radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset, + addr, radeon_vm_page_flags(bo_va->flags)); + + radeon_semaphore_sync_to(ib.semaphore, vm->fence); + r = radeon_ib_schedule(rdev, &ib, NULL); + if (r) { + radeon_ib_free(rdev, &ib); + return r; + } + radeon_fence_unref(&vm->fence); + vm->fence = radeon_fence_ref(ib.fence); + radeon_ib_free(rdev, &ib); + radeon_fence_unref(&vm->last_flush); + + return 0; +} + +/** + * radeon_vm_bo_rmv - remove a bo to a specific vm + * + * @rdev: radeon_device pointer + * @bo_va: requested bo_va + * + * Remove @bo_va->bo from the requested vm (cayman+). + * Remove @bo_va->bo from the list of bos associated with the bo_va->vm and + * remove the ptes for @bo_va in the page table. + * Returns 0 for success. + * + * Object have to be reserved! + */ +int radeon_vm_bo_rmv(struct radeon_device *rdev, + struct radeon_bo_va *bo_va) +{ + int r = 0; + + mutex_lock(&rdev->vm_manager.lock); + mutex_lock(&bo_va->vm->mutex); + if (bo_va->soffset) { + r = radeon_vm_bo_update(rdev, bo_va->vm, bo_va->bo, NULL); + } + mutex_unlock(&rdev->vm_manager.lock); + list_del(&bo_va->vm_list); + mutex_unlock(&bo_va->vm->mutex); + list_del(&bo_va->bo_list); + + kfree(bo_va); + return r; +} + +/** + * radeon_vm_bo_invalidate - mark the bo as invalid + * + * @rdev: radeon_device pointer + * @vm: requested vm + * @bo: radeon buffer object + * + * Mark @bo as invalid (cayman+). + */ +void radeon_vm_bo_invalidate(struct radeon_device *rdev, + struct radeon_bo *bo) +{ + struct radeon_bo_va *bo_va; + + list_for_each_entry(bo_va, &bo->va, bo_list) { + bo_va->valid = false; + } +} + +/** + * radeon_vm_init - initialize a vm instance + * + * @rdev: radeon_device pointer + * @vm: requested vm + * + * Init @vm fields (cayman+). + */ +void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) +{ + vm->id = 0; + vm->fence = NULL; + vm->last_flush = NULL; + vm->last_id_use = NULL; + mutex_init(&vm->mutex); + INIT_LIST_HEAD(&vm->list); + INIT_LIST_HEAD(&vm->va); +} + +/** + * radeon_vm_fini - tear down a vm instance + * + * @rdev: radeon_device pointer + * @vm: requested vm + * + * Tear down @vm (cayman+). + * Unbind the VM and remove all bos from the vm bo list + */ +void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) +{ + struct radeon_bo_va *bo_va, *tmp; + int r; + + mutex_lock(&rdev->vm_manager.lock); + mutex_lock(&vm->mutex); + radeon_vm_free_pt(rdev, vm); + mutex_unlock(&rdev->vm_manager.lock); + + if (!list_empty(&vm->va)) { + dev_err(rdev->dev, "still active bo inside vm\n"); + } + list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) { + list_del_init(&bo_va->vm_list); + r = radeon_bo_reserve(bo_va->bo, false); + if (!r) { + list_del_init(&bo_va->bo_list); + radeon_bo_unreserve(bo_va->bo); + kfree(bo_va); + } + } + radeon_fence_unref(&vm->fence); + radeon_fence_unref(&vm->last_flush); + radeon_fence_unref(&vm->last_id_use); + mutex_unlock(&vm->mutex); +} -- cgit From fa68834342e992dcb58e6dd7d9dbe05c6b6b27d9 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 20 Feb 2014 10:47:05 +0100 Subject: drm/radeon: further cleanup vm flushing & fencing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon.h | 3 +++ drivers/gpu/drm/radeon/radeon_cs.c | 4 ---- drivers/gpu/drm/radeon/radeon_ring.c | 16 +++++++--------- drivers/gpu/drm/radeon/radeon_vm.c | 31 ++++++++++++++++++++++++++++--- 4 files changed, 38 insertions(+), 16 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 40ab8a28c998..644d922990ef 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2804,6 +2804,9 @@ int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm); void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm); struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, struct radeon_vm *vm, int ring); +void radeon_vm_flush(struct radeon_device *rdev, + struct radeon_vm *vm, + int ring); void radeon_vm_fence(struct radeon_device *rdev, struct radeon_vm *vm, struct radeon_fence *fence); diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 5abae403ea4f..f92df2e8ebdd 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -511,10 +511,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, r = radeon_ib_schedule(rdev, &parser->ib, NULL); } - if (!r) { - radeon_vm_fence(rdev, vm, parser->ib.fence); - } - out: radeon_vm_add_to_lru(rdev, vm); mutex_unlock(&vm->mutex); diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index fa140119cdb6..665591a7faad 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -153,11 +153,9 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, return r; } - /* if we can't remember our last VM flush then flush now! */ - /* XXX figure out why we have to flush for every IB */ - if (ib->vm /*&& !ib->vm->last_flush*/) { - radeon_ring_vm_flush(rdev, ib->ring, ib->vm); - } + if (ib->vm) + radeon_vm_flush(rdev, ib->vm, ib->ring); + if (const_ib) { radeon_ring_ib_execute(rdev, const_ib->ring, const_ib); radeon_semaphore_free(rdev, &const_ib->semaphore, NULL); @@ -172,10 +170,10 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, if (const_ib) { const_ib->fence = radeon_fence_ref(ib->fence); } - /* we just flushed the VM, remember that */ - if (ib->vm && !ib->vm->last_flush) { - ib->vm->last_flush = radeon_fence_ref(ib->fence); - } + + if (ib->vm) + radeon_vm_fence(rdev, ib->vm, ib->fence); + radeon_ring_unlock_commit(rdev, ring); return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 433b1ebd07ea..516017689793 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -378,6 +378,27 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, return NULL; } +/** + * radeon_vm_flush - hardware flush the vm + * + * @rdev: radeon_device pointer + * @vm: vm we want to flush + * @ring: ring to use for flush + * + * Flush the vm (cayman+). + * + * Global and local mutex must be locked! + */ +void radeon_vm_flush(struct radeon_device *rdev, + struct radeon_vm *vm, + int ring) +{ + /* if we can't remember our last VM flush then flush now! */ + /* XXX figure out why we have to flush all the time */ + if (!vm->last_flush || true) + radeon_ring_vm_flush(rdev, ring, vm); +} + /** * radeon_vm_fence - remember fence for vm * @@ -394,14 +415,18 @@ void radeon_vm_fence(struct radeon_device *rdev, struct radeon_vm *vm, struct radeon_fence *fence) { - radeon_fence_unref(&rdev->vm_manager.active[vm->id]); - rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence); - radeon_fence_unref(&vm->fence); vm->fence = radeon_fence_ref(fence); + radeon_fence_unref(&rdev->vm_manager.active[vm->id]); + rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence); + radeon_fence_unref(&vm->last_id_use); vm->last_id_use = radeon_fence_ref(fence); + + /* we just flushed the VM, remember that */ + if (!vm->last_flush) + vm->last_flush = radeon_fence_ref(fence); } /** -- cgit From 6d2f2944e95e504a7d33385eeeb9bb7fcca72592 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 20 Feb 2014 13:42:17 +0100 Subject: drm/radeon: use normal BOs for the page tables v4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No need to make it more complicated than necessary, just allocate the page tables as normal BO and flush whenever the address change. v2: update comments and function name v3: squash bug fixes, page directory and tables patch v4: rebased on Mareks changes Signed-off-by: Christian König --- drivers/gpu/drm/radeon/radeon.h | 23 +- drivers/gpu/drm/radeon/radeon_cs.c | 44 +-- drivers/gpu/drm/radeon/radeon_device.c | 1 - drivers/gpu/drm/radeon/radeon_kms.c | 4 +- drivers/gpu/drm/radeon/radeon_vm.c | 492 +++++++++++++++------------------ 5 files changed, 269 insertions(+), 295 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 644d922990ef..c31e3c2c1b3a 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -857,17 +857,22 @@ struct radeon_mec { #define R600_PTE_READABLE (1 << 5) #define R600_PTE_WRITEABLE (1 << 6) +struct radeon_vm_pt { + struct radeon_bo *bo; + uint64_t addr; +}; + struct radeon_vm { - struct list_head list; struct list_head va; unsigned id; /* contains the page directory */ - struct radeon_sa_bo *page_directory; + struct radeon_bo *page_directory; uint64_t pd_gpu_addr; + unsigned max_pde_used; /* array of page tables, one for each page directory entry */ - struct radeon_sa_bo **page_tables; + struct radeon_vm_pt *page_tables; struct mutex mutex; /* last fence for cs using this vm */ @@ -880,9 +885,7 @@ struct radeon_vm { struct radeon_vm_manager { struct mutex lock; - struct list_head lru_vm; struct radeon_fence *active[RADEON_NUM_VM]; - struct radeon_sa_manager sa_manager; uint32_t max_pfn; /* number of VMIDs */ unsigned nvm; @@ -1011,6 +1014,7 @@ struct radeon_cs_parser { unsigned nrelocs; struct radeon_cs_reloc *relocs; struct radeon_cs_reloc **relocs_ptr; + struct radeon_bo_list *vm_bos; struct list_head validated; unsigned dma_reloc_idx; /* indices of various chunks */ @@ -2798,10 +2802,11 @@ extern void radeon_program_register_sequence(struct radeon_device *rdev, */ int radeon_vm_manager_init(struct radeon_device *rdev); void radeon_vm_manager_fini(struct radeon_device *rdev); -void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm); +int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm); void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm); -int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm); -void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm); +struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev, + struct radeon_vm *vm, + struct list_head *head); struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, struct radeon_vm *vm, int ring); void radeon_vm_flush(struct radeon_device *rdev, @@ -2811,6 +2816,8 @@ void radeon_vm_fence(struct radeon_device *rdev, struct radeon_vm *vm, struct radeon_fence *fence); uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr); +int radeon_vm_update_page_directory(struct radeon_device *rdev, + struct radeon_vm *vm); int radeon_vm_bo_update(struct radeon_device *rdev, struct radeon_vm *vm, struct radeon_bo *bo, diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index f92df2e8ebdd..420c28dd6a1c 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -168,6 +168,10 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) radeon_cs_buckets_get_list(&buckets, &p->validated); + if (p->cs_flags & RADEON_CS_USE_VM) + p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm, + &p->validated); + return radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring); } @@ -401,6 +405,7 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo kfree(parser->track); kfree(parser->relocs); kfree(parser->relocs_ptr); + kfree(parser->vm_bos); for (i = 0; i < parser->nchunks; i++) drm_free_large(parser->chunks[i].kdata); kfree(parser->chunks); @@ -440,24 +445,32 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev, return r; } -static int radeon_bo_vm_update_pte(struct radeon_cs_parser *parser, +static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p, struct radeon_vm *vm) { - struct radeon_device *rdev = parser->rdev; - struct radeon_bo_list *lobj; - struct radeon_bo *bo; - int r; + struct radeon_device *rdev = p->rdev; + int i, r; - r = radeon_vm_bo_update(rdev, vm, rdev->ring_tmp_bo.bo, &rdev->ring_tmp_bo.bo->tbo.mem); - if (r) { + r = radeon_vm_update_page_directory(rdev, vm); + if (r) return r; - } - list_for_each_entry(lobj, &parser->validated, tv.head) { - bo = lobj->bo; - r = radeon_vm_bo_update(parser->rdev, vm, bo, &bo->tbo.mem); - if (r) { + + r = radeon_vm_bo_update(rdev, vm, rdev->ring_tmp_bo.bo, + &rdev->ring_tmp_bo.bo->tbo.mem); + if (r) + return r; + + for (i = 0; i < p->nrelocs; i++) { + struct radeon_bo *bo; + + /* ignore duplicates */ + if (p->relocs_ptr[i] != &p->relocs[i]) + continue; + + bo = p->relocs[i].robj; + r = radeon_vm_bo_update(rdev, vm, bo, &bo->tbo.mem); + if (r) return r; - } } return 0; } @@ -491,10 +504,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, mutex_lock(&rdev->vm_manager.lock); mutex_lock(&vm->mutex); - r = radeon_vm_alloc_pt(rdev, vm); - if (r) { - goto out; - } r = radeon_bo_vm_update_pte(parser, vm); if (r) { goto out; @@ -512,7 +521,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, } out: - radeon_vm_add_to_lru(rdev, vm); mutex_unlock(&vm->mutex); mutex_unlock(&rdev->vm_manager.lock); return r; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index fa7841b9d7b6..e58dbabd7d7d 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1198,7 +1198,6 @@ int radeon_device_init(struct radeon_device *rdev, * Max GPUVM size for cayman and SI is 40 bits. */ rdev->vm_manager.max_pfn = 1 << 20; - INIT_LIST_HEAD(&rdev->vm_manager.lru_vm); /* Set asic functions */ r = radeon_asic_init(rdev); diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 7a810d0796a0..37b1deacb5b1 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -559,7 +559,9 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) return -ENOMEM; } - radeon_vm_init(rdev, &fpriv->vm); + r = radeon_vm_init(rdev, &fpriv->vm); + if (r) + return r; r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); if (r) diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 516017689793..44b6918a87ba 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -84,84 +84,18 @@ static unsigned radeon_vm_directory_size(struct radeon_device *rdev) */ int radeon_vm_manager_init(struct radeon_device *rdev) { - struct radeon_vm *vm; - struct radeon_bo_va *bo_va; int r; - unsigned size; if (!rdev->vm_manager.enabled) { - /* allocate enough for 2 full VM pts */ - size = radeon_vm_directory_size(rdev); - size += rdev->vm_manager.max_pfn * 8; - size *= 2; - r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager, - RADEON_GPU_PAGE_ALIGN(size), - RADEON_VM_PTB_ALIGN_SIZE, - RADEON_GEM_DOMAIN_VRAM); - if (r) { - dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n", - (rdev->vm_manager.max_pfn * 8) >> 10); - return r; - } - r = radeon_asic_vm_init(rdev); if (r) return r; rdev->vm_manager.enabled = true; - - r = radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager); - if (r) - return r; - } - - /* restore page table */ - list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) { - if (vm->page_directory == NULL) - continue; - - list_for_each_entry(bo_va, &vm->va, vm_list) { - bo_va->valid = false; - } } return 0; } -/** - * radeon_vm_free_pt - free the page table for a specific vm - * - * @rdev: radeon_device pointer - * @vm: vm to unbind - * - * Free the page table of a specific vm (cayman+). - * - * Global and local mutex must be lock! - */ -static void radeon_vm_free_pt(struct radeon_device *rdev, - struct radeon_vm *vm) -{ - struct radeon_bo_va *bo_va; - int i; - - if (!vm->page_directory) - return; - - list_del_init(&vm->list); - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); - - list_for_each_entry(bo_va, &vm->va, vm_list) { - bo_va->valid = false; - } - - if (vm->page_tables == NULL) - return; - - for (i = 0; i < radeon_vm_num_pdes(rdev); i++) - radeon_sa_bo_free(rdev, &vm->page_tables[i], vm->fence); - - kfree(vm->page_tables); -} - /** * radeon_vm_manager_fini - tear down the vm manager * @@ -171,155 +105,59 @@ static void radeon_vm_free_pt(struct radeon_device *rdev, */ void radeon_vm_manager_fini(struct radeon_device *rdev) { - struct radeon_vm *vm, *tmp; int i; if (!rdev->vm_manager.enabled) return; mutex_lock(&rdev->vm_manager.lock); - /* free all allocated page tables */ - list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) { - mutex_lock(&vm->mutex); - radeon_vm_free_pt(rdev, vm); - mutex_unlock(&vm->mutex); - } - for (i = 0; i < RADEON_NUM_VM; ++i) { + for (i = 0; i < RADEON_NUM_VM; ++i) radeon_fence_unref(&rdev->vm_manager.active[i]); - } radeon_asic_vm_fini(rdev); - mutex_unlock(&rdev->vm_manager.lock); - - radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager); - radeon_sa_bo_manager_fini(rdev, &rdev->vm_manager.sa_manager); rdev->vm_manager.enabled = false; + mutex_unlock(&rdev->vm_manager.lock); } /** - * radeon_vm_evict - evict page table to make room for new one - * - * @rdev: radeon_device pointer - * @vm: VM we want to allocate something for + * radeon_vm_get_bos - add the vm BOs to a validation list * - * Evict a VM from the lru, making sure that it isn't @vm. (cayman+). - * Returns 0 for success, -ENOMEM for failure. + * @vm: vm providing the BOs + * @head: head of validation list * - * Global and local mutex must be locked! + * Add the page directory to the list of BOs to + * validate for command submission (cayman+). */ -static int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm) +struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev, + struct radeon_vm *vm, + struct list_head *head) { - struct radeon_vm *vm_evict; + struct radeon_bo_list *list; + unsigned i, idx, size; - if (list_empty(&rdev->vm_manager.lru_vm)) - return -ENOMEM; - - vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, - struct radeon_vm, list); - if (vm_evict == vm) - return -ENOMEM; - - mutex_lock(&vm_evict->mutex); - radeon_vm_free_pt(rdev, vm_evict); - mutex_unlock(&vm_evict->mutex); - return 0; -} - -/** - * radeon_vm_alloc_pt - allocates a page table for a VM - * - * @rdev: radeon_device pointer - * @vm: vm to bind - * - * Allocate a page table for the requested vm (cayman+). - * Returns 0 for success, error for failure. - * - * Global and local mutex must be locked! - */ -int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm) -{ - unsigned pd_size, pd_entries, pts_size; - struct radeon_ib ib; - int r; - - if (vm == NULL) { - return -EINVAL; - } - - if (vm->page_directory != NULL) { - return 0; - } - - pd_size = radeon_vm_directory_size(rdev); - pd_entries = radeon_vm_num_pdes(rdev); - -retry: - r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, - &vm->page_directory, pd_size, - RADEON_VM_PTB_ALIGN_SIZE, false); - if (r == -ENOMEM) { - r = radeon_vm_evict(rdev, vm); - if (r) - return r; - goto retry; - - } else if (r) { - return r; - } - - vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->page_directory); - - /* Initially clear the page directory */ - r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, - NULL, pd_entries * 2 + 64); - if (r) { - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); - return r; - } - - ib.length_dw = 0; + size = (radeon_vm_num_pdes(rdev) + 1) * sizeof(struct radeon_bo_list); + list = kmalloc(size, GFP_KERNEL); + if (!list) + return NULL; - radeon_asic_vm_set_page(rdev, &ib, vm->pd_gpu_addr, - 0, pd_entries, 0, 0); + /* add the vm page table to the list */ + list[0].bo = vm->page_directory; + list[0].domain = RADEON_GEM_DOMAIN_VRAM; + list[0].alt_domain = RADEON_GEM_DOMAIN_VRAM; + list[0].tv.bo = &vm->page_directory->tbo; + list_add(&list[0].tv.head, head); - radeon_semaphore_sync_to(ib.semaphore, vm->fence); - r = radeon_ib_schedule(rdev, &ib, NULL); - if (r) { - radeon_ib_free(rdev, &ib); - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); - return r; - } - radeon_fence_unref(&vm->fence); - vm->fence = radeon_fence_ref(ib.fence); - radeon_ib_free(rdev, &ib); - radeon_fence_unref(&vm->last_flush); - - /* allocate page table array */ - pts_size = radeon_vm_num_pdes(rdev) * sizeof(struct radeon_sa_bo *); - vm->page_tables = kzalloc(pts_size, GFP_KERNEL); + for (i = 0, idx = 1; i <= vm->max_pde_used; i++) { + if (!vm->page_tables[i].bo) + continue; - if (vm->page_tables == NULL) { - DRM_ERROR("Cannot allocate memory for page table array\n"); - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); - return -ENOMEM; + list[idx].bo = vm->page_tables[i].bo; + list[idx].domain = RADEON_GEM_DOMAIN_VRAM; + list[idx].alt_domain = RADEON_GEM_DOMAIN_VRAM; + list[idx].tv.bo = &list[idx].bo->tbo; + list_add(&list[idx++].tv.head, head); } - return 0; -} - -/** - * radeon_vm_add_to_lru - add VMs page table to LRU list - * - * @rdev: radeon_device pointer - * @vm: vm to add to LRU - * - * Add the allocated page table to the LRU list (cayman+). - * - * Global mutex must be locked! - */ -void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm) -{ - list_del_init(&vm->list); - list_add_tail(&vm->list, &rdev->vm_manager.lru_vm); + return list; } /** @@ -393,10 +231,14 @@ void radeon_vm_flush(struct radeon_device *rdev, struct radeon_vm *vm, int ring) { + uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory); + /* if we can't remember our last VM flush then flush now! */ /* XXX figure out why we have to flush all the time */ - if (!vm->last_flush || true) + if (!vm->last_flush || true || pd_addr != vm->pd_gpu_addr) { + vm->pd_gpu_addr = pd_addr; radeon_ring_vm_flush(rdev, ring, vm); + } } /** @@ -495,6 +337,63 @@ struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev, return bo_va; } +/** + * radeon_vm_clear_bo - initially clear the page dir/table + * + * @rdev: radeon_device pointer + * @bo: bo to clear + */ +static int radeon_vm_clear_bo(struct radeon_device *rdev, + struct radeon_bo *bo) +{ + struct ttm_validate_buffer tv; + struct ww_acquire_ctx ticket; + struct list_head head; + struct radeon_ib ib; + unsigned entries; + uint64_t addr; + int r; + + memset(&tv, 0, sizeof(tv)); + tv.bo = &bo->tbo; + + INIT_LIST_HEAD(&head); + list_add(&tv.head, &head); + + r = ttm_eu_reserve_buffers(&ticket, &head); + if (r) + return r; + + r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); + if (r) + goto error; + + addr = radeon_bo_gpu_offset(bo); + entries = radeon_bo_size(bo) / 8; + + r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, + NULL, entries * 2 + 64); + if (r) + goto error; + + ib.length_dw = 0; + + radeon_asic_vm_set_page(rdev, &ib, addr, 0, entries, 0, 0); + + r = radeon_ib_schedule(rdev, &ib, NULL); + if (r) + goto error; + + ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence); + radeon_ib_free(rdev, &ib); + + return 0; + +error: + ttm_eu_backoff_reservation(&ticket, &head); + return r; +} + /** * radeon_vm_bo_set_addr - set bos virtual address inside a vm * @@ -519,7 +418,8 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, struct radeon_vm *vm = bo_va->vm; struct radeon_bo_va *tmp; struct list_head *head; - unsigned last_pfn; + unsigned last_pfn, pt_idx; + int r; if (soffset) { /* make sure object fit at this offset */ @@ -570,8 +470,53 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, bo_va->valid = false; list_move(&bo_va->vm_list, head); + soffset = (soffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; + eoffset = (eoffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; + + if (eoffset > vm->max_pde_used) + vm->max_pde_used = eoffset; + + radeon_bo_unreserve(bo_va->bo); + + /* walk over the address space and allocate the page tables */ + for (pt_idx = soffset; pt_idx <= eoffset; ++pt_idx) { + struct radeon_bo *pt; + + if (vm->page_tables[pt_idx].bo) + continue; + + /* drop mutex to allocate and clear page table */ + mutex_unlock(&vm->mutex); + + r = radeon_bo_create(rdev, RADEON_VM_PTE_COUNT * 8, + RADEON_GPU_PAGE_SIZE, false, + RADEON_GEM_DOMAIN_VRAM, NULL, &pt); + if (r) + return r; + + r = radeon_vm_clear_bo(rdev, pt); + if (r) { + radeon_bo_unref(&pt); + radeon_bo_reserve(bo_va->bo, false); + return r; + } + + /* aquire mutex again */ + mutex_lock(&vm->mutex); + if (vm->page_tables[pt_idx].bo) { + /* someone else allocated the pt in the meantime */ + mutex_unlock(&vm->mutex); + radeon_bo_unref(&pt); + mutex_lock(&vm->mutex); + continue; + } + + vm->page_tables[pt_idx].addr = 0; + vm->page_tables[pt_idx].bo = pt; + } + mutex_unlock(&vm->mutex); - return 0; + return radeon_bo_reserve(bo_va->bo, false); } /** @@ -631,58 +576,53 @@ static uint32_t radeon_vm_page_flags(uint32_t flags) * * Global and local mutex must be locked! */ -static int radeon_vm_update_pdes(struct radeon_device *rdev, - struct radeon_vm *vm, - struct radeon_ib *ib, - uint64_t start, uint64_t end) +int radeon_vm_update_page_directory(struct radeon_device *rdev, + struct radeon_vm *vm) { static const uint32_t incr = RADEON_VM_PTE_COUNT * 8; + uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory); uint64_t last_pde = ~0, last_pt = ~0; - unsigned count = 0; - uint64_t pt_idx; + unsigned count = 0, pt_idx, ndw; + struct radeon_ib ib; int r; - start = (start / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; - end = (end / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; + /* padding, etc. */ + ndw = 64; + + /* assume the worst case */ + ndw += vm->max_pde_used * 12; + + /* update too big for an IB */ + if (ndw > 0xfffff) + return -ENOMEM; + + r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4); + if (r) + return r; + ib.length_dw = 0; /* walk over the address space and update the page directory */ - for (pt_idx = start; pt_idx <= end; ++pt_idx) { + for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) { + struct radeon_bo *bo = vm->page_tables[pt_idx].bo; uint64_t pde, pt; - if (vm->page_tables[pt_idx]) + if (bo == NULL) continue; -retry: - r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, - &vm->page_tables[pt_idx], - RADEON_VM_PTE_COUNT * 8, - RADEON_GPU_PAGE_SIZE, false); - - if (r == -ENOMEM) { - r = radeon_vm_evict(rdev, vm); - if (r) - return r; - goto retry; - } else if (r) { - return r; - } - - pde = vm->pd_gpu_addr + pt_idx * 8; - - pt = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]); + pt = radeon_bo_gpu_offset(bo); + if (vm->page_tables[pt_idx].addr == pt) + continue; + vm->page_tables[pt_idx].addr = pt; + pde = pd_addr + pt_idx * 8; if (((last_pde + 8 * count) != pde) || ((last_pt + incr * count) != pt)) { if (count) { - radeon_asic_vm_set_page(rdev, ib, last_pde, + radeon_asic_vm_set_page(rdev, &ib, last_pde, last_pt, count, incr, R600_PTE_VALID); - - count *= RADEON_VM_PTE_COUNT; - radeon_asic_vm_set_page(rdev, ib, last_pt, 0, - count, 0, 0); } count = 1; @@ -693,14 +633,22 @@ retry: } } - if (count) { - radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count, + if (count) + radeon_asic_vm_set_page(rdev, &ib, last_pde, last_pt, count, incr, R600_PTE_VALID); - count *= RADEON_VM_PTE_COUNT; - radeon_asic_vm_set_page(rdev, ib, last_pt, 0, - count, 0, 0); + if (ib.length_dw != 0) { + radeon_semaphore_sync_to(ib.semaphore, vm->last_id_use); + r = radeon_ib_schedule(rdev, &ib, NULL); + if (r) { + radeon_ib_free(rdev, &ib); + return r; + } + radeon_fence_unref(&vm->fence); + vm->fence = radeon_fence_ref(ib.fence); + radeon_fence_unref(&vm->last_flush); } + radeon_ib_free(rdev, &ib); return 0; } @@ -745,7 +693,7 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev, else nptes = RADEON_VM_PTE_COUNT - (addr & mask); - pte = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]); + pte = radeon_bo_gpu_offset(vm->page_tables[pt_idx].bo); pte += (addr & mask) * 8; if ((last_pte + 8 * count) != pte) { @@ -795,14 +743,10 @@ int radeon_vm_bo_update(struct radeon_device *rdev, { struct radeon_ib ib; struct radeon_bo_va *bo_va; - unsigned nptes, npdes, ndw; + unsigned nptes, ndw; uint64_t addr; int r; - /* nothing to do if vm isn't bound */ - if (vm->page_directory == NULL) - return 0; - bo_va = radeon_vm_bo_find(vm, bo); if (bo_va == NULL) { dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm); @@ -840,9 +784,6 @@ int radeon_vm_bo_update(struct radeon_device *rdev, nptes = radeon_bo_ngpu_pages(bo); - /* assume two extra pdes in case the mapping overlaps the borders */ - npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2; - /* padding, etc. */ ndw = 64; @@ -857,15 +798,6 @@ int radeon_vm_bo_update(struct radeon_device *rdev, /* reserve space for pte addresses */ ndw += nptes * 2; - /* reserve space for one header for every 2k dwords */ - ndw += (npdes >> 11) * 4; - - /* reserve space for pde addresses */ - ndw += npdes * 2; - - /* reserve space for clearing new page tables */ - ndw += npdes * 2 * RADEON_VM_PTE_COUNT; - /* update too big for an IB */ if (ndw > 0xfffff) return -ENOMEM; @@ -875,12 +807,6 @@ int radeon_vm_bo_update(struct radeon_device *rdev, return r; ib.length_dw = 0; - r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset); - if (r) { - radeon_ib_free(rdev, &ib); - return r; - } - radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset, addr, radeon_vm_page_flags(bo_va->flags)); @@ -957,15 +883,43 @@ void radeon_vm_bo_invalidate(struct radeon_device *rdev, * * Init @vm fields (cayman+). */ -void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) +int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) { + unsigned pd_size, pd_entries, pts_size; + int r; + vm->id = 0; vm->fence = NULL; vm->last_flush = NULL; vm->last_id_use = NULL; mutex_init(&vm->mutex); - INIT_LIST_HEAD(&vm->list); INIT_LIST_HEAD(&vm->va); + + pd_size = radeon_vm_directory_size(rdev); + pd_entries = radeon_vm_num_pdes(rdev); + + /* allocate page table array */ + pts_size = pd_entries * sizeof(struct radeon_vm_pt); + vm->page_tables = kzalloc(pts_size, GFP_KERNEL); + if (vm->page_tables == NULL) { + DRM_ERROR("Cannot allocate memory for page table array\n"); + return -ENOMEM; + } + + r = radeon_bo_create(rdev, pd_size, RADEON_VM_PTB_ALIGN_SIZE, false, + RADEON_GEM_DOMAIN_VRAM, NULL, + &vm->page_directory); + if (r) + return r; + + r = radeon_vm_clear_bo(rdev, vm->page_directory); + if (r) { + radeon_bo_unref(&vm->page_directory); + vm->page_directory = NULL; + return r; + } + + return 0; } /** @@ -980,12 +934,7 @@ void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) { struct radeon_bo_va *bo_va, *tmp; - int r; - - mutex_lock(&rdev->vm_manager.lock); - mutex_lock(&vm->mutex); - radeon_vm_free_pt(rdev, vm); - mutex_unlock(&rdev->vm_manager.lock); + int i, r; if (!list_empty(&vm->va)) { dev_err(rdev->dev, "still active bo inside vm\n"); @@ -999,8 +948,17 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) kfree(bo_va); } } + + + for (i = 0; i < radeon_vm_num_pdes(rdev); i++) + radeon_bo_unref(&vm->page_tables[i].bo); + kfree(vm->page_tables); + + radeon_bo_unref(&vm->page_directory); + radeon_fence_unref(&vm->fence); radeon_fence_unref(&vm->last_flush); radeon_fence_unref(&vm->last_id_use); - mutex_unlock(&vm->mutex); + + mutex_destroy(&vm->mutex); } -- cgit From 529364e05bc093dc41b0c9e67e94ac82442c1b35 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 20 Feb 2014 19:33:15 +0100 Subject: drm/radeon: remove global vm lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not needed any more. Signed-off-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon.h | 1 - drivers/gpu/drm/radeon/radeon_cs.c | 4 ---- drivers/gpu/drm/radeon/radeon_device.c | 3 +-- drivers/gpu/drm/radeon/radeon_ring.c | 7 +++++++ drivers/gpu/drm/radeon/radeon_vm.c | 10 +++------- 5 files changed, 11 insertions(+), 14 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index c31e3c2c1b3a..cd6a48099547 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -884,7 +884,6 @@ struct radeon_vm { }; struct radeon_vm_manager { - struct mutex lock; struct radeon_fence *active[RADEON_NUM_VM]; uint32_t max_pfn; /* number of VMIDs */ diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 420c28dd6a1c..0570e7675fd3 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -502,7 +502,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, if (parser->ring == R600_RING_TYPE_UVD_INDEX) radeon_uvd_note_usage(rdev); - mutex_lock(&rdev->vm_manager.lock); mutex_lock(&vm->mutex); r = radeon_bo_vm_update_pte(parser, vm); if (r) { @@ -510,8 +509,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, } radeon_cs_sync_rings(parser); radeon_semaphore_sync_to(parser->ib.semaphore, vm->fence); - radeon_semaphore_sync_to(parser->ib.semaphore, - radeon_vm_grab_id(rdev, vm, parser->ring)); if ((rdev->family >= CHIP_TAHITI) && (parser->chunk_const_ib_idx != -1)) { @@ -522,7 +519,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, out: mutex_unlock(&vm->mutex); - mutex_unlock(&rdev->vm_manager.lock); return r; } diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index e58dbabd7d7d..7db44de90d6c 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1191,8 +1191,7 @@ int radeon_device_init(struct radeon_device *rdev, r = radeon_gem_init(rdev); if (r) return r; - /* initialize vm here */ - mutex_init(&rdev->vm_manager.lock); + /* Adjust VM size here. * Currently set to 4GB ((1 << 20) 4k pages). * Max GPUVM size for cayman and SI is 40 bits. diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 665591a7faad..5321e249021f 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -145,6 +145,13 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, return r; } + /* grab a vm id if necessary */ + if (ib->vm) { + struct radeon_fence *vm_id_fence; + vm_id_fence = radeon_vm_grab_id(rdev, ib->vm, ib->ring); + radeon_semaphore_sync_to(ib->semaphore, vm_id_fence); + } + /* sync with other rings */ r = radeon_semaphore_sync_rings(rdev, ib->semaphore, ib->ring); if (r) { diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 44b6918a87ba..81d91b513ce1 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -110,12 +110,10 @@ void radeon_vm_manager_fini(struct radeon_device *rdev) if (!rdev->vm_manager.enabled) return; - mutex_lock(&rdev->vm_manager.lock); for (i = 0; i < RADEON_NUM_VM; ++i) radeon_fence_unref(&rdev->vm_manager.active[i]); radeon_asic_vm_fini(rdev); rdev->vm_manager.enabled = false; - mutex_unlock(&rdev->vm_manager.lock); } /** @@ -734,7 +732,7 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev, * Fill in the page table entries for @bo (cayman+). * Returns 0 for success, -EINVAL for failure. * - * Object have to be reserved & global and local mutex must be locked! + * Object have to be reserved and mutex must be locked! */ int radeon_vm_bo_update(struct radeon_device *rdev, struct radeon_vm *vm, @@ -842,12 +840,10 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev, { int r = 0; - mutex_lock(&rdev->vm_manager.lock); mutex_lock(&bo_va->vm->mutex); - if (bo_va->soffset) { + if (bo_va->soffset) r = radeon_vm_bo_update(rdev, bo_va->vm, bo_va->bo, NULL); - } - mutex_unlock(&rdev->vm_manager.lock); + list_del(&bo_va->vm_list); mutex_unlock(&bo_va->vm->mutex); list_del(&bo_va->bo_list); -- cgit From 4d1526466296360f56f93c195848c1202b0cc10b Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 20 Feb 2014 21:48:00 +0100 Subject: drm/radeon: drop non blocking allocations from sub allocator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not needed any more. Signed-off-by: Christian König --- drivers/gpu/drm/radeon/radeon_object.h | 2 +- drivers/gpu/drm/radeon/radeon_ring.c | 2 +- drivers/gpu/drm/radeon/radeon_sa.c | 7 ++----- drivers/gpu/drm/radeon/radeon_semaphore.c | 2 +- 4 files changed, 5 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index 7dff64d1f592..9e7b25a0629d 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -180,7 +180,7 @@ extern int radeon_sa_bo_manager_suspend(struct radeon_device *rdev, extern int radeon_sa_bo_new(struct radeon_device *rdev, struct radeon_sa_manager *sa_manager, struct radeon_sa_bo **sa_bo, - unsigned size, unsigned align, bool block); + unsigned size, unsigned align); extern void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo, struct radeon_fence *fence); diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 5321e249021f..8b0dfdd23793 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -63,7 +63,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, { int r; - r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256, true); + r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256); if (r) { dev_err(rdev->dev, "failed to get a new IB (%d)\n", r); return r; diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c index c0625805cdd7..adcf3e2f07da 100644 --- a/drivers/gpu/drm/radeon/radeon_sa.c +++ b/drivers/gpu/drm/radeon/radeon_sa.c @@ -312,7 +312,7 @@ static bool radeon_sa_bo_next_hole(struct radeon_sa_manager *sa_manager, int radeon_sa_bo_new(struct radeon_device *rdev, struct radeon_sa_manager *sa_manager, struct radeon_sa_bo **sa_bo, - unsigned size, unsigned align, bool block) + unsigned size, unsigned align) { struct radeon_fence *fences[RADEON_NUM_RINGS]; unsigned tries[RADEON_NUM_RINGS]; @@ -353,14 +353,11 @@ int radeon_sa_bo_new(struct radeon_device *rdev, r = radeon_fence_wait_any(rdev, fences, false); spin_lock(&sa_manager->wq.lock); /* if we have nothing to wait for block */ - if (r == -ENOENT && block) { + if (r == -ENOENT) { r = wait_event_interruptible_locked( sa_manager->wq, radeon_sa_event(sa_manager, size, align) ); - - } else if (r == -ENOENT) { - r = -ENOMEM; } } while (!r); diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c index 6140af6772c7..dbd6bcde92de 100644 --- a/drivers/gpu/drm/radeon/radeon_semaphore.c +++ b/drivers/gpu/drm/radeon/radeon_semaphore.c @@ -42,7 +42,7 @@ int radeon_semaphore_create(struct radeon_device *rdev, return -ENOMEM; } r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &(*semaphore)->sa_bo, - 8 * RADEON_NUM_SYNCS, 8, true); + 8 * RADEON_NUM_SYNCS, 8); if (r) { kfree(*semaphore); *semaphore = NULL; -- cgit From df0af4403aa8df728a62ccb62a61b3244871068f Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 3 Mar 2014 12:38:08 +0100 Subject: drm/radeon: remove struct radeon_bo_list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just move all fields into radeon_cs_reloc, removing unused/duplicated fields. Signed-off-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/radeon/evergreen_cs.c | 210 ++++++++++++++++----------------- drivers/gpu/drm/radeon/r100.c | 40 +++---- drivers/gpu/drm/radeon/r200.c | 20 ++-- drivers/gpu/drm/radeon/r300.c | 32 ++--- drivers/gpu/drm/radeon/r600_cs.c | 110 ++++++++--------- drivers/gpu/drm/radeon/radeon.h | 24 ++-- drivers/gpu/drm/radeon/radeon_cs.c | 23 ++-- drivers/gpu/drm/radeon/radeon_object.c | 4 +- drivers/gpu/drm/radeon/radeon_uvd.c | 2 +- drivers/gpu/drm/radeon/radeon_vce.c | 2 +- drivers/gpu/drm/radeon/radeon_vm.c | 22 ++-- 11 files changed, 244 insertions(+), 245 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index c7cac07f139b..5c8b358f9fba 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -1165,7 +1165,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) "0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); break; case DB_DEPTH_CONTROL: track->db_depth_control = radeon_get_ib_value(p, idx); @@ -1196,12 +1196,12 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) } ib[idx] &= ~Z_ARRAY_MODE(0xf); track->db_z_info &= ~Z_ARRAY_MODE(0xf); - ib[idx] |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); - track->db_z_info |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { + ib[idx] |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); + track->db_z_info |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); + if (reloc->tiling_flags & RADEON_TILING_MACRO) { unsigned bankw, bankh, mtaspect, tile_split; - evergreen_tiling_fields(reloc->lobj.tiling_flags, + evergreen_tiling_fields(reloc->tiling_flags, &bankw, &bankh, &mtaspect, &tile_split); ib[idx] |= DB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks)); @@ -1237,7 +1237,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) return -EINVAL; } track->db_z_read_offset = radeon_get_ib_value(p, idx); - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->db_z_read_bo = reloc->robj; track->db_dirty = true; break; @@ -1249,7 +1249,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) return -EINVAL; } track->db_z_write_offset = radeon_get_ib_value(p, idx); - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->db_z_write_bo = reloc->robj; track->db_dirty = true; break; @@ -1261,7 +1261,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) return -EINVAL; } track->db_s_read_offset = radeon_get_ib_value(p, idx); - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->db_s_read_bo = reloc->robj; track->db_dirty = true; break; @@ -1273,7 +1273,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) return -EINVAL; } track->db_s_write_offset = radeon_get_ib_value(p, idx); - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->db_s_write_bo = reloc->robj; track->db_dirty = true; break; @@ -1297,7 +1297,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) } tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16; track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8; - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->vgt_strmout_bo[tmp] = reloc->robj; track->streamout_dirty = true; break; @@ -1317,7 +1317,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) "0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); case CB_TARGET_MASK: track->cb_target_mask = radeon_get_ib_value(p, idx); track->cb_dirty = true; @@ -1381,8 +1381,8 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) "0x%04X\n", reg); return -EINVAL; } - ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); - track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); + ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); + track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); } track->cb_dirty = true; break; @@ -1399,8 +1399,8 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) "0x%04X\n", reg); return -EINVAL; } - ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); - track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); + ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); + track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); } track->cb_dirty = true; break; @@ -1461,10 +1461,10 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) return -EINVAL; } if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { + if (reloc->tiling_flags & RADEON_TILING_MACRO) { unsigned bankw, bankh, mtaspect, tile_split; - evergreen_tiling_fields(reloc->lobj.tiling_flags, + evergreen_tiling_fields(reloc->tiling_flags, &bankw, &bankh, &mtaspect, &tile_split); ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks)); @@ -1489,10 +1489,10 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) return -EINVAL; } if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { + if (reloc->tiling_flags & RADEON_TILING_MACRO) { unsigned bankw, bankh, mtaspect, tile_split; - evergreen_tiling_fields(reloc->lobj.tiling_flags, + evergreen_tiling_fields(reloc->tiling_flags, &bankw, &bankh, &mtaspect, &tile_split); ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks)); @@ -1520,7 +1520,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->cb_color_fmask_bo[tmp] = reloc->robj; break; case CB_COLOR0_CMASK: @@ -1537,7 +1537,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->cb_color_cmask_bo[tmp] = reloc->robj; break; case CB_COLOR0_FMASK_SLICE: @@ -1578,7 +1578,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) } tmp = (reg - CB_COLOR0_BASE) / 0x3c; track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx); - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->cb_color_bo[tmp] = reloc->robj; track->cb_dirty = true; break; @@ -1594,7 +1594,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) } tmp = ((reg - CB_COLOR8_BASE) / 0x1c) + 8; track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx); - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->cb_color_bo[tmp] = reloc->robj; track->cb_dirty = true; break; @@ -1606,7 +1606,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) return -EINVAL; } track->htile_offset = radeon_get_ib_value(p, idx); - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->htile_bo = reloc->robj; track->db_dirty = true; break; @@ -1723,7 +1723,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) "0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); break; case SX_MEMORY_EXPORT_BASE: if (p->rdev->family >= CHIP_CAYMAN) { @@ -1737,7 +1737,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) "0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); break; case CAYMAN_SX_SCATTER_EXPORT_BASE: if (p->rdev->family < CHIP_CAYMAN) { @@ -1751,7 +1751,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) "0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); break; case SX_MISC: track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0; @@ -1836,7 +1836,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } - offset = reloc->lobj.gpu_offset + + offset = reloc->gpu_offset + (idx_value & 0xfffffff0) + ((u64)(tmp & 0xff) << 32); @@ -1882,7 +1882,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } - offset = reloc->lobj.gpu_offset + + offset = reloc->gpu_offset + idx_value + ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); @@ -1909,7 +1909,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } - offset = reloc->lobj.gpu_offset + + offset = reloc->gpu_offset + idx_value + ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); @@ -1937,7 +1937,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } - offset = reloc->lobj.gpu_offset + + offset = reloc->gpu_offset + radeon_get_ib_value(p, idx+1) + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); @@ -2027,7 +2027,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, DRM_ERROR("bad DISPATCH_INDIRECT\n"); return -EINVAL; } - ib[idx+0] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff); + ib[idx+0] = idx_value + (u32)(reloc->gpu_offset & 0xffffffff); r = evergreen_cs_track_check(p); if (r) { dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); @@ -2049,7 +2049,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } - offset = reloc->lobj.gpu_offset + + offset = reloc->gpu_offset + (radeon_get_ib_value(p, idx+1) & 0xfffffffc) + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); @@ -2106,7 +2106,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, tmp = radeon_get_ib_value(p, idx) + ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); - offset = reloc->lobj.gpu_offset + tmp; + offset = reloc->gpu_offset + tmp; if ((tmp + size) > radeon_bo_size(reloc->robj)) { dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n", @@ -2144,7 +2144,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, tmp = radeon_get_ib_value(p, idx+2) + ((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32); - offset = reloc->lobj.gpu_offset + tmp; + offset = reloc->gpu_offset + tmp; if ((tmp + size) > radeon_bo_size(reloc->robj)) { dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n", @@ -2174,7 +2174,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, DRM_ERROR("bad SURFACE_SYNC\n"); return -EINVAL; } - ib[idx+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx+2] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); } break; case PACKET3_EVENT_WRITE: @@ -2190,7 +2190,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, DRM_ERROR("bad EVENT_WRITE\n"); return -EINVAL; } - offset = reloc->lobj.gpu_offset + + offset = reloc->gpu_offset + (radeon_get_ib_value(p, idx+1) & 0xfffffff8) + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); @@ -2212,7 +2212,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } - offset = reloc->lobj.gpu_offset + + offset = reloc->gpu_offset + (radeon_get_ib_value(p, idx+1) & 0xfffffffc) + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); @@ -2234,7 +2234,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } - offset = reloc->lobj.gpu_offset + + offset = reloc->gpu_offset + (radeon_get_ib_value(p, idx+1) & 0xfffffffc) + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); @@ -2302,11 +2302,11 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, } if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { ib[idx+1+(i*8)+1] |= - TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { + TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); + if (reloc->tiling_flags & RADEON_TILING_MACRO) { unsigned bankw, bankh, mtaspect, tile_split; - evergreen_tiling_fields(reloc->lobj.tiling_flags, + evergreen_tiling_fields(reloc->tiling_flags, &bankw, &bankh, &mtaspect, &tile_split); ib[idx+1+(i*8)+6] |= TEX_TILE_SPLIT(tile_split); @@ -2318,7 +2318,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, } } texture = reloc->robj; - toffset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + toffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff); /* tex mip base */ tex_dim = ib[idx+1+(i*8)+0] & 0x7; @@ -2337,7 +2337,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, DRM_ERROR("bad SET_RESOURCE (tex)\n"); return -EINVAL; } - moffset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + moffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff); mipmap = reloc->robj; } @@ -2364,7 +2364,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj) - offset; } - offset64 = reloc->lobj.gpu_offset + offset; + offset64 = reloc->gpu_offset + offset; ib[idx+1+(i*8)+0] = offset64; ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) | (upper_32_bits(offset64) & 0xff); @@ -2445,7 +2445,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } - offset += reloc->lobj.gpu_offset; + offset += reloc->gpu_offset; ib[idx+1] = offset; ib[idx+2] = upper_32_bits(offset) & 0xff; } @@ -2464,7 +2464,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } - offset += reloc->lobj.gpu_offset; + offset += reloc->gpu_offset; ib[idx+3] = offset; ib[idx+4] = upper_32_bits(offset) & 0xff; } @@ -2493,7 +2493,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, offset + 8, radeon_bo_size(reloc->robj)); return -EINVAL; } - offset += reloc->lobj.gpu_offset; + offset += reloc->gpu_offset; ib[idx+0] = offset; ib[idx+1] = upper_32_bits(offset) & 0xff; break; @@ -2518,7 +2518,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } - offset += reloc->lobj.gpu_offset; + offset += reloc->gpu_offset; ib[idx+1] = offset; ib[idx+2] = upper_32_bits(offset) & 0xff; } else { @@ -2542,7 +2542,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } - offset += reloc->lobj.gpu_offset; + offset += reloc->gpu_offset; ib[idx+3] = offset; ib[idx+4] = upper_32_bits(offset) & 0xff; } else { @@ -2717,7 +2717,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) dst_offset = radeon_get_ib_value(p, idx+1); dst_offset <<= 8; - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); p->idx += count + 7; break; /* linear */ @@ -2725,8 +2725,8 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) dst_offset = radeon_get_ib_value(p, idx+1); dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); + ib[idx+2] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; p->idx += count + 3; break; default: @@ -2768,10 +2768,10 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; - ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); + ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xfffffffc); + ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; + ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff; p->idx += 5; break; /* Copy L2T/T2L */ @@ -2781,22 +2781,22 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) /* tiled src, linear dst */ src_offset = radeon_get_ib_value(p, idx+1); src_offset <<= 8; - ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); + ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8); dst_offset = radeon_get_ib_value(p, idx + 7); dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32; - ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; + ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); + ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; } else { /* linear src, tiled dst */ src_offset = radeon_get_ib_value(p, idx+7); src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32; - ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc); + ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff; dst_offset = radeon_get_ib_value(p, idx+1); dst_offset <<= 8; - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); } if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { dev_warn(p->dev, "DMA L2T, src buffer too small (%llu %lu)\n", @@ -2827,10 +2827,10 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) dst_offset + count, radeon_bo_size(dst_reloc->robj)); return -EINVAL; } - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; - ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xffffffff); + ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xffffffff); + ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; + ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff; p->idx += 5; break; /* Copy L2L, partial */ @@ -2840,10 +2840,10 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) DRM_ERROR("L2L Partial is cayman only !\n"); return -EINVAL; } - ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+2] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; - ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+5] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; + ib[idx+1] += (u32)(src_reloc->gpu_offset & 0xffffffff); + ib[idx+2] += upper_32_bits(src_reloc->gpu_offset) & 0xff; + ib[idx+4] += (u32)(dst_reloc->gpu_offset & 0xffffffff); + ib[idx+5] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; p->idx += 9; break; @@ -2876,12 +2876,12 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); return -EINVAL; } - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+3] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+4] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; - ib[idx+5] += upper_32_bits(dst2_reloc->lobj.gpu_offset) & 0xff; - ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); + ib[idx+2] += (u32)(dst2_reloc->gpu_offset & 0xfffffffc); + ib[idx+3] += (u32)(src_reloc->gpu_offset & 0xfffffffc); + ib[idx+4] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; + ib[idx+5] += upper_32_bits(dst2_reloc->gpu_offset) & 0xff; + ib[idx+6] += upper_32_bits(src_reloc->gpu_offset) & 0xff; p->idx += 7; break; /* Copy L2T Frame to Field */ @@ -2916,10 +2916,10 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); return -EINVAL; } - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); - ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8); - ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); + ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8); + ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc); + ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff; p->idx += 10; break; /* Copy L2T/T2L, partial */ @@ -2932,16 +2932,16 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) /* detile bit */ if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) { /* tiled src, linear dst */ - ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); + ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8); - ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; + ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); + ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; } else { /* linear src, tiled dst */ - ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc); + ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff; - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); } p->idx += 12; break; @@ -2978,10 +2978,10 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); return -EINVAL; } - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); - ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8); - ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); + ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8); + ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc); + ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff; p->idx += 10; break; /* Copy L2T/T2L (tile units) */ @@ -2992,22 +2992,22 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) /* tiled src, linear dst */ src_offset = radeon_get_ib_value(p, idx+1); src_offset <<= 8; - ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); + ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8); dst_offset = radeon_get_ib_value(p, idx+7); dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32; - ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; + ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); + ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; } else { /* linear src, tiled dst */ src_offset = radeon_get_ib_value(p, idx+7); src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32; - ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc); + ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff; dst_offset = radeon_get_ib_value(p, idx+1); dst_offset <<= 8; - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); } if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n", @@ -3028,8 +3028,8 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) DRM_ERROR("L2T, T2L Partial is cayman only !\n"); return -EINVAL; } - ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); - ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset >> 8); + ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8); + ib[idx+4] += (u32)(dst_reloc->gpu_offset >> 8); p->idx += 13; break; /* Copy L2T broadcast (tile units) */ @@ -3065,10 +3065,10 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); return -EINVAL; } - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); - ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8); - ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); + ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8); + ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc); + ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff; p->idx += 10; break; default: @@ -3089,8 +3089,8 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) dst_offset, radeon_bo_size(dst_reloc->robj)); return -EINVAL; } - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000; + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); + ib[idx+3] += (upper_32_bits(dst_reloc->gpu_offset) << 16) & 0x00ff0000; p->idx += 4; break; case DMA_PACKET_NOP: diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 1690a2dc0721..0a894aee7406 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -1274,12 +1274,12 @@ int r100_reloc_pitch_offset(struct radeon_cs_parser *p, value = radeon_get_ib_value(p, idx); tmp = value & 0x003fffff; - tmp += (((u32)reloc->lobj.gpu_offset) >> 10); + tmp += (((u32)reloc->gpu_offset) >> 10); if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + if (reloc->tiling_flags & RADEON_TILING_MACRO) tile_flags |= RADEON_DST_TILE_MACRO; - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { + if (reloc->tiling_flags & RADEON_TILING_MICRO) { if (reg == RADEON_SRC_PITCH_OFFSET) { DRM_ERROR("Cannot src blit from microtiled surface\n"); radeon_cs_dump_packet(p, pkt); @@ -1325,7 +1325,7 @@ int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, return r; } idx_value = radeon_get_ib_value(p, idx); - ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); + ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->gpu_offset); track->arrays[i + 0].esize = idx_value >> 8; track->arrays[i + 0].robj = reloc->robj; @@ -1337,7 +1337,7 @@ int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, radeon_cs_dump_packet(p, pkt); return r; } - ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset); + ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->gpu_offset); track->arrays[i + 1].robj = reloc->robj; track->arrays[i + 1].esize = idx_value >> 24; track->arrays[i + 1].esize &= 0x7F; @@ -1351,7 +1351,7 @@ int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, return r; } idx_value = radeon_get_ib_value(p, idx); - ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); + ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->gpu_offset); track->arrays[i + 0].robj = reloc->robj; track->arrays[i + 0].esize = idx_value >> 8; track->arrays[i + 0].esize &= 0x7F; @@ -1594,7 +1594,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p, track->zb.robj = reloc->robj; track->zb.offset = idx_value; track->zb_dirty = true; - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); break; case RADEON_RB3D_COLOROFFSET: r = radeon_cs_packet_next_reloc(p, &reloc, 0); @@ -1607,7 +1607,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p, track->cb[0].robj = reloc->robj; track->cb[0].offset = idx_value; track->cb_dirty = true; - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); break; case RADEON_PP_TXOFFSET_0: case RADEON_PP_TXOFFSET_1: @@ -1621,16 +1621,16 @@ static int r100_packet0_check(struct radeon_cs_parser *p, return r; } if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + if (reloc->tiling_flags & RADEON_TILING_MACRO) tile_flags |= RADEON_TXO_MACRO_TILE; - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) + if (reloc->tiling_flags & RADEON_TILING_MICRO) tile_flags |= RADEON_TXO_MICRO_TILE_X2; tmp = idx_value & ~(0x7 << 2); tmp |= tile_flags; - ib[idx] = tmp + ((u32)reloc->lobj.gpu_offset); + ib[idx] = tmp + ((u32)reloc->gpu_offset); } else - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); track->textures[i].robj = reloc->robj; track->tex_dirty = true; break; @@ -1648,7 +1648,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p, return r; } track->textures[0].cube_info[i].offset = idx_value; - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); track->textures[0].cube_info[i].robj = reloc->robj; track->tex_dirty = true; break; @@ -1666,7 +1666,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p, return r; } track->textures[1].cube_info[i].offset = idx_value; - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); track->textures[1].cube_info[i].robj = reloc->robj; track->tex_dirty = true; break; @@ -1684,7 +1684,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p, return r; } track->textures[2].cube_info[i].offset = idx_value; - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); track->textures[2].cube_info[i].robj = reloc->robj; track->tex_dirty = true; break; @@ -1702,9 +1702,9 @@ static int r100_packet0_check(struct radeon_cs_parser *p, return r; } if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + if (reloc->tiling_flags & RADEON_TILING_MACRO) tile_flags |= RADEON_COLOR_TILE_ENABLE; - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) + if (reloc->tiling_flags & RADEON_TILING_MICRO) tile_flags |= RADEON_COLOR_MICROTILE_ENABLE; tmp = idx_value & ~(0x7 << 16); @@ -1772,7 +1772,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p, radeon_cs_dump_packet(p, pkt); return r; } - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); break; case RADEON_PP_CNTL: { @@ -1932,7 +1932,7 @@ static int r100_packet3_check(struct radeon_cs_parser *p, radeon_cs_dump_packet(p, pkt); return r; } - ib[idx+1] = radeon_get_ib_value(p, idx+1) + ((u32)reloc->lobj.gpu_offset); + ib[idx+1] = radeon_get_ib_value(p, idx+1) + ((u32)reloc->gpu_offset); r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj); if (r) { return r; @@ -1946,7 +1946,7 @@ static int r100_packet3_check(struct radeon_cs_parser *p, radeon_cs_dump_packet(p, pkt); return r; } - ib[idx] = radeon_get_ib_value(p, idx) + ((u32)reloc->lobj.gpu_offset); + ib[idx] = radeon_get_ib_value(p, idx) + ((u32)reloc->gpu_offset); track->num_arrays = 1; track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 2)); diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c index b3807edb1936..58f0473aa73f 100644 --- a/drivers/gpu/drm/radeon/r200.c +++ b/drivers/gpu/drm/radeon/r200.c @@ -185,7 +185,7 @@ int r200_packet0_check(struct radeon_cs_parser *p, track->zb.robj = reloc->robj; track->zb.offset = idx_value; track->zb_dirty = true; - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); break; case RADEON_RB3D_COLOROFFSET: r = radeon_cs_packet_next_reloc(p, &reloc, 0); @@ -198,7 +198,7 @@ int r200_packet0_check(struct radeon_cs_parser *p, track->cb[0].robj = reloc->robj; track->cb[0].offset = idx_value; track->cb_dirty = true; - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); break; case R200_PP_TXOFFSET_0: case R200_PP_TXOFFSET_1: @@ -215,16 +215,16 @@ int r200_packet0_check(struct radeon_cs_parser *p, return r; } if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + if (reloc->tiling_flags & RADEON_TILING_MACRO) tile_flags |= R200_TXO_MACRO_TILE; - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) + if (reloc->tiling_flags & RADEON_TILING_MICRO) tile_flags |= R200_TXO_MICRO_TILE; tmp = idx_value & ~(0x7 << 2); tmp |= tile_flags; - ib[idx] = tmp + ((u32)reloc->lobj.gpu_offset); + ib[idx] = tmp + ((u32)reloc->gpu_offset); } else - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); track->textures[i].robj = reloc->robj; track->tex_dirty = true; break; @@ -268,7 +268,7 @@ int r200_packet0_check(struct radeon_cs_parser *p, return r; } track->textures[i].cube_info[face - 1].offset = idx_value; - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); track->textures[i].cube_info[face - 1].robj = reloc->robj; track->tex_dirty = true; break; @@ -287,9 +287,9 @@ int r200_packet0_check(struct radeon_cs_parser *p, } if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + if (reloc->tiling_flags & RADEON_TILING_MACRO) tile_flags |= RADEON_COLOR_TILE_ENABLE; - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) + if (reloc->tiling_flags & RADEON_TILING_MICRO) tile_flags |= RADEON_COLOR_MICROTILE_ENABLE; tmp = idx_value & ~(0x7 << 16); @@ -362,7 +362,7 @@ int r200_packet0_check(struct radeon_cs_parser *p, radeon_cs_dump_packet(p, pkt); return r; } - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); break; case RADEON_PP_CNTL: { diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 7c63ef840e86..41cdf236ee9a 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -640,7 +640,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, track->cb[i].robj = reloc->robj; track->cb[i].offset = idx_value; track->cb_dirty = true; - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); break; case R300_ZB_DEPTHOFFSET: r = radeon_cs_packet_next_reloc(p, &reloc, 0); @@ -653,7 +653,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, track->zb.robj = reloc->robj; track->zb.offset = idx_value; track->zb_dirty = true; - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); break; case R300_TX_OFFSET_0: case R300_TX_OFFSET_0+4: @@ -682,16 +682,16 @@ static int r300_packet0_check(struct radeon_cs_parser *p, if (p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS) { ib[idx] = (idx_value & 31) | /* keep the 1st 5 bits */ - ((idx_value & ~31) + (u32)reloc->lobj.gpu_offset); + ((idx_value & ~31) + (u32)reloc->gpu_offset); } else { - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + if (reloc->tiling_flags & RADEON_TILING_MACRO) tile_flags |= R300_TXO_MACRO_TILE; - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) + if (reloc->tiling_flags & RADEON_TILING_MICRO) tile_flags |= R300_TXO_MICRO_TILE; - else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE) + else if (reloc->tiling_flags & RADEON_TILING_MICRO_SQUARE) tile_flags |= R300_TXO_MICRO_TILE_SQUARE; - tmp = idx_value + ((u32)reloc->lobj.gpu_offset); + tmp = idx_value + ((u32)reloc->gpu_offset); tmp |= tile_flags; ib[idx] = tmp; } @@ -753,11 +753,11 @@ static int r300_packet0_check(struct radeon_cs_parser *p, return r; } - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + if (reloc->tiling_flags & RADEON_TILING_MACRO) tile_flags |= R300_COLOR_TILE_ENABLE; - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) + if (reloc->tiling_flags & RADEON_TILING_MICRO) tile_flags |= R300_COLOR_MICROTILE_ENABLE; - else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE) + else if (reloc->tiling_flags & RADEON_TILING_MICRO_SQUARE) tile_flags |= R300_COLOR_MICROTILE_SQUARE_ENABLE; tmp = idx_value & ~(0x7 << 16); @@ -838,11 +838,11 @@ static int r300_packet0_check(struct radeon_cs_parser *p, return r; } - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + if (reloc->tiling_flags & RADEON_TILING_MACRO) tile_flags |= R300_DEPTHMACROTILE_ENABLE; - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) + if (reloc->tiling_flags & RADEON_TILING_MICRO) tile_flags |= R300_DEPTHMICROTILE_TILED; - else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE) + else if (reloc->tiling_flags & RADEON_TILING_MICRO_SQUARE) tile_flags |= R300_DEPTHMICROTILE_TILED_SQUARE; tmp = idx_value & ~(0x7 << 16); @@ -1052,7 +1052,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, radeon_cs_dump_packet(p, pkt); return r; } - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); break; case 0x4e0c: /* RB3D_COLOR_CHANNEL_MASK */ @@ -1097,7 +1097,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, track->aa.robj = reloc->robj; track->aa.offset = idx_value; track->aa_dirty = true; - ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset); + ib[idx] = idx_value + ((u32)reloc->gpu_offset); break; case R300_RB3D_AARESOLVE_PITCH: track->aa.pitch = idx_value & 0x3FFE; @@ -1162,7 +1162,7 @@ static int r300_packet3_check(struct radeon_cs_parser *p, radeon_cs_dump_packet(p, pkt); return r; } - ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); + ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->gpu_offset); r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj); if (r) { return r; diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index 2812c7d1ae6f..12511bb5fd6f 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -1022,7 +1022,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) "0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); break; case SQ_CONFIG: track->sq_config = radeon_get_ib_value(p, idx); @@ -1043,7 +1043,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) track->db_depth_info = radeon_get_ib_value(p, idx); ib[idx] &= C_028010_ARRAY_MODE; track->db_depth_info &= C_028010_ARRAY_MODE; - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { + if (reloc->tiling_flags & RADEON_TILING_MACRO) { ib[idx] |= S_028010_ARRAY_MODE(V_028010_ARRAY_2D_TILED_THIN1); track->db_depth_info |= S_028010_ARRAY_MODE(V_028010_ARRAY_2D_TILED_THIN1); } else { @@ -1084,9 +1084,9 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) } tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16; track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8; - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->vgt_strmout_bo[tmp] = reloc->robj; - track->vgt_strmout_bo_mc[tmp] = reloc->lobj.gpu_offset; + track->vgt_strmout_bo_mc[tmp] = reloc->gpu_offset; track->streamout_dirty = true; break; case VGT_STRMOUT_BUFFER_SIZE_0: @@ -1105,7 +1105,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) "0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); break; case R_028238_CB_TARGET_MASK: track->cb_target_mask = radeon_get_ib_value(p, idx); @@ -1142,10 +1142,10 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) } tmp = (reg - R_0280A0_CB_COLOR0_INFO) / 4; track->cb_color_info[tmp] = radeon_get_ib_value(p, idx); - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { + if (reloc->tiling_flags & RADEON_TILING_MACRO) { ib[idx] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_2D_TILED_THIN1); track->cb_color_info[tmp] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_2D_TILED_THIN1); - } else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { + } else if (reloc->tiling_flags & RADEON_TILING_MICRO) { ib[idx] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_1D_TILED_THIN1); track->cb_color_info[tmp] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_1D_TILED_THIN1); } @@ -1214,7 +1214,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) } track->cb_color_frag_bo[tmp] = reloc->robj; track->cb_color_frag_offset[tmp] = (u64)ib[idx] << 8; - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); } if (G_0280A0_TILE_MODE(track->cb_color_info[tmp])) { track->cb_dirty = true; @@ -1245,7 +1245,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) } track->cb_color_tile_bo[tmp] = reloc->robj; track->cb_color_tile_offset[tmp] = (u64)ib[idx] << 8; - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); } if (G_0280A0_TILE_MODE(track->cb_color_info[tmp])) { track->cb_dirty = true; @@ -1281,10 +1281,10 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) } tmp = (reg - CB_COLOR0_BASE) / 4; track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8; - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->cb_color_base_last[tmp] = ib[idx]; track->cb_color_bo[tmp] = reloc->robj; - track->cb_color_bo_mc[tmp] = reloc->lobj.gpu_offset; + track->cb_color_bo_mc[tmp] = reloc->gpu_offset; track->cb_dirty = true; break; case DB_DEPTH_BASE: @@ -1295,9 +1295,9 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) return -EINVAL; } track->db_offset = radeon_get_ib_value(p, idx) << 8; - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->db_bo = reloc->robj; - track->db_bo_mc = reloc->lobj.gpu_offset; + track->db_bo_mc = reloc->gpu_offset; track->db_dirty = true; break; case DB_HTILE_DATA_BASE: @@ -1308,7 +1308,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) return -EINVAL; } track->htile_offset = radeon_get_ib_value(p, idx) << 8; - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->htile_bo = reloc->robj; track->db_dirty = true; break; @@ -1377,7 +1377,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) "0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); break; case SX_MEMORY_EXPORT_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm); @@ -1386,7 +1386,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) "0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); break; case SX_MISC: track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0; @@ -1672,7 +1672,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } - offset = reloc->lobj.gpu_offset + + offset = reloc->gpu_offset + (idx_value & 0xfffffff0) + ((u64)(tmp & 0xff) << 32); @@ -1713,7 +1713,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } - offset = reloc->lobj.gpu_offset + + offset = reloc->gpu_offset + idx_value + ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); @@ -1765,7 +1765,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } - offset = reloc->lobj.gpu_offset + + offset = reloc->gpu_offset + (radeon_get_ib_value(p, idx+1) & 0xfffffff0) + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); @@ -1805,7 +1805,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, tmp = radeon_get_ib_value(p, idx) + ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); - offset = reloc->lobj.gpu_offset + tmp; + offset = reloc->gpu_offset + tmp; if ((tmp + size) > radeon_bo_size(reloc->robj)) { dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n", @@ -1835,7 +1835,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, tmp = radeon_get_ib_value(p, idx+2) + ((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32); - offset = reloc->lobj.gpu_offset + tmp; + offset = reloc->gpu_offset + tmp; if ((tmp + size) > radeon_bo_size(reloc->robj)) { dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n", @@ -1861,7 +1861,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, DRM_ERROR("bad SURFACE_SYNC\n"); return -EINVAL; } - ib[idx+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx+2] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); } break; case PACKET3_EVENT_WRITE: @@ -1877,7 +1877,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, DRM_ERROR("bad EVENT_WRITE\n"); return -EINVAL; } - offset = reloc->lobj.gpu_offset + + offset = reloc->gpu_offset + (radeon_get_ib_value(p, idx+1) & 0xfffffff8) + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); @@ -1899,7 +1899,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, return -EINVAL; } - offset = reloc->lobj.gpu_offset + + offset = reloc->gpu_offset + (radeon_get_ib_value(p, idx+1) & 0xfffffffc) + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); @@ -1964,11 +1964,11 @@ static int r600_packet3_check(struct radeon_cs_parser *p, DRM_ERROR("bad SET_RESOURCE\n"); return -EINVAL; } - base_offset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + base_offset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff); if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + if (reloc->tiling_flags & RADEON_TILING_MACRO) ib[idx+1+(i*7)+0] |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1); - else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) + else if (reloc->tiling_flags & RADEON_TILING_MICRO) ib[idx+1+(i*7)+0] |= S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1); } texture = reloc->robj; @@ -1978,13 +1978,13 @@ static int r600_packet3_check(struct radeon_cs_parser *p, DRM_ERROR("bad SET_RESOURCE\n"); return -EINVAL; } - mip_offset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + mip_offset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff); mipmap = reloc->robj; r = r600_check_texture_resource(p, idx+(i*7)+1, texture, mipmap, base_offset + radeon_get_ib_value(p, idx+1+(i*7)+2), mip_offset + radeon_get_ib_value(p, idx+1+(i*7)+3), - reloc->lobj.tiling_flags); + reloc->tiling_flags); if (r) return r; ib[idx+1+(i*7)+2] += base_offset; @@ -2008,7 +2008,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, ib[idx+1+(i*7)+1] = radeon_bo_size(reloc->robj) - offset; } - offset64 = reloc->lobj.gpu_offset + offset; + offset64 = reloc->gpu_offset + offset; ib[idx+1+(i*8)+0] = offset64; ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) | (upper_32_bits(offset64) & 0xff); @@ -2118,7 +2118,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } - ib[idx+1] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx+1] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); } break; case PACKET3_SURFACE_BASE_UPDATE: @@ -2151,7 +2151,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } - offset += reloc->lobj.gpu_offset; + offset += reloc->gpu_offset; ib[idx+1] = offset; ib[idx+2] = upper_32_bits(offset) & 0xff; } @@ -2170,7 +2170,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } - offset += reloc->lobj.gpu_offset; + offset += reloc->gpu_offset; ib[idx+3] = offset; ib[idx+4] = upper_32_bits(offset) & 0xff; } @@ -2199,7 +2199,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, offset + 8, radeon_bo_size(reloc->robj)); return -EINVAL; } - offset += reloc->lobj.gpu_offset; + offset += reloc->gpu_offset; ib[idx+0] = offset; ib[idx+1] = upper_32_bits(offset) & 0xff; break; @@ -2224,7 +2224,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } - offset += reloc->lobj.gpu_offset; + offset += reloc->gpu_offset; ib[idx+1] = offset; ib[idx+2] = upper_32_bits(offset) & 0xff; } else { @@ -2248,7 +2248,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p, offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } - offset += reloc->lobj.gpu_offset; + offset += reloc->gpu_offset; ib[idx+3] = offset; ib[idx+4] = upper_32_bits(offset) & 0xff; } else { @@ -2505,14 +2505,14 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) dst_offset = radeon_get_ib_value(p, idx+1); dst_offset <<= 8; - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); p->idx += count + 5; } else { dst_offset = radeon_get_ib_value(p, idx+1); dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); + ib[idx+2] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; p->idx += count + 3; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { @@ -2539,22 +2539,22 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) /* tiled src, linear dst */ src_offset = radeon_get_ib_value(p, idx+1); src_offset <<= 8; - ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); + ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8); dst_offset = radeon_get_ib_value(p, idx+5); dst_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32; - ib[idx+5] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+6] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; + ib[idx+5] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); + ib[idx+6] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; } else { /* linear src, tiled dst */ src_offset = radeon_get_ib_value(p, idx+5); src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32; - ib[idx+5] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+5] += (u32)(src_reloc->gpu_offset & 0xfffffffc); + ib[idx+6] += upper_32_bits(src_reloc->gpu_offset) & 0xff; dst_offset = radeon_get_ib_value(p, idx+1); dst_offset <<= 8; - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); } p->idx += 7; } else { @@ -2564,10 +2564,10 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) dst_offset = radeon_get_ib_value(p, idx+1); dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32; - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; - ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); + ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xfffffffc); + ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; + ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff; p->idx += 5; } else { src_offset = radeon_get_ib_value(p, idx+2); @@ -2575,10 +2575,10 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) dst_offset = radeon_get_ib_value(p, idx+1); dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff0000)) << 16; - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+3] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; - ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff) << 16; + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); + ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xfffffffc); + ib[idx+3] += upper_32_bits(src_reloc->gpu_offset) & 0xff; + ib[idx+3] += (upper_32_bits(dst_reloc->gpu_offset) & 0xff) << 16; p->idx += 4; } } @@ -2610,8 +2610,8 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000; + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); + ib[idx+3] += (upper_32_bits(dst_reloc->gpu_offset) << 16) & 0x00ff0000; p->idx += 4; break; case DMA_PACKET_NOP: diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index cd6a48099547..111deab2492a 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -479,15 +479,6 @@ struct radeon_bo { }; #define gem_to_radeon_bo(gobj) container_of((gobj), struct radeon_bo, gem_base) -struct radeon_bo_list { - struct ttm_validate_buffer tv; - struct radeon_bo *bo; - uint64_t gpu_offset; - unsigned domain; - unsigned alt_domain; - u32 tiling_flags; -}; - int radeon_gem_debugfs_init(struct radeon_device *rdev); /* sub-allocation manager, it has to be protected by another lock. @@ -987,9 +978,12 @@ void cayman_dma_fini(struct radeon_device *rdev); struct radeon_cs_reloc { struct drm_gem_object *gobj; struct radeon_bo *robj; - struct radeon_bo_list lobj; + struct ttm_validate_buffer tv; + uint64_t gpu_offset; + unsigned domain; + unsigned alt_domain; + uint32_t tiling_flags; uint32_t handle; - uint32_t flags; }; struct radeon_cs_chunk { @@ -1013,7 +1007,7 @@ struct radeon_cs_parser { unsigned nrelocs; struct radeon_cs_reloc *relocs; struct radeon_cs_reloc **relocs_ptr; - struct radeon_bo_list *vm_bos; + struct radeon_cs_reloc *vm_bos; struct list_head validated; unsigned dma_reloc_idx; /* indices of various chunks */ @@ -2803,9 +2797,9 @@ int radeon_vm_manager_init(struct radeon_device *rdev); void radeon_vm_manager_fini(struct radeon_device *rdev); int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm); void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm); -struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev, - struct radeon_vm *vm, - struct list_head *head); +struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev, + struct radeon_vm *vm, + struct list_head *head); struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, struct radeon_vm *vm, int ring); void radeon_vm_flush(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 0570e7675fd3..2b6e0ebcc13a 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -125,7 +125,6 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) } p->relocs_ptr[i] = &p->relocs[i]; p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj); - p->relocs[i].lobj.bo = p->relocs[i].robj; /* The userspace buffer priorities are from 0 to 15. A higher * number means the buffer is more important. @@ -141,10 +140,10 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) if (p->ring == R600_RING_TYPE_UVD_INDEX && (i == 0 || drm_pci_device_is_agp(p->rdev->ddev))) { /* TODO: is this still needed for NI+ ? */ - p->relocs[i].lobj.domain = + p->relocs[i].domain = RADEON_GEM_DOMAIN_VRAM; - p->relocs[i].lobj.alt_domain = + p->relocs[i].alt_domain = RADEON_GEM_DOMAIN_VRAM; /* prioritize this over any other relocation */ @@ -153,16 +152,16 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) uint32_t domain = r->write_domain ? r->write_domain : r->read_domains; - p->relocs[i].lobj.domain = domain; + p->relocs[i].domain = domain; if (domain == RADEON_GEM_DOMAIN_VRAM) domain |= RADEON_GEM_DOMAIN_GTT; - p->relocs[i].lobj.alt_domain = domain; + p->relocs[i].alt_domain = domain; } - p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo; + p->relocs[i].tv.bo = &p->relocs[i].robj->tbo; p->relocs[i].handle = r->handle; - radeon_cs_buckets_add(&buckets, &p->relocs[i].lobj.tv.head, + radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head, priority); } @@ -356,11 +355,11 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) static int cmp_size_smaller_first(void *priv, struct list_head *a, struct list_head *b) { - struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, tv.head); - struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, tv.head); + struct radeon_cs_reloc *la = list_entry(a, struct radeon_cs_reloc, tv.head); + struct radeon_cs_reloc *lb = list_entry(b, struct radeon_cs_reloc, tv.head); /* Sort A before B if A is smaller. */ - return (int)la->bo->tbo.num_pages - (int)lb->bo->tbo.num_pages; + return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages; } /** @@ -786,9 +785,9 @@ int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p, /* FIXME: we assume reloc size is 4 dwords */ if (nomm) { *cs_reloc = p->relocs; - (*cs_reloc)->lobj.gpu_offset = + (*cs_reloc)->gpu_offset = (u64)relocs_chunk->kdata[idx + 3] << 32; - (*cs_reloc)->lobj.gpu_offset |= relocs_chunk->kdata[idx + 0]; + (*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0]; } else *cs_reloc = p->relocs_ptr[(idx / 4)]; return 0; diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index ed03f2d15853..ca79431b2c1c 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -422,7 +422,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev, struct ww_acquire_ctx *ticket, struct list_head *head, int ring) { - struct radeon_bo_list *lobj; + struct radeon_cs_reloc *lobj; struct radeon_bo *bo; int r; u64 bytes_moved = 0, initial_bytes_moved; @@ -434,7 +434,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev, } list_for_each_entry(lobj, head, tv.head) { - bo = lobj->bo; + bo = lobj->robj; if (!bo->pin_count) { u32 domain = lobj->domain; u32 current_domain = diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index ceb7b289b745..6a2e3ff68374 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -453,7 +453,7 @@ static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p, } reloc = p->relocs_ptr[(idx / 4)]; - start = reloc->lobj.gpu_offset; + start = reloc->gpu_offset; end = start + radeon_bo_size(reloc->robj); start += offset; diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c index 39ec7d8f33ab..76e9904bc537 100644 --- a/drivers/gpu/drm/radeon/radeon_vce.c +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -461,7 +461,7 @@ int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi) return -EINVAL; } - offset += p->relocs_ptr[(idx / 4)]->lobj.gpu_offset; + offset += p->relocs_ptr[(idx / 4)]->gpu_offset; p->ib.ptr[lo] = offset & 0xFFFFFFFF; p->ib.ptr[hi] = offset >> 32; diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 81d91b513ce1..2aae6ce49d32 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -125,33 +125,39 @@ void radeon_vm_manager_fini(struct radeon_device *rdev) * Add the page directory to the list of BOs to * validate for command submission (cayman+). */ -struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev, - struct radeon_vm *vm, - struct list_head *head) +struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev, + struct radeon_vm *vm, + struct list_head *head) { - struct radeon_bo_list *list; + struct radeon_cs_reloc *list; unsigned i, idx, size; - size = (radeon_vm_num_pdes(rdev) + 1) * sizeof(struct radeon_bo_list); + size = (radeon_vm_num_pdes(rdev) + 1) * sizeof(struct radeon_cs_reloc); list = kmalloc(size, GFP_KERNEL); if (!list) return NULL; /* add the vm page table to the list */ - list[0].bo = vm->page_directory; + list[0].gobj = NULL; + list[0].robj = vm->page_directory; list[0].domain = RADEON_GEM_DOMAIN_VRAM; list[0].alt_domain = RADEON_GEM_DOMAIN_VRAM; list[0].tv.bo = &vm->page_directory->tbo; + list[0].tiling_flags = 0; + list[0].handle = 0; list_add(&list[0].tv.head, head); for (i = 0, idx = 1; i <= vm->max_pde_used; i++) { if (!vm->page_tables[i].bo) continue; - list[idx].bo = vm->page_tables[i].bo; + list[idx].gobj = NULL; + list[idx].robj = vm->page_tables[i].bo; list[idx].domain = RADEON_GEM_DOMAIN_VRAM; list[idx].alt_domain = RADEON_GEM_DOMAIN_VRAM; - list[idx].tv.bo = &list[idx].bo->tbo; + list[idx].tv.bo = &list[idx].robj->tbo; + list[idx].tiling_flags = 0; + list[idx].handle = 0; list_add(&list[idx++].tv.head, head); } -- cgit From 6796cb16c088905bf3af40548fda68c09e6f6ee5 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Fri, 3 Jan 2014 14:24:19 +0100 Subject: drm: use anon-inode instead of relying on cdevs DRM drivers share a common address_space across all character-devices of a single DRM device. This allows simple buffer eviction and mapping-control. However, DRM core currently waits for the first ->open() on any char-dev to mark the underlying inode as backing inode of the device. This delayed initialization causes ugly conditions all over the place: if (dev->dev_mapping) do_sth(); To avoid delayed initialization and to stop reusing the inode of the char-dev, we allocate an anonymous inode for each DRM device and reset filp->f_mapping to it on ->open(). Signed-off-by: David Herrmann --- drivers/gpu/drm/radeon/radeon_object.c | 2 +- drivers/gpu/drm/radeon/radeon_ttm.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index ca79431b2c1c..6a7f3c6ffbbe 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -145,7 +145,7 @@ int radeon_bo_create(struct radeon_device *rdev, size = ALIGN(size, PAGE_SIZE); - rdev->mman.bdev.dev_mapping = rdev->ddev->dev_mapping; + rdev->mman.bdev.dev_mapping = rdev->ddev->anon_inode->i_mapping; if (kernel) { type = ttm_bo_type_kernel; } else if (sg) { diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 60dfce889ecf..4663fbcfda28 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -745,7 +745,7 @@ int radeon_ttm_init(struct radeon_device *rdev) } DRM_INFO("radeon: %uM of GTT memory ready.\n", (unsigned)(rdev->mc.gtt_size / (1024 * 1024))); - rdev->mman.bdev.dev_mapping = rdev->ddev->dev_mapping; + rdev->mman.bdev.dev_mapping = rdev->ddev->anon_inode->i_mapping; r = radeon_ttm_debugfs_init(rdev); if (r) { -- cgit From 44d847b7439bdea0b6c5640446427daa3ebcc7fa Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Tue, 13 Aug 2013 19:10:30 +0200 Subject: drm: init TTM dev_mapping in ttm_bo_device_init() With dev->anon_inode we have a global address_space ready for operation right from the beginning. Therefore, there is no need to do a delayed setup with TTM. Instead, set dev_mapping during initialization in ttm_bo_device_init() and remove any "if (dev_mapping)" conditions. Cc: Dave Airlie Cc: Ben Skeggs Cc: Maarten Lankhorst Cc: Alex Deucher Cc: Thomas Hellstrom Signed-off-by: David Herrmann --- drivers/gpu/drm/radeon/radeon_object.c | 1 - drivers/gpu/drm/radeon/radeon_ttm.c | 5 +++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 6a7f3c6ffbbe..1375ff85b08a 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -145,7 +145,6 @@ int radeon_bo_create(struct radeon_device *rdev, size = ALIGN(size, PAGE_SIZE); - rdev->mman.bdev.dev_mapping = rdev->ddev->anon_inode->i_mapping; if (kernel) { type = ttm_bo_type_kernel; } else if (sg) { diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 4663fbcfda28..2db486666d91 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -707,7 +707,9 @@ int radeon_ttm_init(struct radeon_device *rdev) /* No others user of address space so set it to 0 */ r = ttm_bo_device_init(&rdev->mman.bdev, rdev->mman.bo_global_ref.ref.object, - &radeon_bo_driver, DRM_FILE_PAGE_OFFSET, + &radeon_bo_driver, + rdev->ddev->anon_inode->i_mapping, + DRM_FILE_PAGE_OFFSET, rdev->need_dma32); if (r) { DRM_ERROR("failed initializing buffer object driver(%d).\n", r); @@ -745,7 +747,6 @@ int radeon_ttm_init(struct radeon_device *rdev) } DRM_INFO("radeon: %uM of GTT memory ready.\n", (unsigned)(rdev->mc.gtt_size / (1024 * 1024))); - rdev->mman.bdev.dev_mapping = rdev->ddev->anon_inode->i_mapping; r = radeon_ttm_debugfs_init(rdev); if (r) { -- cgit From e84c20aff1ce7493bce26b75f1db363bb3f05979 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 20 Mar 2014 11:08:11 +1000 Subject: drm/radeon/kms: merge conflicted badly Not sure why git didn't flag this, but the result of automerge from 3.14-rc7 screwed up the radeon init procedure. Reported-by: Fireburn on #radeon Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_kms.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 6f1dfac17507..3e49342a20e6 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -575,10 +575,6 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) if (r) return r; - r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); - if (r) - return r; - /* map the ib pool buffer read only into * virtual address space */ bo_va = radeon_vm_bo_add(rdev, &fpriv->vm, -- cgit From f3381dfc9745bcd8b6be676ec4f68c52e71d24f1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 17 Mar 2014 23:48:14 -0400 Subject: drm/radeon/dp: use i2c_get_adapdata rather than casting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Minor code cleanup. Signed-off-by: Alex Deucher Acked-by: Christian König --- drivers/gpu/drm/radeon/atombios_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index 4ad7643fce5f..c79ba8a886bd 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -236,7 +236,7 @@ int radeon_dp_i2c_aux_ch(struct i2c_adapter *adapter, int mode, u8 write_byte, u8 *read_byte) { struct i2c_algo_dp_aux_data *algo_data = adapter->algo_data; - struct radeon_i2c_chan *auxch = (struct radeon_i2c_chan *)adapter; + struct radeon_i2c_chan *auxch = i2c_get_adapdata(adapter); u16 address = algo_data->address; u8 msg[5]; u8 reply[2]; -- cgit From 2953da1589477c9aec85431339d28a8303d47945 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 17 Mar 2014 23:48:15 -0400 Subject: drm/radeon/dp: move sink power control to a separate function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will be used elsewhere. Signed-off-by: Alex Deucher Acked-by: Christian König --- drivers/gpu/drm/radeon/atombios_dp.c | 26 +++++++++++++++++++++----- drivers/gpu/drm/radeon/radeon_mode.h | 2 ++ 2 files changed, 23 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index c79ba8a886bd..88d399c3f4fd 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -603,6 +603,26 @@ bool radeon_dp_needs_link_train(struct radeon_connector *radeon_connector) return true; } +void radeon_dp_set_rx_power_state(struct drm_connector *connector, + u8 power_state) +{ + struct radeon_connector *radeon_connector = to_radeon_connector(connector); + struct radeon_connector_atom_dig *dig_connector; + + if (!radeon_connector->con_priv) + return; + + dig_connector = radeon_connector->con_priv; + + /* power up/down the sink */ + if (dig_connector->dpcd[0] >= 0x11) { + radeon_write_dpcd_reg(radeon_connector, + DP_SET_POWER, power_state); + usleep_range(1000, 2000); + } +} + + struct radeon_dp_link_train_info { struct radeon_device *rdev; struct drm_encoder *encoder; @@ -673,11 +693,7 @@ static int radeon_dp_link_train_init(struct radeon_dp_link_train_info *dp_info) u8 tmp; /* power up the sink */ - if (dp_info->dpcd[0] >= 0x11) { - radeon_write_dpcd_reg(dp_info->radeon_connector, - DP_SET_POWER, DP_SET_POWER_D0); - usleep_range(1000, 2000); - } + radeon_dp_set_rx_power_state(dp_info->connector, DP_SET_POWER_D0); /* possibly enable downspread on the sink */ if (dp_info->dpcd[3] & 0x1) diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index 402dbe32c234..e390f559e496 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -690,6 +690,8 @@ extern u8 radeon_dp_getsinktype(struct radeon_connector *radeon_connector); extern bool radeon_dp_getdpcd(struct radeon_connector *radeon_connector); extern int radeon_dp_get_panel_mode(struct drm_encoder *encoder, struct drm_connector *connector); +extern void radeon_dp_set_rx_power_state(struct drm_connector *connector, + u8 power_state); extern void atombios_dig_encoder_setup(struct drm_encoder *encoder, int action, int panel_mode); extern void radeon_atom_encoder_init(struct radeon_device *rdev); extern void radeon_atom_disp_eng_pll_init(struct radeon_device *rdev); -- cgit From 6f50e0758795e21919d0d45e3c9ff441f6139575 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 17 Mar 2014 23:48:16 -0400 Subject: drm/radeon/atom: rework encoder enable/disable sequence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This more closely matches what the vbios does and also adds a quirk for travis lvds displays and powers down the sink on DP displays which saves some power and may fix display issues in some cases. Signed-off-by: Alex Deucher Acked-by: Christian König --- drivers/gpu/drm/radeon/atombios_encoders.c | 82 +++++++++++++----------------- 1 file changed, 34 insertions(+), 48 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index 607dc14d195e..e6eb5097597f 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -1633,10 +1633,16 @@ radeon_atom_encoder_dpms_dig(struct drm_encoder *encoder, int mode) struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); struct radeon_connector *radeon_connector = NULL; struct radeon_connector_atom_dig *radeon_dig_connector = NULL; + bool travis_quirk = false; if (connector) { radeon_connector = to_radeon_connector(connector); radeon_dig_connector = radeon_connector->con_priv; + if ((radeon_connector_encoder_get_dp_bridge_encoder_id(connector) == + ENCODER_OBJECT_ID_TRAVIS) && + (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) && + !ASIC_IS_DCE5(rdev)) + travis_quirk = true; } switch (mode) { @@ -1657,17 +1663,13 @@ radeon_atom_encoder_dpms_dig(struct drm_encoder *encoder, int mode) atombios_external_encoder_setup(encoder, ext_encoder, EXTERNAL_ENCODER_ACTION_V3_ENCODER_SETUP); } - atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0); } else if (ASIC_IS_DCE4(rdev)) { /* setup and enable the encoder */ atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_SETUP, 0); - /* enable the transmitter */ - atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0); } else { /* setup and enable the encoder and transmitter */ atombios_dig_encoder_setup(encoder, ATOM_ENABLE, 0); atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_SETUP, 0, 0); - atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0); } if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(encoder)) && connector) { if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) { @@ -1675,68 +1677,56 @@ radeon_atom_encoder_dpms_dig(struct drm_encoder *encoder, int mode) ATOM_TRANSMITTER_ACTION_POWER_ON); radeon_dig_connector->edp_on = true; } + } + /* enable the transmitter */ + atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0); + if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(encoder)) && connector) { + /* DP_SET_POWER_D0 is set in radeon_dp_link_train */ radeon_dp_link_train(encoder, connector); if (ASIC_IS_DCE4(rdev)) atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_DP_VIDEO_ON, 0); } if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) - atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_LCD_BLON, 0, 0); + atombios_dig_transmitter_setup(encoder, + ATOM_TRANSMITTER_ACTION_LCD_BLON, 0, 0); + if (ext_encoder) + atombios_external_encoder_setup(encoder, ext_encoder, ATOM_ENABLE); break; case DRM_MODE_DPMS_STANDBY: case DRM_MODE_DPMS_SUSPEND: case DRM_MODE_DPMS_OFF: + if (ASIC_IS_DCE4(rdev)) { + if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(encoder)) && connector) + atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_DP_VIDEO_OFF, 0); + } + if (ext_encoder) + atombios_external_encoder_setup(encoder, ext_encoder, ATOM_DISABLE); + if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) + atombios_dig_transmitter_setup(encoder, + ATOM_TRANSMITTER_ACTION_LCD_BLOFF, 0, 0); + + if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(encoder)) && + connector && !travis_quirk) + radeon_dp_set_rx_power_state(connector, DP_SET_POWER_D3); if (ASIC_IS_DCE4(rdev)) { /* disable the transmitter */ - atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE, 0, 0); + atombios_dig_transmitter_setup(encoder, + ATOM_TRANSMITTER_ACTION_DISABLE, 0, 0); } else { /* disable the encoder and transmitter */ - atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE, 0, 0); + atombios_dig_transmitter_setup(encoder, + ATOM_TRANSMITTER_ACTION_DISABLE, 0, 0); atombios_dig_encoder_setup(encoder, ATOM_DISABLE, 0); } if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(encoder)) && connector) { - if (ASIC_IS_DCE4(rdev)) - atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_DP_VIDEO_OFF, 0); + if (travis_quirk) + radeon_dp_set_rx_power_state(connector, DP_SET_POWER_D3); if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) { atombios_set_edp_panel_power(connector, ATOM_TRANSMITTER_ACTION_POWER_OFF); radeon_dig_connector->edp_on = false; } } - if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) - atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_LCD_BLOFF, 0, 0); - break; - } -} - -static void -radeon_atom_encoder_dpms_ext(struct drm_encoder *encoder, - struct drm_encoder *ext_encoder, - int mode) -{ - struct drm_device *dev = encoder->dev; - struct radeon_device *rdev = dev->dev_private; - - switch (mode) { - case DRM_MODE_DPMS_ON: - default: - if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE61(rdev)) { - atombios_external_encoder_setup(encoder, ext_encoder, - EXTERNAL_ENCODER_ACTION_V3_ENABLE_OUTPUT); - atombios_external_encoder_setup(encoder, ext_encoder, - EXTERNAL_ENCODER_ACTION_V3_ENCODER_BLANKING_OFF); - } else - atombios_external_encoder_setup(encoder, ext_encoder, ATOM_ENABLE); - break; - case DRM_MODE_DPMS_STANDBY: - case DRM_MODE_DPMS_SUSPEND: - case DRM_MODE_DPMS_OFF: - if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE61(rdev)) { - atombios_external_encoder_setup(encoder, ext_encoder, - EXTERNAL_ENCODER_ACTION_V3_ENCODER_BLANKING); - atombios_external_encoder_setup(encoder, ext_encoder, - EXTERNAL_ENCODER_ACTION_V3_DISABLE_OUTPUT); - } else - atombios_external_encoder_setup(encoder, ext_encoder, ATOM_DISABLE); break; } } @@ -1747,7 +1737,6 @@ radeon_atom_encoder_dpms(struct drm_encoder *encoder, int mode) struct drm_device *dev = encoder->dev; struct radeon_device *rdev = dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); - struct drm_encoder *ext_encoder = radeon_get_external_encoder(encoder); DRM_DEBUG_KMS("encoder dpms %d to mode %d, devices %08x, active_devices %08x\n", radeon_encoder->encoder_id, mode, radeon_encoder->devices, @@ -1807,9 +1796,6 @@ radeon_atom_encoder_dpms(struct drm_encoder *encoder, int mode) return; } - if (ext_encoder) - radeon_atom_encoder_dpms_ext(encoder, ext_encoder, mode); - radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false); } -- cgit From dca0be0d850363782c46ddc0fb408d658153b4b1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 17 Mar 2014 23:48:17 -0400 Subject: drm/radeon: clarify special handling in i2c over aux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need a special packet for the start and end of the transaction. Signed-off-by: Alex Deucher Acked-by: Christian König --- drivers/gpu/drm/radeon/atombios_dp.c | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index 88d399c3f4fd..23189b796eae 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -246,34 +246,30 @@ int radeon_dp_i2c_aux_ch(struct i2c_adapter *adapter, int mode, int ret; u8 ack; - /* Set up the command byte */ - if (mode & MODE_I2C_READ) - msg[2] = DP_AUX_I2C_READ << 4; - else - msg[2] = DP_AUX_I2C_WRITE << 4; - - if (!(mode & MODE_I2C_STOP)) - msg[2] |= DP_AUX_I2C_MOT << 4; - + /* Set up the address */ msg[0] = address; msg[1] = address >> 8; - switch (mode) { - case MODE_I2C_WRITE: + /* Set up the command byte */ + if (mode & MODE_I2C_READ) { + msg[2] = DP_AUX_I2C_READ << 4; + msg_bytes = 4; + msg[3] = msg_bytes << 4; + } else { + msg[2] = DP_AUX_I2C_WRITE << 4; msg_bytes = 5; msg[3] = msg_bytes << 4; msg[4] = write_byte; - break; - case MODE_I2C_READ: - msg_bytes = 4; - msg[3] = msg_bytes << 4; - break; - default: - msg_bytes = 4; - msg[3] = 3 << 4; - break; } + /* special handling for start/stop */ + if (mode & (MODE_I2C_START | MODE_I2C_STOP)) + msg[3] = 3 << 4; + + /* Set MOT bit for all but stop */ + if ((mode & MODE_I2C_STOP) == 0) + msg[2] |= DP_AUX_I2C_MOT << 4; + for (retry = 0; retry < 7; retry++) { ret = radeon_process_aux_ch(auxch, msg, msg_bytes, reply, reply_bytes, 0, &ack); -- cgit From 496263bf2bee13387f6e2a780f0c783c9c377c42 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 21 Mar 2014 10:34:07 -0400 Subject: drm/radeon: use the new drm helpers for dp aux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Switch to the new dp helpers. The main difference is that the DP helpers don't allow an adjustable delay in the aux transaction, but I don't know that this is necessary. Signed-off-by: Alex Deucher Acked-by: Christian König --- drivers/gpu/drm/radeon/atombios_dp.c | 192 +++++++++++++---------------- drivers/gpu/drm/radeon/radeon_connectors.c | 17 ++- drivers/gpu/drm/radeon/radeon_mode.h | 2 + 3 files changed, 104 insertions(+), 107 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index 23189b796eae..8d8f84676544 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -142,94 +142,62 @@ static int radeon_process_aux_ch(struct radeon_i2c_chan *chan, return recv_bytes; } -static int radeon_dp_aux_native_write(struct radeon_connector *radeon_connector, - u16 address, u8 *send, u8 send_bytes, u8 delay) -{ - struct radeon_connector_atom_dig *dig_connector = radeon_connector->con_priv; - int ret; - u8 msg[20]; - int msg_bytes = send_bytes + 4; - u8 ack; - unsigned retry; - - if (send_bytes > 16) - return -1; +#define HEADER_SIZE 4 - msg[0] = address; - msg[1] = address >> 8; - msg[2] = DP_AUX_NATIVE_WRITE << 4; - msg[3] = (msg_bytes << 4) | (send_bytes - 1); - memcpy(&msg[4], send, send_bytes); - - for (retry = 0; retry < 7; retry++) { - ret = radeon_process_aux_ch(dig_connector->dp_i2c_bus, - msg, msg_bytes, NULL, 0, delay, &ack); - if (ret == -EBUSY) - continue; - else if (ret < 0) - return ret; - ack >>= 4; - if ((ack & DP_AUX_NATIVE_REPLY_MASK) == DP_AUX_NATIVE_REPLY_ACK) - return send_bytes; - else if ((ack & DP_AUX_NATIVE_REPLY_MASK) == DP_AUX_NATIVE_REPLY_DEFER) - usleep_range(400, 500); - else - return -EIO; - } - - return -EIO; -} - -static int radeon_dp_aux_native_read(struct radeon_connector *radeon_connector, - u16 address, u8 *recv, int recv_bytes, u8 delay) +static ssize_t +radeon_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) { - struct radeon_connector_atom_dig *dig_connector = radeon_connector->con_priv; - u8 msg[4]; - int msg_bytes = 4; - u8 ack; + struct radeon_i2c_chan *chan = + container_of(aux, struct radeon_i2c_chan, aux); int ret; - unsigned retry; - - msg[0] = address; - msg[1] = address >> 8; - msg[2] = DP_AUX_NATIVE_READ << 4; - msg[3] = (msg_bytes << 4) | (recv_bytes - 1); - - for (retry = 0; retry < 7; retry++) { - ret = radeon_process_aux_ch(dig_connector->dp_i2c_bus, - msg, msg_bytes, recv, recv_bytes, delay, &ack); - if (ret == -EBUSY) - continue; - else if (ret < 0) - return ret; - ack >>= 4; - if ((ack & DP_AUX_NATIVE_REPLY_MASK) == DP_AUX_NATIVE_REPLY_ACK) - return ret; - else if ((ack & DP_AUX_NATIVE_REPLY_MASK) == DP_AUX_NATIVE_REPLY_DEFER) - usleep_range(400, 500); - else if (ret == 0) - return -EPROTO; - else - return -EIO; + u8 tx_buf[20]; + size_t tx_size; + u8 ack, delay = 0; + + if (WARN_ON(msg->size > 16)) + return -E2BIG; + + tx_buf[0] = msg->address & 0xff; + tx_buf[1] = msg->address >> 8; + tx_buf[2] = msg->request << 4; + tx_buf[3] = msg->size - 1; + + switch (msg->request & ~DP_AUX_I2C_MOT) { + case DP_AUX_NATIVE_WRITE: + case DP_AUX_I2C_WRITE: + tx_size = HEADER_SIZE + msg->size; + tx_buf[3] |= tx_size << 4; + memcpy(tx_buf + HEADER_SIZE, msg->buffer, msg->size); + ret = radeon_process_aux_ch(chan, + tx_buf, tx_size, NULL, 0, delay, &ack); + if (ret >= 0) + /* Return payload size. */ + ret = msg->size; + break; + case DP_AUX_NATIVE_READ: + case DP_AUX_I2C_READ: + tx_size = HEADER_SIZE; + tx_buf[3] |= tx_size << 4; + ret = radeon_process_aux_ch(chan, + tx_buf, tx_size, msg->buffer, msg->size, delay, &ack); + break; + default: + ret = -EINVAL; + break; } - return -EIO; -} + if (ret > 0) + msg->reply = ack >> 4; -static void radeon_write_dpcd_reg(struct radeon_connector *radeon_connector, - u16 reg, u8 val) -{ - radeon_dp_aux_native_write(radeon_connector, reg, &val, 1, 0); + return ret; } -static u8 radeon_read_dpcd_reg(struct radeon_connector *radeon_connector, - u16 reg) +void radeon_dp_aux_init(struct radeon_connector *radeon_connector) { - u8 val = 0; - - radeon_dp_aux_native_read(radeon_connector, reg, &val, 1, 0); + struct radeon_connector_atom_dig *dig_connector = radeon_connector->con_priv; - return val; + dig_connector->dp_i2c_bus->aux.dev = radeon_connector->base.kdev; + dig_connector->dp_i2c_bus->aux.transfer = radeon_dp_aux_transfer; } int radeon_dp_i2c_aux_ch(struct i2c_adapter *adapter, int mode, @@ -468,11 +436,11 @@ static void radeon_dp_probe_oui(struct radeon_connector *radeon_connector) if (!(dig_connector->dpcd[DP_DOWN_STREAM_PORT_COUNT] & DP_OUI_SUPPORT)) return; - if (radeon_dp_aux_native_read(radeon_connector, DP_SINK_OUI, buf, 3, 0)) + if (drm_dp_dpcd_read(&dig_connector->dp_i2c_bus->aux, DP_SINK_OUI, buf, 3)) DRM_DEBUG_KMS("Sink OUI: %02hx%02hx%02hx\n", buf[0], buf[1], buf[2]); - if (radeon_dp_aux_native_read(radeon_connector, DP_BRANCH_OUI, buf, 3, 0)) + if (drm_dp_dpcd_read(&dig_connector->dp_i2c_bus->aux, DP_BRANCH_OUI, buf, 3)) DRM_DEBUG_KMS("Branch OUI: %02hx%02hx%02hx\n", buf[0], buf[1], buf[2]); } @@ -483,8 +451,8 @@ bool radeon_dp_getdpcd(struct radeon_connector *radeon_connector) u8 msg[DP_DPCD_SIZE]; int ret, i; - ret = radeon_dp_aux_native_read(radeon_connector, DP_DPCD_REV, msg, - DP_DPCD_SIZE, 0); + ret = drm_dp_dpcd_read(&dig_connector->dp_i2c_bus->aux, DP_DPCD_REV, msg, + DP_DPCD_SIZE); if (ret > 0) { memcpy(dig_connector->dpcd, msg, DP_DPCD_SIZE); DRM_DEBUG_KMS("DPCD: "); @@ -506,6 +474,7 @@ int radeon_dp_get_panel_mode(struct drm_encoder *encoder, struct drm_device *dev = encoder->dev; struct radeon_device *rdev = dev->dev_private; struct radeon_connector *radeon_connector = to_radeon_connector(connector); + struct radeon_connector_atom_dig *dig_connector; int panel_mode = DP_PANEL_MODE_EXTERNAL_DP_MODE; u16 dp_bridge = radeon_connector_encoder_get_dp_bridge_encoder_id(connector); u8 tmp; @@ -513,9 +482,15 @@ int radeon_dp_get_panel_mode(struct drm_encoder *encoder, if (!ASIC_IS_DCE4(rdev)) return panel_mode; + if (!radeon_connector->con_priv) + return panel_mode; + + dig_connector = radeon_connector->con_priv; + if (dp_bridge != ENCODER_OBJECT_ID_NONE) { /* DP bridge chips */ - tmp = radeon_read_dpcd_reg(radeon_connector, DP_EDP_CONFIGURATION_CAP); + drm_dp_dpcd_readb(&dig_connector->dp_i2c_bus->aux, + DP_EDP_CONFIGURATION_CAP, &tmp); if (tmp & 1) panel_mode = DP_PANEL_MODE_INTERNAL_DP2_MODE; else if ((dp_bridge == ENCODER_OBJECT_ID_NUTMEG) || @@ -525,7 +500,8 @@ int radeon_dp_get_panel_mode(struct drm_encoder *encoder, panel_mode = DP_PANEL_MODE_EXTERNAL_DP_MODE; } else if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) { /* eDP */ - tmp = radeon_read_dpcd_reg(radeon_connector, DP_EDP_CONFIGURATION_CAP); + drm_dp_dpcd_readb(&dig_connector->dp_i2c_bus->aux, + DP_EDP_CONFIGURATION_CAP, &tmp); if (tmp & 1) panel_mode = DP_PANEL_MODE_INTERNAL_DP2_MODE; } @@ -576,9 +552,15 @@ int radeon_dp_mode_valid_helper(struct drm_connector *connector, static bool radeon_dp_get_link_status(struct radeon_connector *radeon_connector, u8 link_status[DP_LINK_STATUS_SIZE]) { + struct radeon_connector_atom_dig *dig_connector; int ret; - ret = radeon_dp_aux_native_read(radeon_connector, DP_LANE0_1_STATUS, - link_status, DP_LINK_STATUS_SIZE, 100); + + if (!radeon_connector->con_priv) + return false; + dig_connector = radeon_connector->con_priv; + + ret = drm_dp_dpcd_read(&dig_connector->dp_i2c_bus->aux, DP_LANE0_1_STATUS, + link_status, DP_LINK_STATUS_SIZE); if (ret <= 0) { return false; } @@ -612,7 +594,7 @@ void radeon_dp_set_rx_power_state(struct drm_connector *connector, /* power up/down the sink */ if (dig_connector->dpcd[0] >= 0x11) { - radeon_write_dpcd_reg(radeon_connector, + drm_dp_dpcd_writeb(&dig_connector->dp_i2c_bus->aux, DP_SET_POWER, power_state); usleep_range(1000, 2000); } @@ -633,6 +615,7 @@ struct radeon_dp_link_train_info { u8 link_status[DP_LINK_STATUS_SIZE]; u8 tries; bool use_dpencoder; + struct drm_dp_aux *aux; }; static void radeon_dp_update_vs_emph(struct radeon_dp_link_train_info *dp_info) @@ -643,8 +626,8 @@ static void radeon_dp_update_vs_emph(struct radeon_dp_link_train_info *dp_info) 0, dp_info->train_set[0]); /* sets all lanes at once */ /* set the vs/emph on the sink */ - radeon_dp_aux_native_write(dp_info->radeon_connector, DP_TRAINING_LANE0_SET, - dp_info->train_set, dp_info->dp_lane_count, 0); + drm_dp_dpcd_write(dp_info->aux, DP_TRAINING_LANE0_SET, + dp_info->train_set, dp_info->dp_lane_count); } static void radeon_dp_set_tp(struct radeon_dp_link_train_info *dp_info, int tp) @@ -679,7 +662,7 @@ static void radeon_dp_set_tp(struct radeon_dp_link_train_info *dp_info, int tp) } /* enable training pattern on the sink */ - radeon_write_dpcd_reg(dp_info->radeon_connector, DP_TRAINING_PATTERN_SET, tp); + drm_dp_dpcd_writeb(dp_info->aux, DP_TRAINING_PATTERN_SET, tp); } static int radeon_dp_link_train_init(struct radeon_dp_link_train_info *dp_info) @@ -693,26 +676,26 @@ static int radeon_dp_link_train_init(struct radeon_dp_link_train_info *dp_info) /* possibly enable downspread on the sink */ if (dp_info->dpcd[3] & 0x1) - radeon_write_dpcd_reg(dp_info->radeon_connector, - DP_DOWNSPREAD_CTRL, DP_SPREAD_AMP_0_5); + drm_dp_dpcd_writeb(dp_info->aux, + DP_DOWNSPREAD_CTRL, DP_SPREAD_AMP_0_5); else - radeon_write_dpcd_reg(dp_info->radeon_connector, - DP_DOWNSPREAD_CTRL, 0); + drm_dp_dpcd_writeb(dp_info->aux, + DP_DOWNSPREAD_CTRL, 0); if ((dp_info->connector->connector_type == DRM_MODE_CONNECTOR_eDP) && (dig->panel_mode == DP_PANEL_MODE_INTERNAL_DP2_MODE)) { - radeon_write_dpcd_reg(dp_info->radeon_connector, DP_EDP_CONFIGURATION_SET, 1); + drm_dp_dpcd_writeb(dp_info->aux, DP_EDP_CONFIGURATION_SET, 1); } /* set the lane count on the sink */ tmp = dp_info->dp_lane_count; if (drm_dp_enhanced_frame_cap(dp_info->dpcd)) tmp |= DP_LANE_COUNT_ENHANCED_FRAME_EN; - radeon_write_dpcd_reg(dp_info->radeon_connector, DP_LANE_COUNT_SET, tmp); + drm_dp_dpcd_writeb(dp_info->aux, DP_LANE_COUNT_SET, tmp); /* set the link rate on the sink */ tmp = drm_dp_link_rate_to_bw_code(dp_info->dp_clock); - radeon_write_dpcd_reg(dp_info->radeon_connector, DP_LINK_BW_SET, tmp); + drm_dp_dpcd_writeb(dp_info->aux, DP_LINK_BW_SET, tmp); /* start training on the source */ if (ASIC_IS_DCE4(dp_info->rdev) || !dp_info->use_dpencoder) @@ -723,9 +706,9 @@ static int radeon_dp_link_train_init(struct radeon_dp_link_train_info *dp_info) dp_info->dp_clock, dp_info->enc_id, 0); /* disable the training pattern on the sink */ - radeon_write_dpcd_reg(dp_info->radeon_connector, - DP_TRAINING_PATTERN_SET, - DP_TRAINING_PATTERN_DISABLE); + drm_dp_dpcd_writeb(dp_info->aux, + DP_TRAINING_PATTERN_SET, + DP_TRAINING_PATTERN_DISABLE); return 0; } @@ -735,9 +718,9 @@ static int radeon_dp_link_train_finish(struct radeon_dp_link_train_info *dp_info udelay(400); /* disable the training pattern on the sink */ - radeon_write_dpcd_reg(dp_info->radeon_connector, - DP_TRAINING_PATTERN_SET, - DP_TRAINING_PATTERN_DISABLE); + drm_dp_dpcd_writeb(dp_info->aux, + DP_TRAINING_PATTERN_SET, + DP_TRAINING_PATTERN_DISABLE); /* disable the training pattern on the source */ if (ASIC_IS_DCE4(dp_info->rdev) || !dp_info->use_dpencoder) @@ -914,7 +897,7 @@ void radeon_dp_link_train(struct drm_encoder *encoder, else dp_info.enc_id |= ATOM_DP_CONFIG_LINK_A; - tmp = radeon_read_dpcd_reg(radeon_connector, DP_MAX_LANE_COUNT); + drm_dp_dpcd_readb(&dig_connector->dp_i2c_bus->aux, DP_MAX_LANE_COUNT, &tmp); if (ASIC_IS_DCE5(rdev) && (tmp & DP_TPS3_SUPPORTED)) dp_info.tp3_supported = true; else @@ -927,6 +910,7 @@ void radeon_dp_link_train(struct drm_encoder *encoder, dp_info.radeon_connector = radeon_connector; dp_info.dp_lane_count = dig_connector->dp_lane_count; dp_info.dp_clock = dig_connector->dp_clock; + dp_info.aux = &dig_connector->dp_i2c_bus->aux; if (radeon_dp_link_train_init(&dp_info)) goto done; diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 82d4f865546e..ec958e86fd8b 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -1595,6 +1595,7 @@ radeon_add_atom_connector(struct drm_device *dev, uint32_t subpixel_order = SubPixelNone; bool shared_ddc = false; bool is_dp_bridge = false; + bool has_aux = false; if (connector_type == DRM_MODE_CONNECTOR_Unknown) return; @@ -1672,7 +1673,9 @@ radeon_add_atom_connector(struct drm_device *dev, radeon_dig_connector->dp_i2c_bus = radeon_i2c_create_dp(dev, i2c_bus, "eDP-auxch"); else radeon_dig_connector->dp_i2c_bus = radeon_i2c_create_dp(dev, i2c_bus, "DP-auxch"); - if (!radeon_dig_connector->dp_i2c_bus) + if (radeon_dig_connector->dp_i2c_bus) + has_aux = true; + else DRM_ERROR("DP: Failed to assign dp ddc bus! Check dmesg for i2c errors.\n"); radeon_connector->ddc_bus = radeon_i2c_lookup(rdev, i2c_bus); if (!radeon_connector->ddc_bus) @@ -1895,7 +1898,9 @@ radeon_add_atom_connector(struct drm_device *dev, if (!radeon_dig_connector->dp_i2c_bus) DRM_ERROR("DP: Failed to assign dp ddc bus! Check dmesg for i2c errors.\n"); radeon_connector->ddc_bus = radeon_i2c_lookup(rdev, i2c_bus); - if (!radeon_connector->ddc_bus) + if (radeon_connector->ddc_bus) + has_aux = true; + else DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); } subpixel_order = SubPixelHorizontalRGB; @@ -1939,7 +1944,9 @@ radeon_add_atom_connector(struct drm_device *dev, if (i2c_bus->valid) { /* add DP i2c bus */ radeon_dig_connector->dp_i2c_bus = radeon_i2c_create_dp(dev, i2c_bus, "eDP-auxch"); - if (!radeon_dig_connector->dp_i2c_bus) + if (radeon_dig_connector->dp_i2c_bus) + has_aux = true; + else DRM_ERROR("DP: Failed to assign dp ddc bus! Check dmesg for i2c errors.\n"); radeon_connector->ddc_bus = radeon_i2c_lookup(rdev, i2c_bus); if (!radeon_connector->ddc_bus) @@ -2000,6 +2007,10 @@ radeon_add_atom_connector(struct drm_device *dev, connector->display_info.subpixel_order = subpixel_order; drm_sysfs_connector_add(connector); + + if (has_aux) + radeon_dp_aux_init(radeon_connector); + return; failed: diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index e390f559e496..832d9fa1a4c4 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -192,6 +192,7 @@ struct radeon_i2c_chan { struct i2c_algo_dp_aux_data dp; } algo; struct radeon_i2c_bus_rec rec; + struct drm_dp_aux aux; }; /* mostly for macs, but really any system without connector tables */ @@ -692,6 +693,7 @@ extern int radeon_dp_get_panel_mode(struct drm_encoder *encoder, struct drm_connector *connector); extern void radeon_dp_set_rx_power_state(struct drm_connector *connector, u8 power_state); +extern void radeon_dp_aux_init(struct radeon_connector *radeon_connector); extern void atombios_dig_encoder_setup(struct drm_encoder *encoder, int action, int panel_mode); extern void radeon_atom_encoder_init(struct radeon_device *rdev); extern void radeon_atom_disp_eng_pll_init(struct radeon_device *rdev); -- cgit From ab8f1a2a0a7a9882e1214e4f5107e2a02705d11e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 21 Mar 2014 10:34:08 -0400 Subject: drm/radeon: use drm_dp_dpcd_read_link_status() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the radeon specific version with the generic version. Signed-off-by: Alex Deucher Acked-by: Christian König --- drivers/gpu/drm/radeon/atombios_dp.c | 30 +++++------------------------- 1 file changed, 5 insertions(+), 25 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index 8d8f84676544..8b0ab170cef9 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -549,32 +549,12 @@ int radeon_dp_mode_valid_helper(struct drm_connector *connector, return MODE_OK; } -static bool radeon_dp_get_link_status(struct radeon_connector *radeon_connector, - u8 link_status[DP_LINK_STATUS_SIZE]) -{ - struct radeon_connector_atom_dig *dig_connector; - int ret; - - if (!radeon_connector->con_priv) - return false; - dig_connector = radeon_connector->con_priv; - - ret = drm_dp_dpcd_read(&dig_connector->dp_i2c_bus->aux, DP_LANE0_1_STATUS, - link_status, DP_LINK_STATUS_SIZE); - if (ret <= 0) { - return false; - } - - DRM_DEBUG_KMS("link status %6ph\n", link_status); - return true; -} - bool radeon_dp_needs_link_train(struct radeon_connector *radeon_connector) { u8 link_status[DP_LINK_STATUS_SIZE]; struct radeon_connector_atom_dig *dig = radeon_connector->con_priv; - if (!radeon_dp_get_link_status(radeon_connector, link_status)) + if (drm_dp_dpcd_read_link_status(&dig->dp_i2c_bus->aux, link_status) <= 0) return false; if (drm_dp_channel_eq_ok(link_status, dig->dp_lane_count)) return false; @@ -605,7 +585,6 @@ struct radeon_dp_link_train_info { struct radeon_device *rdev; struct drm_encoder *encoder; struct drm_connector *connector; - struct radeon_connector *radeon_connector; int enc_id; int dp_clock; int dp_lane_count; @@ -752,7 +731,8 @@ static int radeon_dp_link_train_cr(struct radeon_dp_link_train_info *dp_info) while (1) { drm_dp_link_train_clock_recovery_delay(dp_info->dpcd); - if (!radeon_dp_get_link_status(dp_info->radeon_connector, dp_info->link_status)) { + if (drm_dp_dpcd_read_link_status(dp_info->aux, + dp_info->link_status) <= 0) { DRM_ERROR("displayport link status failed\n"); break; } @@ -814,7 +794,8 @@ static int radeon_dp_link_train_ce(struct radeon_dp_link_train_info *dp_info) while (1) { drm_dp_link_train_channel_eq_delay(dp_info->dpcd); - if (!radeon_dp_get_link_status(dp_info->radeon_connector, dp_info->link_status)) { + if (drm_dp_dpcd_read_link_status(dp_info->aux, + dp_info->link_status) <= 0) { DRM_ERROR("displayport link status failed\n"); break; } @@ -907,7 +888,6 @@ void radeon_dp_link_train(struct drm_encoder *encoder, dp_info.rdev = rdev; dp_info.encoder = encoder; dp_info.connector = connector; - dp_info.radeon_connector = radeon_connector; dp_info.dp_lane_count = dig_connector->dp_lane_count; dp_info.dp_clock = dig_connector->dp_clock; dp_info.aux = &dig_connector->dp_i2c_bus->aux; -- cgit From 020ff5467603483a97042625d12696c9b39922cf Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 22 Mar 2014 16:20:43 +0100 Subject: drm/radeon: set PIPE_CONFIG for 1D and linear tiling modes on CIK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes fast color clear with 1D-tiled single-sample surfaces and Hyper-Z corruption with 1D-tiled depth surfaces. Even though it seems it is not needed for 1D tiling, CMASK and HTILE are always 2D-tiled, thus the hw needs to know the actual pipe configuration for CMASK and HTILE addressing no matter what the tiling mode of the surface is. Signed-off-by: Marek Olšák Signed-off-by: Alex Deucher Signed-off-by: Christian König --- drivers/gpu/drm/radeon/cik.c | 27 ++++++++++++++++++++++++--- drivers/gpu/drm/radeon/radeon_drv.c | 3 ++- 2 files changed, 26 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 0ae991d3289a..62fefbbaf263 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -2029,6 +2029,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev) break; case 5: gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); break; case 6: @@ -2049,6 +2050,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev) break; case 9: gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); break; case 10: @@ -2071,6 +2073,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev) break; case 13: gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); break; case 14: @@ -2093,6 +2096,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev) break; case 27: gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); break; case 28: @@ -2247,6 +2251,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev) break; case 5: gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); break; case 6: @@ -2267,6 +2272,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev) break; case 9: gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); break; case 10: @@ -2289,6 +2295,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev) break; case 13: gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); break; case 14: @@ -2311,6 +2318,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev) break; case 27: gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); break; case 28: @@ -2467,6 +2475,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev) break; case 5: gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); break; case 6: @@ -2487,6 +2496,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev) break; case 9: gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); break; case 10: @@ -2509,6 +2519,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev) break; case 13: gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); break; case 14: @@ -2531,6 +2542,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev) break; case 27: gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); break; case 28: @@ -2593,6 +2605,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev) break; case 5: gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); break; case 6: @@ -2613,6 +2626,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev) break; case 9: gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); break; case 10: @@ -2635,6 +2649,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev) break; case 13: gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); break; case 14: @@ -2657,6 +2672,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev) break; case 27: gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); break; case 28: @@ -2813,6 +2829,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev) break; case 5: gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); break; case 6: @@ -2828,11 +2845,13 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev) TILE_SPLIT(split_equal_to_row_size)); break; case 8: - gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED); + gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | + PIPE_CONFIG(ADDR_SURF_P2); break; case 9: gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); + MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P2)); break; case 10: gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | @@ -2854,6 +2873,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev) break; case 13: gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); break; case 14: @@ -2876,7 +2896,8 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev) break; case 27: gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); + MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | + PIPE_CONFIG(ADDR_SURF_P2)); break; case 28: gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 4392b7c95ee6..e8b0284e34bb 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -79,7 +79,8 @@ * 2.35.0 - Add CIK macrotile mode array query * 2.36.0 - Fix CIK DCE tiling setup * 2.37.0 - allow GS ring setup on r6xx/r7xx - * 2.38.0 - RADEON_GEM_OP (GET_INITIAL_DOMAIN, SET_INITIAL_DOMAIN) + * 2.38.0 - RADEON_GEM_OP (GET_INITIAL_DOMAIN, SET_INITIAL_DOMAIN), + * CIK: 1D and linear tiling modes contain valid PIPE_CONFIG */ #define KMS_DRIVER_MAJOR 2 #define KMS_DRIVER_MINOR 38 -- cgit From f4510a2752b75ad5847b7935b68c233cab497f97 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 1 Apr 2014 15:22:40 -0700 Subject: drm: Replace crtc fb with primary plane fb (v3) Now that CRTC's have a primary plane, there's no need to track the framebuffer in the CRTC. Replace all references to the CRTC fb with the primary plane's fb. This patch was generated by the Coccinelle semantic patching tool using the following rules: @@ struct drm_crtc C; @@ - (C).fb + C.primary->fb @@ struct drm_crtc *C; @@ - (C)->fb + C->primary->fb v3: Generate patch via coccinelle. Actual removal of crtc->fb has been moved to a subsequent patch. v2: Fixup several lingering crtc->fb instances that were missed in the first patch iteration. [Rob Clark] Signed-off-by: Matt Roper Reviewed-by: Rob Clark --- drivers/gpu/drm/radeon/atombios_crtc.c | 20 ++++++++++---------- drivers/gpu/drm/radeon/r100.c | 4 ++-- drivers/gpu/drm/radeon/radeon_connectors.c | 2 +- drivers/gpu/drm/radeon/radeon_device.c | 2 +- drivers/gpu/drm/radeon/radeon_display.c | 4 ++-- drivers/gpu/drm/radeon/radeon_legacy_crtc.c | 16 ++++++++-------- 6 files changed, 24 insertions(+), 24 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index daa4dd375ab1..fb187c78978f 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -1106,7 +1106,7 @@ static int dce4_crtc_do_set_base(struct drm_crtc *crtc, int r; /* no fb bound */ - if (!atomic && !crtc->fb) { + if (!atomic && !crtc->primary->fb) { DRM_DEBUG_KMS("No FB bound\n"); return 0; } @@ -1116,8 +1116,8 @@ static int dce4_crtc_do_set_base(struct drm_crtc *crtc, target_fb = fb; } else { - radeon_fb = to_radeon_framebuffer(crtc->fb); - target_fb = crtc->fb; + radeon_fb = to_radeon_framebuffer(crtc->primary->fb); + target_fb = crtc->primary->fb; } /* If atomic, assume fb object is pinned & idle & fenced and @@ -1316,7 +1316,7 @@ static int dce4_crtc_do_set_base(struct drm_crtc *crtc, /* set pageflip to happen anywhere in vblank interval */ WREG32(EVERGREEN_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 0); - if (!atomic && fb && fb != crtc->fb) { + if (!atomic && fb && fb != crtc->primary->fb) { radeon_fb = to_radeon_framebuffer(fb); rbo = gem_to_radeon_bo(radeon_fb->obj); r = radeon_bo_reserve(rbo, false); @@ -1350,7 +1350,7 @@ static int avivo_crtc_do_set_base(struct drm_crtc *crtc, int r; /* no fb bound */ - if (!atomic && !crtc->fb) { + if (!atomic && !crtc->primary->fb) { DRM_DEBUG_KMS("No FB bound\n"); return 0; } @@ -1360,8 +1360,8 @@ static int avivo_crtc_do_set_base(struct drm_crtc *crtc, target_fb = fb; } else { - radeon_fb = to_radeon_framebuffer(crtc->fb); - target_fb = crtc->fb; + radeon_fb = to_radeon_framebuffer(crtc->primary->fb); + target_fb = crtc->primary->fb; } obj = radeon_fb->obj; @@ -1485,7 +1485,7 @@ static int avivo_crtc_do_set_base(struct drm_crtc *crtc, /* set pageflip to happen anywhere in vblank interval */ WREG32(AVIVO_D1MODE_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 0); - if (!atomic && fb && fb != crtc->fb) { + if (!atomic && fb && fb != crtc->primary->fb) { radeon_fb = to_radeon_framebuffer(fb); rbo = gem_to_radeon_bo(radeon_fb->obj); r = radeon_bo_reserve(rbo, false); @@ -1972,12 +1972,12 @@ static void atombios_crtc_disable(struct drm_crtc *crtc) int i; atombios_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); - if (crtc->fb) { + if (crtc->primary->fb) { int r; struct radeon_framebuffer *radeon_fb; struct radeon_bo *rbo; - radeon_fb = to_radeon_framebuffer(crtc->fb); + radeon_fb = to_radeon_framebuffer(crtc->primary->fb); rbo = gem_to_radeon_bo(radeon_fb->obj); r = radeon_bo_reserve(rbo, false); if (unlikely(r)) diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 030f8e49c5ee..b6c32640df20 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -3220,12 +3220,12 @@ void r100_bandwidth_update(struct radeon_device *rdev) if (rdev->mode_info.crtcs[0]->base.enabled) { mode1 = &rdev->mode_info.crtcs[0]->base.mode; - pixel_bytes1 = rdev->mode_info.crtcs[0]->base.fb->bits_per_pixel / 8; + pixel_bytes1 = rdev->mode_info.crtcs[0]->base.primary->fb->bits_per_pixel / 8; } if (!(rdev->flags & RADEON_SINGLE_CRTC)) { if (rdev->mode_info.crtcs[1]->base.enabled) { mode2 = &rdev->mode_info.crtcs[1]->base.mode; - pixel_bytes2 = rdev->mode_info.crtcs[1]->base.fb->bits_per_pixel / 8; + pixel_bytes2 = rdev->mode_info.crtcs[1]->base.primary->fb->bits_per_pixel / 8; } } diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index ec958e86fd8b..c566b486ca08 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -89,7 +89,7 @@ static void radeon_property_change_mode(struct drm_encoder *encoder) if (crtc && crtc->enabled) { drm_crtc_helper_set_mode(crtc, &crtc->mode, - crtc->x, crtc->y, crtc->fb); + crtc->x, crtc->y, crtc->primary->fb); } } diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 2e72dcd94b13..15f954cd81cb 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1424,7 +1424,7 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon) /* unpin the front buffers */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct radeon_framebuffer *rfb = to_radeon_framebuffer(crtc->fb); + struct radeon_framebuffer *rfb = to_radeon_framebuffer(crtc->primary->fb); struct radeon_bo *robj; if (rfb == NULL || rfb->obj == NULL) { diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index fbd8b930f2be..5701fbb36b3c 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -369,7 +369,7 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc, work->event = event; work->rdev = rdev; work->crtc_id = radeon_crtc->crtc_id; - old_radeon_fb = to_radeon_framebuffer(crtc->fb); + old_radeon_fb = to_radeon_framebuffer(crtc->primary->fb); new_radeon_fb = to_radeon_framebuffer(fb); /* schedule unpin of the old buffer */ obj = old_radeon_fb->obj; @@ -460,7 +460,7 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc, spin_unlock_irqrestore(&dev->event_lock, flags); /* update crtc fb */ - crtc->fb = fb; + crtc->primary->fb = fb; r = drm_vblank_get(dev, radeon_crtc->crtc_id); if (r) { diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c index 0b158f98d287..cafb1ccf2ec3 100644 --- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c +++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c @@ -385,7 +385,7 @@ int radeon_crtc_do_set_base(struct drm_crtc *crtc, DRM_DEBUG_KMS("\n"); /* no fb bound */ - if (!atomic && !crtc->fb) { + if (!atomic && !crtc->primary->fb) { DRM_DEBUG_KMS("No FB bound\n"); return 0; } @@ -395,8 +395,8 @@ int radeon_crtc_do_set_base(struct drm_crtc *crtc, target_fb = fb; } else { - radeon_fb = to_radeon_framebuffer(crtc->fb); - target_fb = crtc->fb; + radeon_fb = to_radeon_framebuffer(crtc->primary->fb); + target_fb = crtc->primary->fb; } switch (target_fb->bits_per_pixel) { @@ -444,7 +444,7 @@ retry: * We don't shutdown the display controller because new buffer * will end up in same spot. */ - if (!atomic && fb && fb != crtc->fb) { + if (!atomic && fb && fb != crtc->primary->fb) { struct radeon_bo *old_rbo; unsigned long nsize, osize; @@ -555,7 +555,7 @@ retry: WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, crtc_offset); WREG32(RADEON_CRTC_PITCH + radeon_crtc->crtc_offset, crtc_pitch); - if (!atomic && fb && fb != crtc->fb) { + if (!atomic && fb && fb != crtc->primary->fb) { radeon_fb = to_radeon_framebuffer(fb); rbo = gem_to_radeon_bo(radeon_fb->obj); r = radeon_bo_reserve(rbo, false); @@ -599,7 +599,7 @@ static bool radeon_set_crtc_timing(struct drm_crtc *crtc, struct drm_display_mod } } - switch (crtc->fb->bits_per_pixel) { + switch (crtc->primary->fb->bits_per_pixel) { case 8: format = 2; break; @@ -1087,12 +1087,12 @@ static void radeon_crtc_commit(struct drm_crtc *crtc) static void radeon_crtc_disable(struct drm_crtc *crtc) { radeon_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); - if (crtc->fb) { + if (crtc->primary->fb) { int r; struct radeon_framebuffer *radeon_fb; struct radeon_bo *rbo; - radeon_fb = to_radeon_framebuffer(crtc->fb); + radeon_fb = to_radeon_framebuffer(crtc->primary->fb); rbo = gem_to_radeon_bo(radeon_fb->obj); r = radeon_bo_reserve(rbo, false); if (unlikely(r)) -- cgit From 06a139f7a0885fa2c84962300edd181821ddc2c9 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 25 Mar 2014 11:41:40 +0100 Subject: drm/radeon: clear needs_reset flag if IB test fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the IB test fails we don't want to reset the card over and over again, just accept that it isn't working. Bug: https://bugs.freedesktop.org/show_bug.cgi?id=76501 Signed-off-by: Christian König Reviewed-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_ring.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 8b0dfdd23793..f8050f5429e2 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -262,6 +262,7 @@ int radeon_ib_ring_tests(struct radeon_device *rdev) r = radeon_ib_test(rdev, i, ring); if (r) { ring->ready = false; + rdev->needs_reset = false; if (i == RADEON_RING_TYPE_GFX_INDEX) { /* oh, oh, that's really bad */ -- cgit From 1d8eec8ba4a38fcee9d30e4fb5b3d67a9bff9db3 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 27 Mar 2014 14:09:18 +1000 Subject: drm/radeon: fix runtime suspend breaking secondary GPUs Same fix as for nouveau, when we fail with EINVAL, subsequent gets fail hard, causing the device not to open. Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_drv.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index e8b0284e34bb..d0eba48dd74e 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -405,11 +405,15 @@ static int radeon_pmops_runtime_suspend(struct device *dev) struct drm_device *drm_dev = pci_get_drvdata(pdev); int ret; - if (radeon_runtime_pm == 0) - return -EINVAL; + if (radeon_runtime_pm == 0) { + pm_runtime_forbid(dev); + return -EBUSY; + } - if (radeon_runtime_pm == -1 && !radeon_is_px()) - return -EINVAL; + if (radeon_runtime_pm == -1 && !radeon_is_px()) { + pm_runtime_forbid(dev); + return -EBUSY; + } drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; drm_kms_helper_poll_disable(drm_dev); @@ -458,12 +462,15 @@ static int radeon_pmops_runtime_idle(struct device *dev) struct drm_device *drm_dev = pci_get_drvdata(pdev); struct drm_crtc *crtc; - if (radeon_runtime_pm == 0) + if (radeon_runtime_pm == 0) { + pm_runtime_forbid(dev); return -EBUSY; + } /* are we PX enabled? */ if (radeon_runtime_pm == -1 && !radeon_is_px()) { DRM_DEBUG_DRIVER("failing to power off - not px\n"); + pm_runtime_forbid(dev); return -EBUSY; } -- cgit From ec9954fc26719482c041991607f4329d563c4bbb Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 27 Mar 2014 14:09:19 +1000 Subject: drm/radeon: fix resuming mode in pm runtime resume path For runtime pm we'd never suspend with the modesetting hw turned on, so don't try and resume the modesetting hw, as that path will take locks that the interface that is causing us to wake up might also take. Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_device.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 15f954cd81cb..835516d2d257 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1551,10 +1551,12 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon) /* reset hpd state */ radeon_hpd_init(rdev); /* blat the mode back in */ - drm_helper_resume_force_mode(dev); - /* turn on display hw */ - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); + if (fbcon) { + drm_helper_resume_force_mode(dev); + /* turn on display hw */ + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); + } } drm_kms_helper_poll_enable(dev); -- cgit From 32167016076f714f0e35e287fbead7de0f1fb179 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 28 Mar 2014 18:55:10 +0100 Subject: drm/radeon: rework finding display PLL numbers v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This completely reworks how the PLL parameters are generated and should result in better matching dot clock frequencies. Probably needs quite a bit of testing. bugs: https://bugs.freedesktop.org/show_bug.cgi?id=76564 v2: more cleanup and comments. Signed-off-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_display.c | 243 ++++++++++++++++++++------------ 1 file changed, 153 insertions(+), 90 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 5701fbb36b3c..63d54ef758fc 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -34,6 +34,8 @@ #include #include +#include + static void avivo_crtc_load_lut(struct drm_crtc *crtc) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); @@ -799,66 +801,57 @@ int radeon_ddc_get_modes(struct radeon_connector *radeon_connector) } /* avivo */ -static void avivo_get_fb_div(struct radeon_pll *pll, - u32 target_clock, - u32 post_div, - u32 ref_div, - u32 *fb_div, - u32 *frac_fb_div) -{ - u32 tmp = post_div * ref_div; - tmp *= target_clock; - *fb_div = tmp / pll->reference_freq; - *frac_fb_div = tmp % pll->reference_freq; - - if (*fb_div > pll->max_feedback_div) - *fb_div = pll->max_feedback_div; - else if (*fb_div < pll->min_feedback_div) - *fb_div = pll->min_feedback_div; -} - -static u32 avivo_get_post_div(struct radeon_pll *pll, - u32 target_clock) +/** + * avivo_reduce_ratio - fractional number reduction + * + * @nom: nominator + * @den: denominator + * @nom_min: minimum value for nominator + * @den_min: minimum value for denominator + * + * Find the greatest common divisor and apply it on both nominator and + * denominator, but make nominator and denominator are at least as large + * as their minimum values. + */ +static void avivo_reduce_ratio(unsigned *nom, unsigned *den, + unsigned nom_min, unsigned den_min) { - u32 vco, post_div, tmp; - - if (pll->flags & RADEON_PLL_USE_POST_DIV) - return pll->post_div; - - if (pll->flags & RADEON_PLL_PREFER_MINM_OVER_MAXP) { - if (pll->flags & RADEON_PLL_IS_LCD) - vco = pll->lcd_pll_out_min; - else - vco = pll->pll_out_min; - } else { - if (pll->flags & RADEON_PLL_IS_LCD) - vco = pll->lcd_pll_out_max; - else - vco = pll->pll_out_max; + unsigned tmp; + + /* reduce the numbers to a simpler ratio */ + tmp = gcd(*nom, *den); + *nom /= tmp; + *den /= tmp; + + /* make sure nominator is large enough */ + if (*nom < nom_min) { + tmp = (nom_min + *nom - 1) / *nom; + *nom *= tmp; + *den *= tmp; } - post_div = vco / target_clock; - tmp = vco % target_clock; - - if (pll->flags & RADEON_PLL_PREFER_MINM_OVER_MAXP) { - if (tmp) - post_div++; - } else { - if (!tmp) - post_div--; + /* make sure the denominator is large enough */ + if (*den < den_min) { + tmp = (den_min + *den - 1) / *den; + *nom *= tmp; + *den *= tmp; } - - if (post_div > pll->max_post_div) - post_div = pll->max_post_div; - else if (post_div < pll->min_post_div) - post_div = pll->min_post_div; - - return post_div; } -#define MAX_TOLERANCE 10 - +/** + * radeon_compute_pll_avivo - compute PLL paramaters + * + * @pll: information about the PLL + * @dot_clock_p: resulting pixel clock + * fb_div_p: resulting feedback divider + * frac_fb_div_p: fractional part of the feedback divider + * ref_div_p: resulting reference divider + * post_div_p: resulting reference divider + * + * Try to calculate the PLL parameters to generate the given frequency: + * dot_clock = (ref_freq * feedback_div) / (ref_div * post_div) + */ void radeon_compute_pll_avivo(struct radeon_pll *pll, u32 freq, u32 *dot_clock_p, @@ -867,53 +860,123 @@ void radeon_compute_pll_avivo(struct radeon_pll *pll, u32 *ref_div_p, u32 *post_div_p) { - u32 target_clock = freq / 10; - u32 post_div = avivo_get_post_div(pll, target_clock); - u32 ref_div = pll->min_ref_div; - u32 fb_div = 0, frac_fb_div = 0, tmp; + unsigned fb_div_min, fb_div_max, fb_div; + unsigned post_div_min, post_div_max, post_div; + unsigned ref_div_min, ref_div_max, ref_div; + unsigned post_div_best, diff_best; + unsigned nom, den, tmp; - if (pll->flags & RADEON_PLL_USE_REF_DIV) - ref_div = pll->reference_div; + /* determine allowed feedback divider range */ + fb_div_min = pll->min_feedback_div; + fb_div_max = pll->max_feedback_div; if (pll->flags & RADEON_PLL_USE_FRAC_FB_DIV) { - avivo_get_fb_div(pll, target_clock, post_div, ref_div, &fb_div, &frac_fb_div); - frac_fb_div = (100 * frac_fb_div) / pll->reference_freq; - if (frac_fb_div >= 5) { - frac_fb_div -= 5; - frac_fb_div = frac_fb_div / 10; - frac_fb_div++; + fb_div_min *= 10; + fb_div_max *= 10; + } + + /* determine allowed ref divider range */ + if (pll->flags & RADEON_PLL_USE_REF_DIV) + ref_div_min = pll->reference_div; + else + ref_div_min = pll->min_ref_div; + ref_div_max = pll->max_ref_div; + + /* determine allowed post divider range */ + if (pll->flags & RADEON_PLL_USE_POST_DIV) { + post_div_min = pll->post_div; + post_div_max = pll->post_div; + } else { + unsigned target_clock = freq / 10; + unsigned vco_min, vco_max; + + if (pll->flags & RADEON_PLL_IS_LCD) { + vco_min = pll->lcd_pll_out_min; + vco_max = pll->lcd_pll_out_max; + } else { + vco_min = pll->pll_out_min; + vco_max = pll->pll_out_max; } - if (frac_fb_div >= 10) { - fb_div++; - frac_fb_div = 0; + + post_div_min = vco_min / target_clock; + if ((target_clock * post_div_min) < vco_min) + ++post_div_min; + if (post_div_min < pll->min_post_div) + post_div_min = pll->min_post_div; + + post_div_max = vco_max / target_clock; + if ((target_clock * post_div_max) > vco_max) + --post_div_max; + if (post_div_max > pll->max_post_div) + post_div_max = pll->max_post_div; + } + + /* represent the searched ratio as fractional number */ + nom = pll->flags & RADEON_PLL_USE_FRAC_FB_DIV ? freq : freq / 10; + den = pll->reference_freq; + + /* reduce the numbers to a simpler ratio */ + avivo_reduce_ratio(&nom, &den, fb_div_min, post_div_min); + + /* now search for a post divider */ + if (pll->flags & RADEON_PLL_PREFER_MINM_OVER_MAXP) + post_div_best = post_div_min; + else + post_div_best = post_div_max; + diff_best = ~0; + + for (post_div = post_div_min; post_div <= post_div_max; ++post_div) { + unsigned diff = abs(den - den / post_div * post_div); + if (diff < diff_best || (diff == diff_best && + !(pll->flags & RADEON_PLL_PREFER_MINM_OVER_MAXP))) { + + post_div_best = post_div; + diff_best = diff; } + } + post_div = post_div_best; + + /* get matching reference and feedback divider */ + ref_div = max(den / post_div, 1u); + fb_div = nom; + + /* we're almost done, but reference and feedback + divider might be to large now */ + + tmp = ref_div; + + if (fb_div > fb_div_max) { + ref_div = ref_div * fb_div_max / fb_div; + fb_div = fb_div_max; + } + + if (ref_div > ref_div_max) { + ref_div = ref_div_max; + fb_div = nom * ref_div_max / tmp; + } + + /* reduce the numbers to a simpler ratio once more */ + /* this also makes sure that the reference divider is large enough */ + avivo_reduce_ratio(&fb_div, &ref_div, fb_div_min, ref_div_min); + + /* and finally save the result */ + if (pll->flags & RADEON_PLL_USE_FRAC_FB_DIV) { + *fb_div_p = fb_div / 10; + *frac_fb_div_p = fb_div % 10; } else { - while (ref_div <= pll->max_ref_div) { - avivo_get_fb_div(pll, target_clock, post_div, ref_div, - &fb_div, &frac_fb_div); - if (frac_fb_div >= (pll->reference_freq / 2)) - fb_div++; - frac_fb_div = 0; - tmp = (pll->reference_freq * fb_div) / (post_div * ref_div); - tmp = (tmp * 10000) / target_clock; - - if (tmp > (10000 + MAX_TOLERANCE)) - ref_div++; - else if (tmp >= (10000 - MAX_TOLERANCE)) - break; - else - ref_div++; - } + *fb_div_p = fb_div; + *frac_fb_div_p = 0; } - *dot_clock_p = ((pll->reference_freq * fb_div * 10) + (pll->reference_freq * frac_fb_div)) / - (ref_div * post_div * 10); - *fb_div_p = fb_div; - *frac_fb_div_p = frac_fb_div; + *dot_clock_p = ((pll->reference_freq * *fb_div_p * 10) + + (pll->reference_freq * *frac_fb_div_p)) / + (ref_div * post_div * 10); *ref_div_p = ref_div; *post_div_p = post_div; - DRM_DEBUG_KMS("%d, pll dividers - fb: %d.%d ref: %d, post %d\n", - *dot_clock_p, fb_div, frac_fb_div, ref_div, post_div); + + DRM_DEBUG_KMS("%d - %d, pll dividers - fb: %d.%d ref: %d, post %d\n", + freq, *dot_clock_p, *fb_div_p, *frac_fb_div_p, + ref_div, post_div); } /* pre-avivo */ -- cgit From 16086279353cbfecbb3ead474072dced17b97ddc Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 31 Mar 2014 11:19:46 -0400 Subject: drm/radeon: call drm_edid_to_eld when we update the edid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This needs to be done to update some of the fields in the connector structure used by the audio code. Noticed by several users on irc. Signed-off-by: Alex Deucher Signed-off-by: Christian König Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_display.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 63d54ef758fc..386cfa4c194d 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -794,6 +794,7 @@ int radeon_ddc_get_modes(struct radeon_connector *radeon_connector) if (radeon_connector->edid) { drm_mode_connector_update_edid_property(&radeon_connector->base, radeon_connector->edid); ret = drm_add_edid_modes(&radeon_connector->base, radeon_connector->edid); + drm_edid_to_eld(&radeon_connector->base, radeon_connector->edid); return ret; } drm_mode_connector_update_edid_property(&radeon_connector->base, NULL); -- cgit From a8947f576728a66bd3aac629bd8ca021a010c808 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 2 Apr 2014 08:42:48 -0400 Subject: drm/radeon: fix endian swap on hawaii clear state buffer setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Need to swap on BE. Signed-off-by: Alex Deucher Reviewed-by: Christian König Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/cik.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 62fefbbaf263..581f9e06d354 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -6542,8 +6542,8 @@ void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer) buffer[count++] = cpu_to_le32(0x00000000); break; case CHIP_HAWAII: - buffer[count++] = 0x3a00161a; - buffer[count++] = 0x0000002e; + buffer[count++] = cpu_to_le32(0x3a00161a); + buffer[count++] = cpu_to_le32(0x0000002e); break; default: buffer[count++] = cpu_to_le32(0x00000000); -- cgit From f1553174a207f68a4ec19d436003097e0a4dc405 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 2 Apr 2014 08:42:49 -0400 Subject: drm/radeon: fix typo in spectre_golden_registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Alex Deucher Reviewed-by: Christian König Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/cik.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 581f9e06d354..745143c2358f 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -1096,7 +1096,7 @@ static const u32 spectre_golden_registers[] = 0x8a14, 0xf000003f, 0x00000007, 0x8b24, 0xffffffff, 0x00ffffff, 0x28350, 0x3f3f3fff, 0x00000082, - 0x28355, 0x0000003f, 0x00000000, + 0x28354, 0x0000003f, 0x00000000, 0x3e78, 0x00000001, 0x00000002, 0x913c, 0xffff03df, 0x00000004, 0xc768, 0x00000008, 0x00000008, -- cgit From deadcb36f49bee9b3010382ffe4fe4f5c439f1c5 Mon Sep 17 00:00:00 2001 From: Lauri Kasanen Date: Wed, 2 Apr 2014 20:33:42 +0300 Subject: drm/radeon: Use two-ended allocation by size, v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This decreases eviction by up to 20%, by improving the fragmentation quality. No harm in normal cases that fit VRAM fully (PTS gaming suite). In some cases, even the VRAM-fitting cases improved slightly (openarena, urban terror). 512kb was measured as the most optimal threshold for 3d workloads common to radeon. Other drivers may need different thresholds according to their workloads. v2: Nicer formatting Signed-off-by: Lauri Kasanen Reviewed-by: Christian König Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_object.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/radeon') diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 1375ff85b08a..19bec0dbfa38 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -104,7 +104,7 @@ bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo) void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain) { - u32 c = 0; + u32 c = 0, i; rbo->placement.fpfn = 0; rbo->placement.lpfn = 0; @@ -131,6 +131,17 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain) rbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; rbo->placement.num_placement = c; rbo->placement.num_busy_placement = c; + + /* + * Use two-ended allocation depending on the buffer size to + * improve fragmentation quality. + * 512kb was measured as the most optimal number. + */ + if (rbo->tbo.mem.size > 512 * 1024) { + for (i = 0; i < c; i++) { + rbo->placements[i] |= TTM_PL_FLAG_TOPDOWN; + } + } } int radeon_bo_create(struct radeon_device *rdev, -- cgit