summaryrefslogtreecommitdiff
path: root/drivers/gpu
diff options
context:
space:
mode:
authorJani Nikula <jani.nikula@intel.com>2019-12-11 10:35:37 +0200
committerJani Nikula <jani.nikula@intel.com>2019-12-11 11:13:50 +0200
commit023265ed75d8792ca1d555430a8985511d3f8788 (patch)
tree0940ba6f005f102062e5842b4ec8073ecd583ac0 /drivers/gpu
parent2b68392e638dfa5cf4f7b558f62e3ea4def2e605 (diff)
parente42617b825f8073569da76dc4510bfa019b1c35a (diff)
Merge drm/drm-next into drm-intel-next-queued
Sync up with v5.5-rc1 to get the updated lock_release() API among other things. Fix the conflict reported by Stephen Rothwell [1]. [1] http://lore.kernel.org/r/20191210093957.5120f717@canb.auug.org.au Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/drm/Kconfig1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c444
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h53
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.h13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c67
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c145
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik.c102
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c178
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c40
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c50
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c60
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c57
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nv.c24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si.c97
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_ih.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.c7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Kconfig2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c2
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c2
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c10
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c19
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c74
-rw-r--r--drivers/gpu/drm/amd/powerplay/amdgpu_smu.c64
-rw-r--r--drivers/gpu/drm/amd/powerplay/arcturus_ppt.c24
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c9
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c30
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h9
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h17
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h2
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/smu_v12_0.h5
-rw-r--r--drivers/gpu/drm/amd/powerplay/navi10_ppt.c370
-rw-r--r--drivers/gpu/drm/amd/powerplay/navi10_ppt.h5
-rw-r--r--drivers/gpu/drm/amd/powerplay/renoir_ppt.c1
-rw-r--r--drivers/gpu/drm/amd/powerplay/smu_internal.h4
-rw-r--r--drivers/gpu/drm/amd/powerplay/smu_v11_0.c96
-rw-r--r--drivers/gpu/drm/amd/powerplay/smu_v12_0.c28
-rw-r--r--drivers/gpu/drm/amd/powerplay/vega20_ppt.c2
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c11
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi.c41
-rw-r--r--drivers/gpu/drm/bridge/ti-tfp410.c4
-rw-r--r--drivers/gpu/drm/drm_connector.c2
-rw-r--r--drivers/gpu/drm/drm_dp_mst_topology.c6
-rw-r--r--drivers/gpu/drm/drm_gem.c24
-rw-r--r--drivers/gpu/drm/drm_gem_ttm_helper.c13
-rw-r--r--drivers/gpu/drm/drm_property.c2
-rw-r--r--drivers/gpu/drm/i915/Kconfig.debug2
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context.c2
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_shrinker.c4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.c2
-rw-r--r--drivers/gpu/drm/i915/gvt/cmd_parser.c6
-rw-r--r--drivers/gpu/drm/i915/gvt/dmabuf.c4
-rw-r--r--drivers/gpu/drm/i915/gvt/handlers.c9
-rw-r--r--drivers/gpu/drm/i915/i915_request.c2
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_drv.c36
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_drv.h18
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_main.c3
-rw-r--r--drivers/gpu/drm/msm/Kconfig1
-rw-r--r--drivers/gpu/drm/msm/adreno/a3xx_gpu.c28
-rw-r--r--drivers/gpu/drm/msm/adreno/a3xx_gpu.h3
-rw-r--r--drivers/gpu/drm/msm/adreno/a4xx_gpu.c25
-rw-r--r--drivers/gpu/drm/msm/adreno/a4xx_gpu.h3
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_gpu.c79
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_power.c7
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_device.c15
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_gpu.c40
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_gpu.h15
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.c43
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c21
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c20
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c39
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c15
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c7
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c60
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h4
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c6
-rw-r--r--drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c10
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c114
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c3
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c23
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h2
-rw-r--r--drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c2
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi_cfg.c28
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi_cfg.h1
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi_host.c3
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy.c8
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy.h1
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c60
-rw-r--r--drivers/gpu/drm/msm/hdmi/hdmi_phy.c8
-rw-r--r--drivers/gpu/drm/msm/msm_gpu.c6
-rw-r--r--drivers/gpu/drm/msm/msm_gpummu.c6
-rw-r--r--drivers/gpu/drm/msm/msm_iommu.c6
-rw-r--r--drivers/gpu/drm/msm/msm_mmu.h4
-rw-r--r--drivers/gpu/drm/msm/msm_rd.c16
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_svm.c230
-rw-r--r--drivers/gpu/drm/omapdrm/omap_gem.c4
-rw-r--r--drivers/gpu/drm/radeon/cik.c98
-rw-r--r--drivers/gpu/drm/radeon/r100.c4
-rw-r--r--drivers/gpu/drm/radeon/r200.c4
-rw-r--r--drivers/gpu/drm/radeon/r600.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon.h9
-rw-r--r--drivers/gpu/drm/radeon/radeon_drv.c9
-rw-r--r--drivers/gpu/drm/radeon/radeon_mn.c218
-rw-r--r--drivers/gpu/drm/radeon/si.c101
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_tcon.c2
-rw-r--r--drivers/gpu/drm/tegra/dc.c18
-rw-r--r--drivers/gpu/drm/tegra/drm.c7
-rw-r--r--drivers/gpu/drm/tegra/gem.c50
-rw-r--r--drivers/gpu/drm/tegra/hub.c3
-rw-r--r--drivers/gpu/drm/tegra/plane.c11
-rw-r--r--drivers/gpu/drm/tegra/sor.c38
-rw-r--r--drivers/gpu/drm/tegra/vic.c7
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo_vm.c174
-rw-r--r--drivers/gpu/drm/vmwgfx/Kconfig1
-rw-r--r--drivers/gpu/drm/vmwgfx/Makefile2
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h233
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_bo.c10
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_drv.h44
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c1
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c488
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_resource.c193
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h13
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_surface.c395
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c15
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_validation.c74
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_validation.h16
152 files changed, 3893 insertions, 1767 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 76db8bc0dd1f..c87262997dcb 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -99,6 +99,7 @@ config DRM_KMS_FB_HELPER
config DRM_DEBUG_DP_MST_TOPOLOGY_REFS
bool "Enable refcount backtrace history in the DP MST helpers"
+ depends on STACKTRACE_SUPPORT
select STACKDEPOT
depends on DRM_KMS_HELPER
depends on DEBUG_KERNEL
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index bcc5d40a8d5f..0c229a92a24b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -967,6 +967,8 @@ struct amdgpu_device {
struct mutex lock_reset;
struct amdgpu_doorbell_index doorbell_index;
+ struct mutex notifier_lock;
+
int asic_reset_res;
struct work_struct xgmi_reset_work;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index ae6f5446262c..888209eb8cec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -105,11 +105,24 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
(kfd_mem_limit.max_ttm_mem_limit >> 20));
}
+/* Estimate page table size needed to represent a given memory size
+ *
+ * With 4KB pages, we need one 8 byte PTE for each 4KB of memory
+ * (factor 512, >> 9). With 2MB pages, we need one 8 byte PTE for 2MB
+ * of memory (factor 256K, >> 18). ROCm user mode tries to optimize
+ * for 2MB pages for TLB efficiency. However, small allocations and
+ * fragmented system memory still need some 4KB pages. We choose a
+ * compromise that should work in most cases without reserving too
+ * much memory for page tables unnecessarily (factor 16K, >> 14).
+ */
+#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)
+
static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 domain, bool sg)
{
+ uint64_t reserved_for_pt =
+ ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed;
- uint64_t reserved_for_pt = amdgpu_amdkfd_total_mem_size >> 9;
int ret = 0;
acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
@@ -505,8 +518,7 @@ static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
*
* Returns 0 for success, negative errno for errors.
*/
-static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
- uint64_t user_addr)
+static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
{
struct amdkfd_process_info *process_info = mem->process_info;
struct amdgpu_bo *bo = mem->bo;
@@ -1199,7 +1211,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
if (user_addr) {
- ret = init_user_pages(*mem, current->mm, user_addr);
+ ret = init_user_pages(*mem, user_addr);
if (ret)
goto allocate_init_user_pages_failed;
}
@@ -1744,6 +1756,10 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
return ret;
}
+ /*
+ * FIXME: Cannot ignore the return code, must hold
+ * notifier_lock
+ */
amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
/* Mark the BO as valid unless it was invalidated
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index a169ff16277f..5ca905b4a0fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -538,8 +538,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
e->tv.num_shared = 2;
amdgpu_bo_list_get_list(p->bo_list, &p->validated);
- if (p->bo_list->first_userptr != p->bo_list->num_entries)
- p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
INIT_LIST_HEAD(&duplicates);
amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
@@ -1219,11 +1217,11 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
if (r)
goto error_unlock;
- /* No memory allocation is allowed while holding the mn lock.
- * p->mn is hold until amdgpu_cs_submit is finished and fence is added
- * to BOs.
+ /* No memory allocation is allowed while holding the notifier lock.
+ * The lock is held until amdgpu_cs_submit is finished and fence is
+ * added to BOs.
*/
- amdgpu_mn_lock(p->mn);
+ mutex_lock(&p->adev->notifier_lock);
/* If userptr are invalidated after amdgpu_cs_parser_bos(), return
* -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
@@ -1266,13 +1264,13 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
- amdgpu_mn_unlock(p->mn);
+ mutex_unlock(&p->adev->notifier_lock);
return 0;
error_abort:
drm_sched_job_cleanup(&job->base);
- amdgpu_mn_unlock(p->mn);
+ mutex_unlock(&p->adev->notifier_lock);
error_unlock:
amdgpu_job_free(job);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 58f6b3b92831..c17505fba988 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2794,6 +2794,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->virt.vf_errors.lock);
hash_init(adev->mn_hash);
mutex_init(&adev->lock_reset);
+ mutex_init(&adev->notifier_lock);
mutex_init(&adev->virt.dpm_mutex);
mutex_init(&adev->psp.mutex);
@@ -3109,7 +3110,9 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
int r;
DRM_INFO("amdgpu: finishing device.\n");
+ flush_delayed_work(&adev->delayed_init_work);
adev->shutdown = true;
+
/* disable all interrupts */
amdgpu_irq_disable_all(adev);
if (adev->mode_info.mode_config_initialized){
@@ -3127,7 +3130,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
adev->firmware.gpu_info_fw = NULL;
}
adev->accel_working = false;
- cancel_delayed_work_sync(&adev->delayed_init_work);
/* free i2c buses */
if (!amdgpu_device_has_dc_support(adev))
amdgpu_i2c_fini(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index d2dd59a95e8a..3cadb0b76f22 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -514,7 +514,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
* Also, don't allow GTT domain if the BO doens't have USWC falg set.
*/
if (adev->asic_type >= CHIP_CARRIZO &&
- adev->asic_type <= CHIP_RAVEN &&
+ adev->asic_type < CHIP_RAVEN &&
(adev->flags & AMD_IS_APU) &&
(bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) &&
amdgpu_bo_support_uswc(bo_flags) &&
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 557be89421a8..0ffc9447b573 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -998,10 +998,10 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x731B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
{0x1002, 0x731F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
/* Navi14 */
- {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT},
- {0x1002, 0x7341, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT},
- {0x1002, 0x7347, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT},
- {0x1002, 0x734F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT},
+ {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
+ {0x1002, 0x7341, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
+ {0x1002, 0x7347, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
+ {0x1002, 0x734F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
/* Renoir */
{0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU|AMD_EXP_HW_SUPPORT},
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index c9d1fada6188..e00b46180d2e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -454,8 +454,6 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
}
ring = &adev->gfx.kiq.ring;
- if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring)
- kfree(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]);
kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
amdgpu_bo_free_kernel(&ring->mqd_obj,
&ring->mqd_gpu_addr,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index a74ecd449775..0ae0a2715b0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -225,7 +225,7 @@ struct amdgpu_me {
uint32_t num_me;
uint32_t num_pipe_per_me;
uint32_t num_queue_per_pipe;
- void *mqd_backup[AMDGPU_MAX_GFX_RINGS + 1];
+ void *mqd_backup[AMDGPU_MAX_GFX_RINGS];
/* These are the resources for which amdgpu takes ownership */
DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 406736a1bd3d..b499a3de8bb6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -77,6 +77,7 @@ struct amdgpu_gmc_fault {
struct amdgpu_vmhub {
uint32_t ctx0_ptb_addr_lo32;
uint32_t ctx0_ptb_addr_hi32;
+ uint32_t vm_inv_eng0_sem;
uint32_t vm_inv_eng0_req;
uint32_t vm_inv_eng0_ack;
uint32_t vm_context0_cntl;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 82c46c4faaad..b6db28a570c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -655,15 +655,19 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
return -ENOMEM;
alloc_size = info->read_mmr_reg.count * sizeof(*regs);
- for (i = 0; i < info->read_mmr_reg.count; i++)
+ amdgpu_gfx_off_ctrl(adev, false);
+ for (i = 0; i < info->read_mmr_reg.count; i++) {
if (amdgpu_asic_read_register(adev, se_num, sh_num,
info->read_mmr_reg.dword_offset + i,
&regs[i])) {
DRM_DEBUG_KMS("unallowed offset %#x\n",
info->read_mmr_reg.dword_offset + i);
kfree(regs);
+ amdgpu_gfx_off_ctrl(adev, true);
return -EFAULT;
}
+ }
+ amdgpu_gfx_off_ctrl(adev, true);
n = copy_to_user(out, regs, min(size, alloc_size));
kfree(regs);
return n ? -EFAULT : 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 392300f77b13..828b5167ff12 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -51,439 +51,107 @@
#include "amdgpu_amdkfd.h"
/**
- * struct amdgpu_mn_node
+ * amdgpu_mn_invalidate_gfx - callback to notify about mm change
*
- * @it: interval node defining start-last of the affected address range
- * @bos: list of all BOs in the affected address range
- *
- * Manages all BOs which are affected of a certain range of address space.
- */
-struct amdgpu_mn_node {
- struct interval_tree_node it;
- struct list_head bos;
-};
-
-/**
- * amdgpu_mn_destroy - destroy the HMM mirror
- *
- * @work: previously sheduled work item
- *
- * Lazy destroys the notifier from a work item
- */
-static void amdgpu_mn_destroy(struct work_struct *work)
-{
- struct amdgpu_mn *amn = container_of(work, struct amdgpu_mn, work);
- struct amdgpu_device *adev = amn->adev;
- struct amdgpu_mn_node *node, *next_node;
- struct amdgpu_bo *bo, *next_bo;
-
- mutex_lock(&adev->mn_lock);
- down_write(&amn->lock);
- hash_del(&amn->node);
- rbtree_postorder_for_each_entry_safe(node, next_node,
- &amn->objects.rb_root, it.rb) {
- list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) {
- bo->mn = NULL;
- list_del_init(&bo->mn_list);
- }
- kfree(node);
- }
- up_write(&amn->lock);
- mutex_unlock(&adev->mn_lock);
-
- hmm_mirror_unregister(&amn->mirror);
- kfree(amn);
-}
-
-/**
- * amdgpu_hmm_mirror_release - callback to notify about mm destruction
- *
- * @mirror: the HMM mirror (mm) this callback is about
- *
- * Shedule a work item to lazy destroy HMM mirror.
- */
-static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
-{
- struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
-
- INIT_WORK(&amn->work, amdgpu_mn_destroy);
- schedule_work(&amn->work);
-}
-
-/**
- * amdgpu_mn_lock - take the write side lock for this notifier
- *
- * @mn: our notifier
- */
-void amdgpu_mn_lock(struct amdgpu_mn *mn)
-{
- if (mn)
- down_write(&mn->lock);
-}
-
-/**
- * amdgpu_mn_unlock - drop the write side lock for this notifier
- *
- * @mn: our notifier
- */
-void amdgpu_mn_unlock(struct amdgpu_mn *mn)
-{
- if (mn)
- up_write(&mn->lock);
-}
-
-/**
- * amdgpu_mn_read_lock - take the read side lock for this notifier
- *
- * @amn: our notifier
- * @blockable: is the notifier blockable
- */
-static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
-{
- if (blockable)
- down_read(&amn->lock);
- else if (!down_read_trylock(&amn->lock))
- return -EAGAIN;
-
- return 0;
-}
-
-/**
- * amdgpu_mn_read_unlock - drop the read side lock for this notifier
- *
- * @amn: our notifier
- */
-static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
-{
- up_read(&amn->lock);
-}
-
-/**
- * amdgpu_mn_invalidate_node - unmap all BOs of a node
- *
- * @node: the node with the BOs to unmap
- * @start: start of address range affected
- * @end: end of address range affected
+ * @mni: the range (mm) is about to update
+ * @range: details on the invalidation
+ * @cur_seq: Value to pass to mmu_interval_set_seq()
*
* Block for operations on BOs to finish and mark pages as accessed and
* potentially dirty.
*/
-static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
- unsigned long start,
- unsigned long end)
+static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
{
- struct amdgpu_bo *bo;
+ struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
long r;
- list_for_each_entry(bo, &node->bos, mn_list) {
-
- if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end))
- continue;
-
- r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv,
- true, false, MAX_SCHEDULE_TIMEOUT);
- if (r <= 0)
- DRM_ERROR("(%ld) failed to wait for user bo\n", r);
- }
-}
-
-/**
- * amdgpu_mn_sync_pagetables_gfx - callback to notify about mm change
- *
- * @mirror: the hmm_mirror (mm) is about to update
- * @update: the update start, end address
- *
- * Block for operations on BOs to finish and mark pages as accessed and
- * potentially dirty.
- */
-static int
-amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
- const struct mmu_notifier_range *update)
-{
- struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
- unsigned long start = update->start;
- unsigned long end = update->end;
- bool blockable = mmu_notifier_range_blockable(update);
- struct interval_tree_node *it;
+ if (!mmu_notifier_range_blockable(range))
+ return false;
- /* notification is exclusive, but interval is inclusive */
- end -= 1;
+ mutex_lock(&adev->notifier_lock);
- /* TODO we should be able to split locking for interval tree and
- * amdgpu_mn_invalidate_node
- */
- if (amdgpu_mn_read_lock(amn, blockable))
- return -EAGAIN;
+ mmu_interval_set_seq(mni, cur_seq);
- it = interval_tree_iter_first(&amn->objects, start, end);
- while (it) {
- struct amdgpu_mn_node *node;
-
- if (!blockable) {
- amdgpu_mn_read_unlock(amn);
- return -EAGAIN;
- }
-
- node = container_of(it, struct amdgpu_mn_node, it);
- it = interval_tree_iter_next(it, start, end);
-
- amdgpu_mn_invalidate_node(node, start, end);
- }
-
- amdgpu_mn_read_unlock(amn);
-
- return 0;
-}
-
-/**
- * amdgpu_mn_sync_pagetables_hsa - callback to notify about mm change
- *
- * @mirror: the hmm_mirror (mm) is about to update
- * @update: the update start, end address
- *
- * We temporarily evict all BOs between start and end. This
- * necessitates evicting all user-mode queues of the process. The BOs
- * are restorted in amdgpu_mn_invalidate_range_end_hsa.
- */
-static int
-amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
- const struct mmu_notifier_range *update)
-{
- struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
- unsigned long start = update->start;
- unsigned long end = update->end;
- bool blockable = mmu_notifier_range_blockable(update);
- struct interval_tree_node *it;
-
- /* notification is exclusive, but interval is inclusive */
- end -= 1;
-
- if (amdgpu_mn_read_lock(amn, blockable))
- return -EAGAIN;
-
- it = interval_tree_iter_first(&amn->objects, start, end);
- while (it) {
- struct amdgpu_mn_node *node;
- struct amdgpu_bo *bo;
-
- if (!blockable) {
- amdgpu_mn_read_unlock(amn);
- return -EAGAIN;
- }
-
- node = container_of(it, struct amdgpu_mn_node, it);
- it = interval_tree_iter_next(it, start, end);
-
- list_for_each_entry(bo, &node->bos, mn_list) {
- struct kgd_mem *mem = bo->kfd_bo;
-
- if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
- start, end))
- amdgpu_amdkfd_evict_userptr(mem, amn->mm);
- }
- }
-
- amdgpu_mn_read_unlock(amn);
-
- return 0;
+ r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, true, false,
+ MAX_SCHEDULE_TIMEOUT);
+ mutex_unlock(&adev->notifier_lock);
+ if (r <= 0)
+ DRM_ERROR("(%ld) failed to wait for user bo\n", r);
+ return true;
}
-/* Low bits of any reasonable mm pointer will be unused due to struct
- * alignment. Use these bits to make a unique key from the mm pointer
- * and notifier type.
- */
-#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
-
-static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
- [AMDGPU_MN_TYPE_GFX] = {
- .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx,
- .release = amdgpu_hmm_mirror_release
- },
- [AMDGPU_MN_TYPE_HSA] = {
- .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa,
- .release = amdgpu_hmm_mirror_release
- },
+static const struct mmu_interval_notifier_ops amdgpu_mn_gfx_ops = {
+ .invalidate = amdgpu_mn_invalidate_gfx,
};
/**
- * amdgpu_mn_get - create HMM mirror context
+ * amdgpu_mn_invalidate_hsa - callback to notify about mm change
*
- * @adev: amdgpu device pointer
- * @type: type of MMU notifier context
+ * @mni: the range (mm) is about to update
+ * @range: details on the invalidation
+ * @cur_seq: Value to pass to mmu_interval_set_seq()
*
- * Creates a HMM mirror context for current->mm.
+ * We temporarily evict the BO attached to this range. This necessitates
+ * evicting all user-mode queues of the process.
*/
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
- enum amdgpu_mn_type type)
+static bool amdgpu_mn_invalidate_hsa(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
{
- struct mm_struct *mm = current->mm;
- struct amdgpu_mn *amn;
- unsigned long key = AMDGPU_MN_KEY(mm, type);
- int r;
-
- mutex_lock(&adev->mn_lock);
- if (down_write_killable(&mm->mmap_sem)) {
- mutex_unlock(&adev->mn_lock);
- return ERR_PTR(-EINTR);
- }
-
- hash_for_each_possible(adev->mn_hash, amn, node, key)
- if (AMDGPU_MN_KEY(amn->mm, amn->type) == key)
- goto release_locks;
-
- amn = kzalloc(sizeof(*amn), GFP_KERNEL);
- if (!amn) {
- amn = ERR_PTR(-ENOMEM);
- goto release_locks;
- }
-
- amn->adev = adev;
- amn->mm = mm;
- init_rwsem(&amn->lock);
- amn->type = type;
- amn->objects = RB_ROOT_CACHED;
-
- amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
- r = hmm_mirror_register(&amn->mirror, mm);
- if (r)
- goto free_amn;
+ struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- hash_add(adev->mn_hash, &amn->node, AMDGPU_MN_KEY(mm, type));
+ if (!mmu_notifier_range_blockable(range))
+ return false;
-release_locks:
- up_write(&mm->mmap_sem);
- mutex_unlock(&adev->mn_lock);
+ mutex_lock(&adev->notifier_lock);
- return amn;
+ mmu_interval_set_seq(mni, cur_seq);
-free_amn:
- up_write(&mm->mmap_sem);
- mutex_unlock(&adev->mn_lock);
- kfree(amn);
+ amdgpu_amdkfd_evict_userptr(bo->kfd_bo, bo->notifier.mm);
+ mutex_unlock(&adev->notifier_lock);
- return ERR_PTR(r);
+ return true;
}
+static const struct mmu_interval_notifier_ops amdgpu_mn_hsa_ops = {
+ .invalidate = amdgpu_mn_invalidate_hsa,
+};
+
/**
* amdgpu_mn_register - register a BO for notifier updates
*
* @bo: amdgpu buffer object
* @addr: userptr addr we should monitor
*
- * Registers an HMM mirror for the given BO at the specified address.
+ * Registers a mmu_notifier for the given BO at the specified address.
* Returns 0 on success, -ERRNO if anything goes wrong.
*/
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
{
- unsigned long end = addr + amdgpu_bo_size(bo) - 1;
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- enum amdgpu_mn_type type =
- bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
- struct amdgpu_mn *amn;
- struct amdgpu_mn_node *node = NULL, *new_node;
- struct list_head bos;
- struct interval_tree_node *it;
-
- amn = amdgpu_mn_get(adev, type);
- if (IS_ERR(amn))
- return PTR_ERR(amn);
-
- new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);
- if (!new_node)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&bos);
-
- down_write(&amn->lock);
-
- while ((it = interval_tree_iter_first(&amn->objects, addr, end))) {
- kfree(node);
- node = container_of(it, struct amdgpu_mn_node, it);
- interval_tree_remove(&node->it, &amn->objects);
- addr = min(it->start, addr);
- end = max(it->last, end);
- list_splice(&node->bos, &bos);
- }
-
- if (!node)
- node = new_node;
- else
- kfree(new_node);
-
- bo->mn = amn;
-
- node->it.start = addr;
- node->it.last = end;
- INIT_LIST_HEAD(&node->bos);
- list_splice(&bos, &node->bos);
- list_add(&bo->mn_list, &node->bos);
-
- interval_tree_insert(&node->it, &amn->objects);
-
- up_write(&amn->lock);
-
- return 0;
+ if (bo->kfd_bo)
+ return mmu_interval_notifier_insert(&bo->notifier, current->mm,
+ addr, amdgpu_bo_size(bo),
+ &amdgpu_mn_hsa_ops);
+ return mmu_interval_notifier_insert(&bo->notifier, current->mm, addr,
+ amdgpu_bo_size(bo),
+ &amdgpu_mn_gfx_ops);
}
/**
- * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
+ * amdgpu_mn_unregister - unregister a BO for notifier updates
*
* @bo: amdgpu buffer object
*
- * Remove any registration of HMM mirror updates from the buffer object.
+ * Remove any registration of mmu notifier updates from the buffer object.
*/
void amdgpu_mn_unregister(struct amdgpu_bo *bo)
{
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- struct amdgpu_mn *amn;
- struct list_head *head;
-
- mutex_lock(&adev->mn_lock);
-
- amn = bo->mn;
- if (amn == NULL) {
- mutex_unlock(&adev->mn_lock);
+ if (!bo->notifier.mm)
return;
- }
-
- down_write(&amn->lock);
-
- /* save the next list entry for later */
- head = bo->mn_list.next;
-
- bo->mn = NULL;
- list_del_init(&bo->mn_list);
-
- if (list_empty(head)) {
- struct amdgpu_mn_node *node;
-
- node = container_of(head, struct amdgpu_mn_node, bos);
- interval_tree_remove(&node->it, &amn->objects);
- kfree(node);
- }
-
- up_write(&amn->lock);
- mutex_unlock(&adev->mn_lock);
-}
-
-/* flags used by HMM internal, not related to CPU/GPU PTE flags */
-static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
- (1 << 0), /* HMM_PFN_VALID */
- (1 << 1), /* HMM_PFN_WRITE */
- 0 /* HMM_PFN_DEVICE_PRIVATE */
-};
-
-static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
- 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
- 0, /* HMM_PFN_NONE */
- 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
-};
-
-void amdgpu_hmm_init_range(struct hmm_range *range)
-{
- if (range) {
- range->flags = hmm_range_flags;
- range->values = hmm_range_values;
- range->pfn_shift = PAGE_SHIFT;
- }
+ mmu_interval_notifier_remove(&bo->notifier);
+ bo->notifier.mm = NULL;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index b8ed68943625..a292238f75eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -30,63 +30,10 @@
#include <linux/workqueue.h>
#include <linux/interval_tree.h>
-enum amdgpu_mn_type {
- AMDGPU_MN_TYPE_GFX,
- AMDGPU_MN_TYPE_HSA,
-};
-
-/**
- * struct amdgpu_mn
- *
- * @adev: amdgpu device pointer
- * @mm: process address space
- * @type: type of MMU notifier
- * @work: destruction work item
- * @node: hash table node to find structure by adev and mn
- * @lock: rw semaphore protecting the notifier nodes
- * @objects: interval tree containing amdgpu_mn_nodes
- * @mirror: HMM mirror function support
- *
- * Data for each amdgpu device and process address space.
- */
-struct amdgpu_mn {
- /* constant after initialisation */
- struct amdgpu_device *adev;
- struct mm_struct *mm;
- enum amdgpu_mn_type type;
-
- /* only used on destruction */
- struct work_struct work;
-
- /* protected by adev->mn_lock */
- struct hlist_node node;
-
- /* objects protected by lock */
- struct rw_semaphore lock;
- struct rb_root_cached objects;
-
-#ifdef CONFIG_HMM_MIRROR
- /* HMM mirror */
- struct hmm_mirror mirror;
-#endif
-};
-
#if defined(CONFIG_HMM_MIRROR)
-void amdgpu_mn_lock(struct amdgpu_mn *mn);
-void amdgpu_mn_unlock(struct amdgpu_mn *mn);
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
- enum amdgpu_mn_type type);
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
void amdgpu_mn_unregister(struct amdgpu_bo *bo);
-void amdgpu_hmm_init_range(struct hmm_range *range);
#else
-static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
-static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
-static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
- enum amdgpu_mn_type type)
-{
- return NULL;
-}
static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
{
DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, "
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 7e99f6c58c48..36dec51d1ef1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -30,6 +30,9 @@
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
+#ifdef CONFIG_MMU_NOTIFIER
+#include <linux/mmu_notifier.h>
+#endif
#define AMDGPU_BO_INVALID_OFFSET LONG_MAX
#define AMDGPU_BO_MAX_PLACEMENTS 3
@@ -101,10 +104,12 @@ struct amdgpu_bo {
struct ttm_bo_kmap_obj dma_buf_vmap;
struct amdgpu_mn *mn;
- union {
- struct list_head mn_list;
- struct list_head shadow_list;
- };
+
+#ifdef CONFIG_MMU_NOTIFIER
+ struct mmu_interval_notifier notifier;
+#endif
+
+ struct list_head shadow_list;
struct kgd_mem *kfd_bo;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index bbe9ac7e843f..44be3a45b25e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -567,7 +567,9 @@ static int psp_xgmi_initialize(struct psp_context *psp)
struct ta_xgmi_shared_memory *xgmi_cmd;
int ret;
- if (!psp->adev->psp.ta_fw)
+ if (!psp->adev->psp.ta_fw ||
+ !psp->adev->psp.ta_xgmi_ucode_size ||
+ !psp->adev->psp.ta_xgmi_start_addr)
return -ENOENT;
if (!psp->xgmi_context.initialized) {
@@ -756,6 +758,12 @@ static int psp_ras_terminate(struct psp_context *psp)
{
int ret;
+ /*
+ * TODO: bypass the terminate in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
if (!psp->ras.ras_initialized)
return 0;
@@ -777,6 +785,18 @@ static int psp_ras_initialize(struct psp_context *psp)
{
int ret;
+ /*
+ * TODO: bypass the initialize in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->adev->psp.ta_ras_ucode_size ||
+ !psp->adev->psp.ta_ras_start_addr) {
+ dev_warn(psp->adev->dev, "RAS: ras ta ucode is not available\n");
+ return 0;
+ }
+
if (!psp->ras.ras_initialized) {
ret = psp_ras_init_shared_buf(psp);
if (ret)
@@ -866,6 +886,18 @@ static int psp_hdcp_initialize(struct psp_context *psp)
{
int ret;
+ /*
+ * TODO: bypass the initialize in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->adev->psp.ta_hdcp_ucode_size ||
+ !psp->adev->psp.ta_hdcp_start_addr) {
+ dev_warn(psp->adev->dev, "HDCP: hdcp ta ucode is not available\n");
+ return 0;
+ }
+
if (!psp->hdcp_context.hdcp_initialized) {
ret = psp_hdcp_init_shared_buf(psp);
if (ret)
@@ -948,6 +980,12 @@ static int psp_hdcp_terminate(struct psp_context *psp)
{
int ret;
+ /*
+ * TODO: bypass the terminate in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
if (!psp->hdcp_context.hdcp_initialized)
return 0;
@@ -1039,6 +1077,18 @@ static int psp_dtm_initialize(struct psp_context *psp)
{
int ret;
+ /*
+ * TODO: bypass the initialize in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (!psp->adev->psp.ta_dtm_ucode_size ||
+ !psp->adev->psp.ta_dtm_start_addr) {
+ dev_warn(psp->adev->dev, "DTM: dtm ta ucode is not available\n");
+ return 0;
+ }
+
if (!psp->dtm_context.dtm_initialized) {
ret = psp_dtm_init_shared_buf(psp);
if (ret)
@@ -1091,6 +1141,12 @@ static int psp_dtm_terminate(struct psp_context *psp)
{
int ret;
+ /*
+ * TODO: bypass the terminate in sriov for now
+ */
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
if (!psp->dtm_context.dtm_initialized)
return 0;
@@ -1411,7 +1467,10 @@ out:
|| ucode->ucode_id == AMDGPU_UCODE_ID_SDMA5
|| ucode->ucode_id == AMDGPU_UCODE_ID_SDMA6
|| ucode->ucode_id == AMDGPU_UCODE_ID_SDMA7
- || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G))
+ || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G
+ || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL
+ || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM
+ || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM))
/*skip ucode loading in SRIOV VF */
continue;
@@ -1428,8 +1487,8 @@ out:
return ret;
/* Start rlc autoload after psp recieved all the gfx firmware */
- if (psp->autoload_supported && ucode->ucode_id ==
- AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) {
+ if (psp->autoload_supported && ucode->ucode_id == (amdgpu_sriov_vf(adev) ?
+ AMDGPU_UCODE_ID_CP_MEC2 : AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) {
ret = psp_rlc_autoload(psp);
if (ret) {
DRM_ERROR("Failed to start rlc autoload\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 7de16c0c2f20..2a8e04895595 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -27,7 +27,8 @@
#include <linux/bits.h>
#include "smu_v11_0_i2c.h"
-#define EEPROM_I2C_TARGET_ADDR 0xA0
+#define EEPROM_I2C_TARGET_ADDR_ARCTURUS 0xA8
+#define EEPROM_I2C_TARGET_ADDR_VEGA20 0xA0
/*
* The 2 macros bellow represent the actual size in bytes that
@@ -83,7 +84,7 @@ static int __update_table_header(struct amdgpu_ras_eeprom_control *control,
{
int ret = 0;
struct i2c_msg msg = {
- .addr = EEPROM_I2C_TARGET_ADDR,
+ .addr = 0,
.flags = 0,
.len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE,
.buf = buff,
@@ -93,6 +94,8 @@ static int __update_table_header(struct amdgpu_ras_eeprom_control *control,
*(uint16_t *)buff = EEPROM_HDR_START;
__encode_table_header_to_buff(&control->tbl_hdr, buff + EEPROM_ADDRESS_SIZE);
+ msg.addr = control->i2c_address;
+
ret = i2c_transfer(&control->eeprom_accessor, &msg, 1);
if (ret < 1)
DRM_ERROR("Failed to write EEPROM table header, ret:%d", ret);
@@ -203,7 +206,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 };
struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
struct i2c_msg msg = {
- .addr = EEPROM_I2C_TARGET_ADDR,
+ .addr = 0,
.flags = I2C_M_RD,
.len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE,
.buf = buff,
@@ -213,10 +216,12 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
switch (adev->asic_type) {
case CHIP_VEGA20:
+ control->i2c_address = EEPROM_I2C_TARGET_ADDR_VEGA20;
ret = smu_v11_0_i2c_eeprom_control_init(&control->eeprom_accessor);
break;
case CHIP_ARCTURUS:
+ control->i2c_address = EEPROM_I2C_TARGET_ADDR_ARCTURUS;
ret = smu_i2c_eeprom_init(&adev->smu, &control->eeprom_accessor);
break;
@@ -229,6 +234,8 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
return ret;
}
+ msg.addr = control->i2c_address;
+
/* Read/Create table header from EEPROM address 0 */
ret = i2c_transfer(&control->eeprom_accessor, &msg, 1);
if (ret < 1) {
@@ -408,8 +415,8 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
* Update bits 16,17 of EEPROM address in I2C address by setting them
* to bits 1,2 of Device address byte
*/
- msg->addr = EEPROM_I2C_TARGET_ADDR |
- ((control->next_addr & EEPROM_ADDR_MSB_MASK) >> 15);
+ msg->addr = control->i2c_address |
+ ((control->next_addr & EEPROM_ADDR_MSB_MASK) >> 15);
msg->flags = write ? 0 : I2C_M_RD;
msg->len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE;
msg->buf = buff;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
index 622269957c1b..ca78f812d436 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
@@ -50,6 +50,7 @@ struct amdgpu_ras_eeprom_control {
struct mutex tbl_mutex;
bool bus_locked;
uint32_t tbl_byte_sum;
+ uint16_t i2c_address; // 8-bit represented address
};
/*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
index c8793e6cc3c5..6373bfb47d55 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
@@ -124,13 +124,12 @@ int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws)
*/
int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev)
{
- volatile u32 *dst_ptr;
u32 dws;
int r;
/* allocate clear state block */
adev->gfx.rlc.clear_state_size = dws = adev->gfx.rlc.funcs->get_csb_size(adev);
- r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
+ r = amdgpu_bo_create_kernel(adev, dws * 4, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->gfx.rlc.clear_state_obj,
&adev->gfx.rlc.clear_state_gpu_addr,
@@ -141,13 +140,6 @@ int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev)
return r;
}
- /* set up the cs buffer */
- dst_ptr = adev->gfx.rlc.cs_ptr;
- adev->gfx.rlc.funcs->get_csb_buffer(adev, dst_ptr);
- amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
- amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
- amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index f940526c5889..63e734a125fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -473,7 +473,7 @@ TRACE_EVENT(amdgpu_ib_pipe_sync,
TP_PROTO(struct amdgpu_job *sched_job, struct dma_fence *fence),
TP_ARGS(sched_job, fence),
TP_STRUCT__entry(
- __string(ring, sched_job->base.sched->name);
+ __string(ring, sched_job->base.sched->name)
__field(uint64_t, id)
__field(struct dma_fence *, fence)
__field(uint64_t, ctx)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 61d9b7774d42..2616e2eafdeb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -35,6 +35,7 @@
#include <linux/hmm.h>
#include <linux/pagemap.h>
#include <linux/sched/task.h>
+#include <linux/sched/mm.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/swap.h>
@@ -769,6 +770,20 @@ struct amdgpu_ttm_tt {
#endif
};
+#ifdef CONFIG_DRM_AMDGPU_USERPTR
+/* flags used by HMM internal, not related to CPU/GPU PTE flags */
+static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
+ (1 << 0), /* HMM_PFN_VALID */
+ (1 << 1), /* HMM_PFN_WRITE */
+ 0 /* HMM_PFN_DEVICE_PRIVATE */
+};
+
+static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
+ 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
+ 0, /* HMM_PFN_NONE */
+ 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
+};
+
/**
* amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
* memory and start HMM tracking CPU page table update
@@ -776,85 +791,89 @@ struct amdgpu_ttm_tt {
* Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
* once afterwards to stop HMM tracking
*/
-#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
-
-#define MAX_RETRY_HMM_RANGE_FAULT 16
-
int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
{
- struct hmm_mirror *mirror = bo->mn ? &bo->mn->mirror : NULL;
struct ttm_tt *ttm = bo->tbo.ttm;
struct amdgpu_ttm_tt *gtt = (void *)ttm;
- struct mm_struct *mm = gtt->usertask->mm;
unsigned long start = gtt->userptr;
struct vm_area_struct *vma;
struct hmm_range *range;
+ unsigned long timeout;
+ struct mm_struct *mm;
unsigned long i;
- uint64_t *pfns;
int r = 0;
- if (!mm) /* Happens during process shutdown */
- return -ESRCH;
-
- if (unlikely(!mirror)) {
- DRM_DEBUG_DRIVER("Failed to get hmm_mirror\n");
- r = -EFAULT;
- goto out;
+ mm = bo->notifier.mm;
+ if (unlikely(!mm)) {
+ DRM_DEBUG_DRIVER("BO is not registered?\n");
+ return -EFAULT;
}
- vma = find_vma(mm, start);
- if (unlikely(!vma || start < vma->vm_start)) {
- r = -EFAULT;
- goto out;
- }
- if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
- vma->vm_file)) {
- r = -EPERM;
- goto out;
- }
+ /* Another get_user_pages is running at the same time?? */
+ if (WARN_ON(gtt->range))
+ return -EFAULT;
+
+ if (!mmget_not_zero(mm)) /* Happens during process shutdown */
+ return -ESRCH;
range = kzalloc(sizeof(*range), GFP_KERNEL);
if (unlikely(!range)) {
r = -ENOMEM;
goto out;
}
+ range->notifier = &bo->notifier;
+ range->flags = hmm_range_flags;
+ range->values = hmm_range_values;
+ range->pfn_shift = PAGE_SHIFT;
+ range->start = bo->notifier.interval_tree.start;
+ range->end = bo->notifier.interval_tree.last + 1;
+ range->default_flags = hmm_range_flags[HMM_PFN_VALID];
+ if (!amdgpu_ttm_tt_is_readonly(ttm))
+ range->default_flags |= range->flags[HMM_PFN_WRITE];
- pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL);
- if (unlikely(!pfns)) {
+ range->pfns = kvmalloc_array(ttm->num_pages, sizeof(*range->pfns),
+ GFP_KERNEL);
+ if (unlikely(!range->pfns)) {
r = -ENOMEM;
goto out_free_ranges;
}
- amdgpu_hmm_init_range(range);
- range->default_flags = range->flags[HMM_PFN_VALID];
- range->default_flags |= amdgpu_ttm_tt_is_readonly(ttm) ?
- 0 : range->flags[HMM_PFN_WRITE];
- range->pfn_flags_mask = 0;
- range->pfns = pfns;
- range->start = start;
- range->end = start + ttm->num_pages * PAGE_SIZE;
-
- hmm_range_register(range, mirror);
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, start);
+ if (unlikely(!vma || start < vma->vm_start)) {
+ r = -EFAULT;
+ goto out_unlock;
+ }
+ if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
+ vma->vm_file)) {
+ r = -EPERM;
+ goto out_unlock;
+ }
+ up_read(&mm->mmap_sem);
+ timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
- /*
- * Just wait for range to be valid, safe to ignore return value as we
- * will use the return value of hmm_range_fault() below under the
- * mmap_sem to ascertain the validity of the range.
- */
- hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT);
+retry:
+ range->notifier_seq = mmu_interval_read_begin(&bo->notifier);
down_read(&mm->mmap_sem);
r = hmm_range_fault(range, 0);
up_read(&mm->mmap_sem);
-
- if (unlikely(r < 0))
+ if (unlikely(r <= 0)) {
+ /*
+ * FIXME: This timeout should encompass the retry from
+ * mmu_interval_read_retry() as well.
+ */
+ if ((r == 0 || r == -EBUSY) && !time_after(jiffies, timeout))
+ goto retry;
goto out_free_pfns;
+ }
for (i = 0; i < ttm->num_pages; i++) {
- pages[i] = hmm_device_entry_to_page(range, pfns[i]);
+ /* FIXME: The pages cannot be touched outside the notifier_lock */
+ pages[i] = hmm_device_entry_to_page(range, range->pfns[i]);
if (unlikely(!pages[i])) {
pr_err("Page fault failed for pfn[%lu] = 0x%llx\n",
- i, pfns[i]);
+ i, range->pfns[i]);
r = -ENOMEM;
goto out_free_pfns;
@@ -862,15 +881,18 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
}
gtt->range = range;
+ mmput(mm);
return 0;
+out_unlock:
+ up_read(&mm->mmap_sem);
out_free_pfns:
- hmm_range_unregister(range);
- kvfree(pfns);
+ kvfree(range->pfns);
out_free_ranges:
kfree(range);
out:
+ mmput(mm);
return r;
}
@@ -895,15 +917,18 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
"No user pages to check\n");
if (gtt->range) {
- r = hmm_range_valid(gtt->range);
- hmm_range_unregister(gtt->range);
-
+ /*
+ * FIXME: Must always hold notifier_lock for this, and must
+ * not ignore the return code.
+ */
+ r = mmu_interval_read_retry(gtt->range->notifier,
+ gtt->range->notifier_seq);
kvfree(gtt->range->pfns);
kfree(gtt->range);
gtt->range = NULL;
}
- return r;
+ return !r;
}
#endif
@@ -984,10 +1009,18 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
sg_free_table(ttm->sg);
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
- if (gtt->range &&
- ttm->pages[0] == hmm_device_entry_to_page(gtt->range,
- gtt->range->pfns[0]))
- WARN_ONCE(1, "Missing get_user_page_done\n");
+ if (gtt->range) {
+ unsigned long i;
+
+ for (i = 0; i < ttm->num_pages; i++) {
+ if (ttm->pages[i] !=
+ hmm_device_entry_to_page(gtt->range,
+ gtt->range->pfns[i]))
+ break;
+ }
+
+ WARN((i == ttm->num_pages), "Missing get_user_page_done\n");
+ }
#endif
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index b4dd89af6f09..e324bfe6c58f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -299,6 +299,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
{
int i, j;
+ cancel_delayed_work_sync(&adev->uvd.idle_work);
drm_sched_entity_destroy(&adev->uvd.entity);
for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 703677f2ff6f..46b590af2fd2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -216,6 +216,7 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
if (adev->vce.vcpu_bo == NULL)
return 0;
+ cancel_delayed_work_sync(&adev->vce.idle_work);
drm_sched_entity_destroy(&adev->vce.entity);
amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 3199e4a5ff12..9d870444d7d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -193,6 +193,8 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
{
int i, j;
+ cancel_delayed_work_sync(&adev->vcn.idle_work);
+
if (adev->vcn.indirect_sram) {
amdgpu_bo_free_kernel(&adev->vcn.dpg_sram_bo,
&adev->vcn.dpg_sram_gpu_addr,
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 2d64d270725d..1befdee9f0f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1346,10 +1346,13 @@ static int cik_asic_reset(struct amdgpu_device *adev)
{
int r;
- if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)
+ if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
+ if (!adev->in_suspend)
+ amdgpu_inc_vram_lost(adev);
r = smu7_asic_baco_reset(adev);
- else
+ } else {
r = cik_asic_pci_config_reset(adev);
+ }
return r;
}
@@ -1441,7 +1444,6 @@ static int cik_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
{
struct pci_dev *root = adev->pdev->bus->self;
- int bridge_pos, gpu_pos;
u32 speed_cntl, current_data_rate;
int i;
u16 tmp16;
@@ -1476,12 +1478,7 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
DRM_INFO("enabling PCIE gen 2 link speeds, disable with amdgpu.pcie_gen2=0\n");
}
- bridge_pos = pci_pcie_cap(root);
- if (!bridge_pos)
- return;
-
- gpu_pos = pci_pcie_cap(adev->pdev);
- if (!gpu_pos)
+ if (!pci_is_pcie(root) || !pci_is_pcie(adev->pdev))
return;
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
@@ -1491,14 +1488,17 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
u16 bridge_cfg2, gpu_cfg2;
u32 max_lw, current_lw, tmp;
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+ pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL,
+ &gpu_cfg);
tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL,
+ tmp16);
tmp = RREG32_PCIE(ixPCIE_LC_STATUS1);
max_lw = (tmp & PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >>
@@ -1522,15 +1522,23 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
for (i = 0; i < 10; i++) {
/* check status */
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_DEVSTA,
+ &tmp16);
if (tmp16 & PCI_EXP_DEVSTA_TRPND)
break;
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ &gpu_cfg);
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &bridge_cfg2);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ &gpu_cfg2);
tmp = RREG32_PCIE(ixPCIE_LC_CNTL4);
tmp |= PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK;
@@ -1543,26 +1551,45 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
msleep(100);
/* linkctl */
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &tmp16);
tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(root, PCI_EXP_LNKCTL,
+ tmp16);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ &tmp16);
tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ tmp16);
/* linkctl2 */
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~((1 << 4) | (7 << 9));
- tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
-
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~((1 << 4) | (7 << 9));
- tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (bridge_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(root,
+ PCI_EXP_LNKCTL2,
+ tmp16);
+
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (gpu_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ tmp16);
tmp = RREG32_PCIE(ixPCIE_LC_CNTL4);
tmp &= ~PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK;
@@ -1577,15 +1604,16 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
speed_cntl &= ~PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_SW_SPEED_CHANGE_MASK;
WREG32_PCIE(ixPCIE_LC_SPEED_CNTL, speed_cntl);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~0xf;
+ pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL2, &tmp16);
+ tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
+
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
- tmp16 |= 3; /* gen3 */
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
- tmp16 |= 2; /* gen2 */
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
else
- tmp16 |= 1; /* gen1 */
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
+ pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL2, tmp16);
speed_cntl = RREG32_PCIE(ixPCIE_LC_SPEED_CNTL);
speed_cntl |= PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index a93dd3dc0902..f2c1b026397b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -690,59 +690,61 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
- err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
- if (err)
- goto out;
- err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
- rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
- version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
- version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
- if (version_major == 2 && version_minor == 1)
- adev->gfx.rlc.is_rlc_v2_1 = true;
-
- adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
- adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
- adev->gfx.rlc.save_and_restore_offset =
+ if (!amdgpu_sriov_vf(adev)) {
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
+ err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
+ if (err)
+ goto out;
+ err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+ version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+ if (version_major == 2 && version_minor == 1)
+ adev->gfx.rlc.is_rlc_v2_1 = true;
+
+ adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
+ adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
+ adev->gfx.rlc.save_and_restore_offset =
le32_to_cpu(rlc_hdr->save_and_restore_offset);
- adev->gfx.rlc.clear_state_descriptor_offset =
+ adev->gfx.rlc.clear_state_descriptor_offset =
le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
- adev->gfx.rlc.avail_scratch_ram_locations =
+ adev->gfx.rlc.avail_scratch_ram_locations =
le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
- adev->gfx.rlc.reg_restore_list_size =
+ adev->gfx.rlc.reg_restore_list_size =
le32_to_cpu(rlc_hdr->reg_restore_list_size);
- adev->gfx.rlc.reg_list_format_start =
+ adev->gfx.rlc.reg_list_format_start =
le32_to_cpu(rlc_hdr->reg_list_format_start);
- adev->gfx.rlc.reg_list_format_separate_start =
+ adev->gfx.rlc.reg_list_format_separate_start =
le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
- adev->gfx.rlc.starting_offsets_start =
+ adev->gfx.rlc.starting_offsets_start =
le32_to_cpu(rlc_hdr->starting_offsets_start);
- adev->gfx.rlc.reg_list_format_size_bytes =
+ adev->gfx.rlc.reg_list_format_size_bytes =
le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
- adev->gfx.rlc.reg_list_size_bytes =
+ adev->gfx.rlc.reg_list_size_bytes =
le32_to_cpu(rlc_hdr->reg_list_size_bytes);
- adev->gfx.rlc.register_list_format =
+ adev->gfx.rlc.register_list_format =
kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
- adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
- if (!adev->gfx.rlc.register_list_format) {
- err = -ENOMEM;
- goto out;
- }
+ adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
+ if (!adev->gfx.rlc.register_list_format) {
+ err = -ENOMEM;
+ goto out;
+ }
- tmp = (unsigned int *)((uintptr_t)rlc_hdr +
- le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
- for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
- adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
+ tmp = (unsigned int *)((uintptr_t)rlc_hdr +
+ le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
+ for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
+ adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
- adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
+ adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
- tmp = (unsigned int *)((uintptr_t)rlc_hdr +
- le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
- for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
- adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
+ tmp = (unsigned int *)((uintptr_t)rlc_hdr +
+ le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
+ for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
+ adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
- if (adev->gfx.rlc.is_rlc_v2_1)
- gfx_v10_0_init_rlc_ext_microcode(adev);
+ if (adev->gfx.rlc.is_rlc_v2_1)
+ gfx_v10_0_init_rlc_ext_microcode(adev);
+ }
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", chip_name, wks);
err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
@@ -993,39 +995,6 @@ static int gfx_v10_0_rlc_init(struct amdgpu_device *adev)
return 0;
}
-static int gfx_v10_0_csb_vram_pin(struct amdgpu_device *adev)
-{
- int r;
-
- r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
- AMDGPU_GEM_DOMAIN_VRAM);
- if (!r)
- adev->gfx.rlc.clear_state_gpu_addr =
- amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
-
- amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-
- return r;
-}
-
-static void gfx_v10_0_csb_vram_unpin(struct amdgpu_device *adev)
-{
- int r;
-
- if (!adev->gfx.rlc.clear_state_obj)
- return;
-
- r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
- if (likely(r == 0)) {
- amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
- amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
- }
-}
-
static void gfx_v10_0_mec_fini(struct amdgpu_device *adev)
{
amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
@@ -1785,27 +1754,18 @@ static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
}
-static void gfx_v10_0_init_csb(struct amdgpu_device *adev)
+static int gfx_v10_0_init_csb(struct amdgpu_device *adev)
{
+ adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
+
/* csib */
WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI,
adev->gfx.rlc.clear_state_gpu_addr >> 32);
WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_LO,
adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
WREG32_SOC15(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
-}
-
-static void gfx_v10_0_init_pg(struct amdgpu_device *adev)
-{
- int i;
- gfx_v10_0_init_csb(adev);
-
- for (i = 0; i < adev->num_vmhubs; i++)
- amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
-
- /* TODO: init power gating */
- return;
+ return 0;
}
void gfx_v10_0_rlc_stop(struct amdgpu_device *adev)
@@ -1900,18 +1860,16 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev)
{
int r;
- if (amdgpu_sriov_vf(adev))
- return 0;
-
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+
r = gfx_v10_0_wait_for_rlc_autoload_complete(adev);
if (r)
return r;
- gfx_v10_0_init_pg(adev);
- /* enable RLC SRM */
- gfx_v10_0_rlc_enable_srm(adev);
+ gfx_v10_0_init_csb(adev);
+ if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
+ gfx_v10_0_rlc_enable_srm(adev);
} else {
adev->gfx.rlc.funcs->stop(adev);
@@ -1933,7 +1891,8 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev)
return r;
}
- gfx_v10_0_init_pg(adev);
+ gfx_v10_0_init_csb(adev);
+
adev->gfx.rlc.funcs->start(adev);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
@@ -2400,7 +2359,7 @@ static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
return 0;
}
-static void gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
+static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
int i;
u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
@@ -2413,7 +2372,17 @@ static void gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
adev->gfx.gfx_ring[i].sched.ready = false;
}
WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
- udelay(50);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
+
+ return 0;
}
static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
@@ -2784,7 +2753,7 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
/* Init gfx ring 0 for pipe 0 */
mutex_lock(&adev->srbm_mutex);
gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
- mutex_unlock(&adev->srbm_mutex);
+
/* Set ring buffer size */
ring = &adev->gfx.gfx_ring[0];
rb_bufsz = order_base_2(ring->ring_size / 8);
@@ -2822,11 +2791,11 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCP_RB_ACTIVE, 1);
gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
+ mutex_unlock(&adev->srbm_mutex);
/* Init gfx ring 1 for pipe 1 */
mutex_lock(&adev->srbm_mutex);
gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
- mutex_unlock(&adev->srbm_mutex);
ring = &adev->gfx.gfx_ring[1];
rb_bufsz = order_base_2(ring->ring_size / 8);
tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
@@ -2856,6 +2825,7 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCP_RB1_ACTIVE, 1);
gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
+ mutex_unlock(&adev->srbm_mutex);
/* Switch to pipe 0 */
mutex_lock(&adev->srbm_mutex);
@@ -3114,6 +3084,7 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
struct v10_gfx_mqd *mqd = ring->mqd_ptr;
+ int mqd_idx = ring - &adev->gfx.gfx_ring[0];
if (!adev->in_gpu_reset && !adev->in_suspend) {
memset((void *)mqd, 0, sizeof(*mqd));
@@ -3125,12 +3096,12 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
#endif
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
- if (adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS])
- memcpy(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS], mqd, sizeof(*mqd));
+ if (adev->gfx.me.mqd_backup[mqd_idx])
+ memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
} else if (adev->in_gpu_reset) {
/* reset mqd with the backup copy */
- if (adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS])
- memcpy(mqd, adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS], sizeof(*mqd));
+ if (adev->gfx.me.mqd_backup[mqd_idx])
+ memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
/* reset the ring */
ring->wptr = 0;
adev->wb.wb[ring->wptr_offs] = 0;
@@ -3733,10 +3704,6 @@ static int gfx_v10_0_hw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = gfx_v10_0_csb_vram_pin(adev);
- if (r)
- return r;
-
if (!amdgpu_emu_mode)
gfx_v10_0_init_golden_registers(adev);
@@ -3819,12 +3786,11 @@ static int gfx_v10_0_hw_fini(void *handle)
if (amdgpu_gfx_disable_kcq(adev))
DRM_ERROR("KCQ disable failed\n");
if (amdgpu_sriov_vf(adev)) {
- pr_debug("For SRIOV client, shouldn't do anything.\n");
+ gfx_v10_0_cp_gfx_enable(adev, false);
return 0;
}
gfx_v10_0_cp_enable(adev, false);
gfx_v10_0_enable_gui_idle_interrupt(adev, false);
- gfx_v10_0_csb_vram_unpin(adev);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 791ba398f007..d92e92e5d50b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -4554,6 +4554,8 @@ static int gfx_v7_0_hw_init(void *handle)
gfx_v7_0_constants_init(adev);
+ /* init CSB */
+ adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
/* init rlc */
r = adev->gfx.rlc.funcs->resume(adev);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index ffbde9136372..983db77999e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1321,39 +1321,6 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
return 0;
}
-static int gfx_v8_0_csb_vram_pin(struct amdgpu_device *adev)
-{
- int r;
-
- r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
- AMDGPU_GEM_DOMAIN_VRAM);
- if (!r)
- adev->gfx.rlc.clear_state_gpu_addr =
- amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
-
- amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-
- return r;
-}
-
-static void gfx_v8_0_csb_vram_unpin(struct amdgpu_device *adev)
-{
- int r;
-
- if (!adev->gfx.rlc.clear_state_obj)
- return;
-
- r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
- if (likely(r == 0)) {
- amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
- amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
- }
-}
-
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
@@ -3917,6 +3884,7 @@ static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
+ adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
/* csib */
WREG32(mmRLC_CSIB_ADDR_HI,
adev->gfx.rlc.clear_state_gpu_addr >> 32);
@@ -4837,10 +4805,6 @@ static int gfx_v8_0_hw_init(void *handle)
gfx_v8_0_init_golden_registers(adev);
gfx_v8_0_constants_init(adev);
- r = gfx_v8_0_csb_vram_pin(adev);
- if (r)
- return r;
-
r = adev->gfx.rlc.funcs->resume(adev);
if (r)
return r;
@@ -4958,8 +4922,6 @@ static int gfx_v8_0_hw_fini(void *handle)
pr_err("rlc is busy, skip halt rlc\n");
amdgpu_gfx_rlc_exit_safe_mode(adev);
- gfx_v8_0_csb_vram_unpin(adev);
-
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 3ebd5c20dfd3..66328ffa395a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -704,6 +704,7 @@ static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
};
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
@@ -1051,8 +1052,13 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
case CHIP_VEGA20:
break;
case CHIP_RAVEN:
- if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
- &&((adev->gfx.rlc_fw_version != 106 &&
+ /* Disable GFXOFF on original raven. There are combinations
+ * of sbios and platforms that are not stable.
+ */
+ if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8))
+ adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
+ else if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
+ &&((adev->gfx.rlc_fw_version != 106 &&
adev->gfx.rlc_fw_version < 531) ||
(adev->gfx.rlc_fw_version == 53815) ||
(adev->gfx.rlc_feature_version < 1) ||
@@ -1689,39 +1695,6 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
return 0;
}
-static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
-{
- int r;
-
- r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
- AMDGPU_GEM_DOMAIN_VRAM);
- if (!r)
- adev->gfx.rlc.clear_state_gpu_addr =
- amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
-
- amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
-
- return r;
-}
-
-static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
-{
- int r;
-
- if (!adev->gfx.rlc.clear_state_obj)
- return;
-
- r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
- if (likely(r == 0)) {
- amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
- amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
- }
-}
-
static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
{
amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
@@ -2409,6 +2382,7 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
{
+ adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
/* csib */
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
adev->gfx.rlc.clear_state_gpu_addr >> 32);
@@ -3700,10 +3674,6 @@ static int gfx_v9_0_hw_init(void *handle)
gfx_v9_0_constants_init(adev);
- r = gfx_v9_0_csb_vram_pin(adev);
- if (r)
- return r;
-
r = adev->gfx.rlc.funcs->resume(adev);
if (r)
return r;
@@ -3785,8 +3755,6 @@ static int gfx_v9_0_hw_fini(void *handle)
gfx_v9_0_cp_enable(adev, false);
adev->gfx.rlc.funcs->stop(adev);
- gfx_v9_0_csb_vram_unpin(adev);
-
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 9ec4297e61e5..e91bd7945777 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -367,6 +367,8 @@ void gfxhub_v1_0_init(struct amdgpu_device *adev)
hub->ctx0_ptb_addr_hi32 =
SOC15_REG_OFFSET(GC, 0,
mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_SEM);
hub->vm_inv_eng0_req =
SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_REQ);
hub->vm_inv_eng0_ack =
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c
index 5e9ab8eb214a..c0ab71df0d90 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c
@@ -33,16 +33,31 @@ int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev)
u32 xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_CNTL);
u32 max_region =
REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_MAX_REGION);
+ u32 max_num_physical_nodes = 0;
+ u32 max_physical_node_id = 0;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ max_num_physical_nodes = 4;
+ max_physical_node_id = 3;
+ break;
+ case CHIP_ARCTURUS:
+ max_num_physical_nodes = 8;
+ max_physical_node_id = 7;
+ break;
+ default:
+ return -EINVAL;
+ }
/* PF_MAX_REGION=0 means xgmi is disabled */
if (max_region) {
adev->gmc.xgmi.num_physical_nodes = max_region + 1;
- if (adev->gmc.xgmi.num_physical_nodes > 4)
+ if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes)
return -EINVAL;
adev->gmc.xgmi.physical_node_id =
REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_LFB_REGION);
- if (adev->gmc.xgmi.physical_node_id > 3)
+ if (adev->gmc.xgmi.physical_node_id > max_physical_node_id)
return -EINVAL;
adev->gmc.xgmi.node_segment_size = REG_GET_FIELD(
RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_SIZE),
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
index b4f32d853ca1..b70c7b483c24 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
@@ -356,6 +356,8 @@ void gfxhub_v2_0_init(struct amdgpu_device *adev)
hub->ctx0_ptb_addr_hi32 =
SOC15_REG_OFFSET(GC, 0,
mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_SEM);
hub->vm_inv_eng0_req =
SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_REQ);
hub->vm_inv_eng0_ack =
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 27f68d32bfec..232469507446 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -235,6 +235,29 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
const unsigned eng = 17;
unsigned int i;
+ spin_lock(&adev->gmc.invalidate_lock);
+ /*
+ * It may lose gpuvm invalidate acknowldege state across power-gating
+ * off cycle, add semaphore acquire before invalidation and semaphore
+ * release after invalidation to avoid entering power gated state
+ * to WA the Issue
+ */
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (vmhub == AMDGPU_MMHUB_0 ||
+ vmhub == AMDGPU_MMHUB_1) {
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* a read return value of 1 means semaphore acuqire */
+ tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng);
+ if (tmp & 0x1)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
+ }
+
WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
/*
@@ -254,6 +277,17 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
udelay(1);
}
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (vmhub == AMDGPU_MMHUB_0 ||
+ vmhub == AMDGPU_MMHUB_1)
+ /*
+ * add semaphore release after invalidation,
+ * write with 0 means semaphore release
+ */
+ WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0);
+
+ spin_unlock(&adev->gmc.invalidate_lock);
+
if (i < adev->usec_timeout)
return;
@@ -292,7 +326,8 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
if (!adev->mman.buffer_funcs_enabled ||
!adev->ib_pool_ready ||
- adev->in_gpu_reset) {
+ adev->in_gpu_reset ||
+ ring->sched.ready == false) {
gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0);
mutex_unlock(&adev->mman.gtt_window_lock);
return;
@@ -338,6 +373,20 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
uint32_t req = gmc_v10_0_get_invalidate_req(vmid, 0);
unsigned eng = ring->vm_inv_eng;
+ /*
+ * It may lose gpuvm invalidate acknowldege state across power-gating
+ * off cycle, add semaphore acquire before invalidation and semaphore
+ * release after invalidation to avoid entering power gated state
+ * to WA the Issue
+ */
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (ring->funcs->vmhub == AMDGPU_MMHUB_0 ||
+ ring->funcs->vmhub == AMDGPU_MMHUB_1)
+ /* a read return value of 1 means semaphore acuqire */
+ amdgpu_ring_emit_reg_wait(ring,
+ hub->vm_inv_eng0_sem + eng, 0x1, 0x1);
+
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
lower_32_bits(pd_addr));
@@ -348,6 +397,15 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
hub->vm_inv_eng0_ack + eng,
req, 1 << vmid);
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (ring->funcs->vmhub == AMDGPU_MMHUB_0 ||
+ ring->funcs->vmhub == AMDGPU_MMHUB_1)
+ /*
+ * add semaphore release after invalidation,
+ * write with 0 means semaphore release
+ */
+ amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0);
+
return pd_addr;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 9f2a893871ec..3c355fb5d2b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -459,6 +459,29 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
}
spin_lock(&adev->gmc.invalidate_lock);
+
+ /*
+ * It may lose gpuvm invalidate acknowldege state across power-gating
+ * off cycle, add semaphore acquire before invalidation and semaphore
+ * release after invalidation to avoid entering power gated state
+ * to WA the Issue
+ */
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (vmhub == AMDGPU_MMHUB_0 ||
+ vmhub == AMDGPU_MMHUB_1) {
+ for (j = 0; j < adev->usec_timeout; j++) {
+ /* a read return value of 1 means semaphore acuqire */
+ tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng);
+ if (tmp & 0x1)
+ break;
+ udelay(1);
+ }
+
+ if (j >= adev->usec_timeout)
+ DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
+ }
+
WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
/*
@@ -474,7 +497,18 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
break;
udelay(1);
}
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (vmhub == AMDGPU_MMHUB_0 ||
+ vmhub == AMDGPU_MMHUB_1)
+ /*
+ * add semaphore release after invalidation,
+ * write with 0 means semaphore release
+ */
+ WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0);
+
spin_unlock(&adev->gmc.invalidate_lock);
+
if (j < adev->usec_timeout)
return;
@@ -489,6 +523,20 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0);
unsigned eng = ring->vm_inv_eng;
+ /*
+ * It may lose gpuvm invalidate acknowldege state across power-gating
+ * off cycle, add semaphore acquire before invalidation and semaphore
+ * release after invalidation to avoid entering power gated state
+ * to WA the Issue
+ */
+
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (ring->funcs->vmhub == AMDGPU_MMHUB_0 ||
+ ring->funcs->vmhub == AMDGPU_MMHUB_1)
+ /* a read return value of 1 means semaphore acuqire */
+ amdgpu_ring_emit_reg_wait(ring,
+ hub->vm_inv_eng0_sem + eng, 0x1, 0x1);
+
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
lower_32_bits(pd_addr));
@@ -499,6 +547,15 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
hub->vm_inv_eng0_ack + eng,
req, 1 << vmid);
+ /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
+ if (ring->funcs->vmhub == AMDGPU_MMHUB_0 ||
+ ring->funcs->vmhub == AMDGPU_MMHUB_1)
+ /*
+ * add semaphore release after invalidation,
+ * write with 0 means semaphore release
+ */
+ amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0);
+
return pd_addr;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 6965e1e6fa9e..28105e4af507 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -420,6 +420,8 @@ void mmhub_v1_0_init(struct amdgpu_device *adev)
hub->ctx0_ptb_addr_hi32 =
SOC15_REG_OFFSET(MMHUB, 0,
mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_SEM);
hub->vm_inv_eng0_req =
SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_REQ);
hub->vm_inv_eng0_ack =
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
index 945533634711..a7cb185d639a 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
@@ -348,6 +348,8 @@ void mmhub_v2_0_init(struct amdgpu_device *adev)
hub->ctx0_ptb_addr_hi32 =
SOC15_REG_OFFSET(MMHUB, 0,
mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_SEM);
hub->vm_inv_eng0_req =
SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_REQ);
hub->vm_inv_eng0_ack =
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
index 2c5adfe803a2..66efe2f7bd76 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
@@ -504,6 +504,10 @@ void mmhub_v9_4_init(struct amdgpu_device *adev)
SOC15_REG_OFFSET(MMHUB, 0,
mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) +
i * MMHUB_INSTANCE_REGISTER_OFFSET;
+ hub[i]->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ mmVML2VC0_VM_INVALIDATE_ENG0_SEM) +
+ i * MMHUB_INSTANCE_REGISTER_OFFSET;
hub[i]->vm_inv_eng0_req =
SOC15_REG_OFFSET(MMHUB, 0,
mmVML2VC0_VM_INVALIDATE_ENG0_REQ) +
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index af68f9815f28..0ba66bef5746 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -40,6 +40,7 @@
#include "gc/gc_10_1_0_sh_mask.h"
#include "hdp/hdp_5_0_0_offset.h"
#include "hdp/hdp_5_0_0_sh_mask.h"
+#include "smuio/smuio_11_0_0_offset.h"
#include "soc15.h"
#include "soc15_common.h"
@@ -156,8 +157,27 @@ static bool nv_read_disabled_bios(struct amdgpu_device *adev)
static bool nv_read_bios_from_rom(struct amdgpu_device *adev,
u8 *bios, u32 length_bytes)
{
- /* TODO: will implement it when SMU header is available */
- return false;
+ u32 *dw_ptr;
+ u32 i, length_dw;
+
+ if (bios == NULL)
+ return false;
+ if (length_bytes == 0)
+ return false;
+ /* APU vbios image is part of sbios image */
+ if (adev->flags & AMD_IS_APU)
+ return false;
+
+ dw_ptr = (u32 *)bios;
+ length_dw = ALIGN(length_bytes, 4) / 4;
+
+ /* set rom index to 0 */
+ WREG32(SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX), 0);
+ /* read out the rom data */
+ for (i = 0; i < length_dw; i++)
+ dw_ptr[i] = RREG32(SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA));
+
+ return true;
}
static struct soc15_allowed_register_entry nv_allowed_read_registers[] = {
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index 29024e64c886..f2d70a47a3af 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -1644,7 +1644,6 @@ static void si_init_golden_registers(struct amdgpu_device *adev)
static void si_pcie_gen3_enable(struct amdgpu_device *adev)
{
struct pci_dev *root = adev->pdev->bus->self;
- int bridge_pos, gpu_pos;
u32 speed_cntl, current_data_rate;
int i;
u16 tmp16;
@@ -1679,12 +1678,7 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
DRM_INFO("enabling PCIE gen 2 link speeds, disable with amdgpu.pcie_gen2=0\n");
}
- bridge_pos = pci_pcie_cap(root);
- if (!bridge_pos)
- return;
-
- gpu_pos = pci_pcie_cap(adev->pdev);
- if (!gpu_pos)
+ if (!pci_is_pcie(root) || !pci_is_pcie(adev->pdev))
return;
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
@@ -1693,14 +1687,17 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
u16 bridge_cfg2, gpu_cfg2;
u32 max_lw, current_lw, tmp;
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+ pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL,
+ &gpu_cfg);
tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL,
+ tmp16);
tmp = RREG32_PCIE(PCIE_LC_STATUS1);
max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
@@ -1717,15 +1714,23 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
}
for (i = 0; i < 10; i++) {
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_DEVSTA,
+ &tmp16);
if (tmp16 & PCI_EXP_DEVSTA_TRPND)
break;
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ &gpu_cfg);
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &bridge_cfg2);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ &gpu_cfg2);
tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
tmp |= LC_SET_QUIESCE;
@@ -1737,25 +1742,44 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
mdelay(100);
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &tmp16);
tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(root, PCI_EXP_LNKCTL,
+ tmp16);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ &tmp16);
tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
-
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~((1 << 4) | (7 << 9));
- tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
-
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~((1 << 4) | (7 << 9));
- tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+ pcie_capability_write_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ tmp16);
+
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (bridge_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(root,
+ PCI_EXP_LNKCTL2,
+ tmp16);
+
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (gpu_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ tmp16);
tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
tmp &= ~LC_SET_QUIESCE;
@@ -1768,15 +1792,16 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~0xf;
+ pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL2, &tmp16);
+ tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
+
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
- tmp16 |= 3;
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
- tmp16 |= 2;
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
else
- tmp16 |= 1;
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
+ pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL2, tmp16);
speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c
index 57bb5f9e08b2..88ae27a5a03d 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c
@@ -64,7 +64,8 @@ static int si_ih_irq_init(struct amdgpu_device *adev)
u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
si_ih_disable_interrupts(adev);
- WREG32(INTERRUPT_CNTL2, adev->irq.ih.gpu_addr >> 8);
+ /* set dummy read address to dummy page address */
+ WREG32(INTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
interrupt_cntl = RREG32(INTERRUPT_CNTL);
interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
index 9af6c6ffbfa2..57af489a5de3 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
@@ -28,8 +28,8 @@
#include "nbio_v7_0.h"
#include "nbio_v7_4.h"
-#define SOC15_FLUSH_GPU_TLB_NUM_WREG 4
-#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT 1
+#define SOC15_FLUSH_GPU_TLB_NUM_WREG 6
+#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT 3
extern const struct amd_ip_funcs soc15_common_ip_funcs;
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index 03083e5f731a..93edf9193a7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -293,7 +293,7 @@ static int vcn_v2_5_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct amdgpu_ring *ring;
- int i;
+ int i, j;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
@@ -305,8 +305,8 @@ static int vcn_v2_5_hw_fini(void *handle)
ring->sched.ready = false;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
- ring = &adev->vcn.inst[i].ring_enc[i];
+ for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
+ ring = &adev->vcn.inst[i].ring_enc[j];
ring->sched.ready = false;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 78e5cdc0c058..f1b171e30774 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -783,10 +783,13 @@ static int vi_asic_reset(struct amdgpu_device *adev)
{
int r;
- if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)
+ if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
+ if (!adev->in_suspend)
+ amdgpu_inc_vram_lost(adev);
r = smu7_asic_baco_reset(adev);
- else
+ } else {
r = vi_asic_pci_config_reset(adev);
+ }
return r;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig
index a1a35d4d594b..ba0e68057a89 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -5,7 +5,7 @@
config HSA_AMD
bool "HSA kernel driver for AMD GPU devices"
- depends on DRM_AMDGPU && (X86_64 || ARM64)
+ depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64)
imply AMD_IOMMU_V2 if X86_64
select MMU_NOTIFIER
help
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 9af45d07515b..1544007af34a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -49,7 +49,7 @@ static const char kfd_dev_name[] = "kfd";
static const struct file_operations kfd_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = kfd_ioctl,
- .compat_ioctl = kfd_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.open = kfd_open,
.mmap = kfd_mmap,
};
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index caba9ecac723..7aac9568d3be 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -719,7 +719,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
*/
if (adev->flags & AMD_IS_APU &&
adev->asic_type >= CHIP_CARRIZO &&
- adev->asic_type <= CHIP_RAVEN)
+ adev->asic_type < CHIP_RAVEN)
init_data.flags.gpu_vm_support = true;
if (amdgpu_dc_feature_mask & DC_FBC_MASK)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 49cf39711dfc..2bf8534c18fb 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -36,7 +36,9 @@
#include "dc_link_ddc.h"
#include "i2caux_interface.h"
-
+#if defined(CONFIG_DEBUG_FS)
+#include "amdgpu_dm_debugfs.h"
+#endif
/* #define TRACE_DPCD */
#ifdef TRACE_DPCD
@@ -147,6 +149,12 @@ amdgpu_dm_mst_connector_late_register(struct drm_connector *connector)
to_amdgpu_dm_connector(connector);
struct drm_dp_mst_port *port = amdgpu_dm_connector->port;
+#if defined(CONFIG_DEBUG_FS)
+ connector_debugfs_init(amdgpu_dm_connector);
+ amdgpu_dm_connector->debugfs_dpcd_address = 0;
+ amdgpu_dm_connector->debugfs_dpcd_size = 0;
+#endif
+
return drm_dp_mst_connector_late_register(connector, port);
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
index 55a520a63712..778f186b3a05 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
@@ -342,7 +342,8 @@ bool dm_pp_get_clock_levels_by_type(
if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_clock_by_type) {
if (adev->powerplay.pp_funcs->get_clock_by_type(pp_handle,
dc_to_pp_clock_type(clk_type), &pp_clks)) {
- /* Error in pplib. Provide default values. */
+ /* Error in pplib. Provide default values. */
+ get_default_clock_levels(clk_type, dc_clks);
return true;
}
} else if (adev->smu.ppt_funcs && adev->smu.ppt_funcs->get_clock_by_type) {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
index 921a36668ced..ac8c18fadefc 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
@@ -1037,6 +1037,25 @@ void dcn20_pipe_control_lock(
if (pipe->plane_state != NULL)
flip_immediate = pipe->plane_state->flip_immediate;
+ if (flip_immediate && lock) {
+ const int TIMEOUT_FOR_FLIP_PENDING = 100000;
+ int i;
+
+ for (i = 0; i < TIMEOUT_FOR_FLIP_PENDING; ++i) {
+ if (!pipe->plane_res.hubp->funcs->hubp_is_flip_pending(pipe->plane_res.hubp))
+ break;
+ udelay(1);
+ }
+
+ if (pipe->bottom_pipe != NULL) {
+ for (i = 0; i < TIMEOUT_FOR_FLIP_PENDING; ++i) {
+ if (!pipe->bottom_pipe->plane_res.hubp->funcs->hubp_is_flip_pending(pipe->bottom_pipe->plane_res.hubp))
+ break;
+ udelay(1);
+ }
+ }
+ }
+
/* In flip immediate and pipe splitting case, we need to use GSL
* for synchronization. Only do setup on locking and on flip type change.
*/
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index bbd1c98564be..09793336d84f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -157,6 +157,74 @@ struct _vcs_dpi_ip_params_st dcn2_0_ip = {
.xfc_fill_constant_bytes = 0,
};
+struct _vcs_dpi_ip_params_st dcn2_0_nv14_ip = {
+ .odm_capable = 1,
+ .gpuvm_enable = 0,
+ .hostvm_enable = 0,
+ .gpuvm_max_page_table_levels = 4,
+ .hostvm_max_page_table_levels = 4,
+ .hostvm_cached_page_table_levels = 0,
+ .num_dsc = 5,
+ .rob_buffer_size_kbytes = 168,
+ .det_buffer_size_kbytes = 164,
+ .dpte_buffer_size_in_pte_reqs_luma = 84,
+ .dpte_buffer_size_in_pte_reqs_chroma = 42,//todo
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .pte_enable = 1,
+ .max_page_table_levels = 4,
+ .pte_chunk_size_kbytes = 2,
+ .meta_chunk_size_kbytes = 2,
+ .writeback_chunk_size_kbytes = 2,
+ .line_buffer_size_bits = 789504,
+ .is_line_buffer_bpp_fixed = 0,
+ .line_buffer_fixed_bpp = 0,
+ .dcc_supported = true,
+ .max_line_buffer_lines = 12,
+ .writeback_luma_buffer_size_kbytes = 12,
+ .writeback_chroma_buffer_size_kbytes = 8,
+ .writeback_chroma_line_buffer_width_pixels = 4,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 12,
+ .writeback_max_vscl_taps = 12,
+ .writeback_line_buffer_luma_buffer_size = 0,
+ .writeback_line_buffer_chroma_buffer_size = 14643,
+ .cursor_buffer_size = 8,
+ .cursor_chunk_size = 2,
+ .max_num_otg = 5,
+ .max_num_dpp = 5,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 8,
+ .max_vscl_ratio = 8,
+ .hscl_mults = 4,
+ .vscl_mults = 4,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dispclk_ramp_margin_percent = 1,
+ .underscan_factor = 1.10,
+ .min_vblank_lines = 32, //
+ .dppclk_delay_subtotal = 77, //
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_cnvc_formatter = 8,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 87, //
+ .dcfclk_cstate_latency = 10, // SRExitTime
+ .max_inter_dcn_tile_repeaters = 8,
+ .xfc_supported = true,
+ .xfc_fill_bw_overhead_percent = 10.0,
+ .xfc_fill_constant_bytes = 0,
+ .ptoi_supported = 0
+};
+
struct _vcs_dpi_soc_bounding_box_st dcn2_0_soc = {
/* Defaults that get patched on driver load from firmware. */
.clock_limits = {
@@ -854,6 +922,8 @@ static const struct resource_caps res_cap_nv14 = {
.num_pll = 5,
.num_dwb = 1,
.num_ddc = 5,
+ .num_vmid = 16,
+ .num_dsc = 5,
};
static const struct dc_debug_options debug_defaults_drv = {
@@ -3212,6 +3282,10 @@ static struct _vcs_dpi_soc_bounding_box_st *get_asic_rev_soc_bb(
static struct _vcs_dpi_ip_params_st *get_asic_rev_ip_params(
uint32_t hw_internal_rev)
{
+ /* NV14 */
+ if (ASICREV_IS_NAVI14_M(hw_internal_rev))
+ return &dcn2_0_nv14_ip;
+
/* NV12 and NV10 */
return &dcn2_0_ip;
}
diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index 1e2da4d37567..5ff7ccedfbed 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -591,10 +591,18 @@ int smu_sys_set_pp_table(struct smu_context *smu, void *buf, size_t size)
smu_table->power_play_table = smu_table->hardcode_pptable;
smu_table->power_play_table_size = size;
+ /*
+ * Special hw_fini action(for Navi1x, the DPMs disablement will be
+ * skipped) may be needed for custom pptable uploading.
+ */
+ smu->uploading_custom_pp_table = true;
+
ret = smu_reset(smu);
if (ret)
pr_info("smu reset failed, ret = %d\n", ret);
+ smu->uploading_custom_pp_table = false;
+
failed:
mutex_unlock(&smu->mutex);
return ret;
@@ -719,6 +727,7 @@ static int smu_set_funcs(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_VEGA20:
+ adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
vega20_set_ppt_funcs(smu);
break;
case CHIP_NAVI10:
@@ -727,6 +736,7 @@ static int smu_set_funcs(struct amdgpu_device *adev)
navi10_set_ppt_funcs(smu);
break;
case CHIP_ARCTURUS:
+ adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
arcturus_set_ppt_funcs(smu);
/* OD is not supported on Arcturus */
smu->od_enabled =false;
@@ -1068,10 +1078,6 @@ static int smu_smc_table_hw_init(struct smu_context *smu,
return ret;
if (adev->asic_type != CHIP_ARCTURUS) {
- ret = smu_override_pcie_parameters(smu);
- if (ret)
- return ret;
-
ret = smu_notify_display_change(smu);
if (ret)
return ret;
@@ -1100,6 +1106,12 @@ static int smu_smc_table_hw_init(struct smu_context *smu,
return ret;
}
+ if (adev->asic_type != CHIP_ARCTURUS) {
+ ret = smu_override_pcie_parameters(smu);
+ if (ret)
+ return ret;
+ }
+
ret = smu_set_default_od_settings(smu, initialize);
if (ret)
return ret;
@@ -1109,7 +1121,7 @@ static int smu_smc_table_hw_init(struct smu_context *smu,
if (ret)
return ret;
- ret = smu_get_power_limit(smu, &smu->default_power_limit, true, false);
+ ret = smu_get_power_limit(smu, &smu->default_power_limit, false, false);
if (ret)
return ret;
}
@@ -1293,10 +1305,25 @@ static int smu_hw_fini(void *handle)
return ret;
}
- ret = smu_stop_dpms(smu);
- if (ret) {
- pr_warn("Fail to stop Dpms!\n");
- return ret;
+ /*
+ * For custom pptable uploading, skip the DPM features
+ * disable process on Navi1x ASICs.
+ * - As the gfx related features are under control of
+ * RLC on those ASICs. RLC reinitialization will be
+ * needed to reenable them. That will cost much more
+ * efforts.
+ *
+ * - SMU firmware can handle the DPM reenablement
+ * properly.
+ */
+ if (!smu->uploading_custom_pp_table ||
+ !((adev->asic_type >= CHIP_NAVI10) &&
+ (adev->asic_type <= CHIP_NAVI12))) {
+ ret = smu_stop_dpms(smu);
+ if (ret) {
+ pr_warn("Fail to stop Dpms!\n");
+ return ret;
+ }
}
kfree(table_context->driver_pptable);
@@ -2511,3 +2538,22 @@ int smu_get_dpm_clock_table(struct smu_context *smu,
return ret;
}
+
+uint32_t smu_get_pptable_power_limit(struct smu_context *smu)
+{
+ uint32_t ret = 0;
+
+ if (smu->ppt_funcs->get_pptable_power_limit)
+ ret = smu->ppt_funcs->get_pptable_power_limit(smu);
+
+ return ret;
+}
+
+int smu_send_smc_msg(struct smu_context *smu,
+ enum smu_message_type msg)
+{
+ int ret;
+
+ ret = smu_send_smc_msg_with_param(smu, msg, 0);
+ return ret;
+}
diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
index 3099ac256bd3..ce3566ca3e24 100644
--- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
@@ -1261,15 +1261,14 @@ arcturus_get_profiling_clk_mask(struct smu_context *smu,
static int arcturus_get_power_limit(struct smu_context *smu,
uint32_t *limit,
- bool asic_default)
+ bool cap)
{
PPTable_t *pptable = smu->smu_table.driver_pptable;
uint32_t asic_default_power_limit = 0;
int ret = 0;
int power_src;
- if (!smu->default_power_limit ||
- !smu->power_limit) {
+ if (!smu->power_limit) {
if (smu_feature_is_enabled(smu, SMU_FEATURE_PPT_BIT)) {
power_src = smu_power_get_index(smu, SMU_POWER_SOURCE_AC);
if (power_src < 0)
@@ -1292,17 +1291,11 @@ static int arcturus_get_power_limit(struct smu_context *smu,
pptable->SocketPowerLimitAc[PPT_THROTTLER_PPT0];
}
- if (smu->od_enabled) {
- asic_default_power_limit *= (100 + smu->smu_table.TDPODLimit);
- asic_default_power_limit /= 100;
- }
-
- smu->default_power_limit = asic_default_power_limit;
smu->power_limit = asic_default_power_limit;
}
- if (asic_default)
- *limit = smu->default_power_limit;
+ if (cap)
+ *limit = smu_v11_0_get_max_power_limit(smu);
else
*limit = smu->power_limit;
@@ -2070,6 +2063,13 @@ static void arcturus_i2c_eeprom_control_fini(struct i2c_adapter *control)
i2c_del_adapter(control);
}
+static uint32_t arcturus_get_pptable_power_limit(struct smu_context *smu)
+{
+ PPTable_t *pptable = smu->smu_table.driver_pptable;
+
+ return pptable->SocketPowerLimitAc[PPT_THROTTLER_PPT0];
+}
+
static const struct pptable_funcs arcturus_ppt_funcs = {
/* translate smu index into arcturus specific index */
.get_smu_msg_index = arcturus_get_smu_msg_index,
@@ -2130,7 +2130,6 @@ static const struct pptable_funcs arcturus_ppt_funcs = {
.set_tool_table_location = smu_v11_0_set_tool_table_location,
.notify_memory_pool_location = smu_v11_0_notify_memory_pool_location,
.system_features_control = smu_v11_0_system_features_control,
- .send_smc_msg = smu_v11_0_send_msg,
.send_smc_msg_with_param = smu_v11_0_send_msg_with_param,
.read_smc_arg = smu_v11_0_read_arg,
.init_display_count = smu_v11_0_init_display_count,
@@ -2160,6 +2159,7 @@ static const struct pptable_funcs arcturus_ppt_funcs = {
.get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq,
.set_soft_freq_limited_range = smu_v11_0_set_soft_freq_limited_range,
.override_pcie_parameters = smu_v11_0_override_pcie_parameters,
+ .get_pptable_power_limit = arcturus_get_pptable_power_limit,
};
void arcturus_set_ppt_funcs(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
index a24beaa4fb01..d2909c91d65b 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
@@ -81,6 +81,8 @@ static void hwmgr_init_workload_prority(struct pp_hwmgr *hwmgr)
int hwmgr_early_init(struct pp_hwmgr *hwmgr)
{
+ struct amdgpu_device *adev;
+
if (!hwmgr)
return -EINVAL;
@@ -94,8 +96,11 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr)
hwmgr_init_workload_prority(hwmgr);
hwmgr->gfxoff_state_changed_by_workload = false;
+ adev = hwmgr->adev;
+
switch (hwmgr->chip_family) {
case AMDGPU_FAMILY_CI:
+ adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
hwmgr->smumgr_funcs = &ci_smu_funcs;
ci_set_asic_special_caps(hwmgr);
hwmgr->feature_mask &= ~(PP_VBI_TIME_SUPPORT_MASK |
@@ -106,12 +111,14 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr)
smu7_init_function_pointers(hwmgr);
break;
case AMDGPU_FAMILY_CZ:
+ adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
hwmgr->od_enabled = false;
hwmgr->smumgr_funcs = &smu8_smu_funcs;
hwmgr->feature_mask &= ~PP_GFXOFF_MASK;
smu8_init_function_pointers(hwmgr);
break;
case AMDGPU_FAMILY_VI:
+ adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
hwmgr->feature_mask &= ~PP_GFXOFF_MASK;
switch (hwmgr->chip_id) {
case CHIP_TOPAZ:
@@ -153,6 +160,7 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr)
case AMDGPU_FAMILY_AI:
switch (hwmgr->chip_id) {
case CHIP_VEGA10:
+ adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
hwmgr->feature_mask &= ~PP_GFXOFF_MASK;
hwmgr->smumgr_funcs = &vega10_smu_funcs;
vega10_hwmgr_init(hwmgr);
@@ -162,6 +170,7 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr)
vega12_hwmgr_init(hwmgr);
break;
case CHIP_VEGA20:
+ adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
hwmgr->feature_mask &= ~PP_GFXOFF_MASK;
hwmgr->smumgr_funcs = &vega20_smu_funcs;
vega20_hwmgr_init(hwmgr);
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index c805c6fcdba2..f73dff68e799 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -3477,18 +3477,31 @@ static int smu7_get_pp_table_entry(struct pp_hwmgr *hwmgr,
static int smu7_get_gpu_power(struct pp_hwmgr *hwmgr, u32 *query)
{
+ struct amdgpu_device *adev = hwmgr->adev;
int i;
u32 tmp = 0;
if (!query)
return -EINVAL;
- smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetCurrPkgPwr, 0);
- tmp = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0);
- *query = tmp;
+ /*
+ * PPSMC_MSG_GetCurrPkgPwr is not supported on:
+ * - Hawaii
+ * - Bonaire
+ * - Fiji
+ * - Tonga
+ */
+ if ((adev->asic_type != CHIP_HAWAII) &&
+ (adev->asic_type != CHIP_BONAIRE) &&
+ (adev->asic_type != CHIP_FIJI) &&
+ (adev->asic_type != CHIP_TONGA)) {
+ smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetCurrPkgPwr, 0);
+ tmp = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0);
+ *query = tmp;
- if (tmp != 0)
- return 0;
+ if (tmp != 0)
+ return 0;
+ }
smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PmStatusLogStart);
cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
@@ -3969,6 +3982,13 @@ static int smu7_set_power_state_tasks(struct pp_hwmgr *hwmgr, const void *input)
"Failed to populate and upload SCLK MCLK DPM levels!",
result = tmp_result);
+ /*
+ * If a custom pp table is loaded, set DPMTABLE_OD_UPDATE_VDDC flag.
+ * That effectively disables AVFS feature.
+ */
+ if (hwmgr->hardcode_pp_table != NULL)
+ data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_VDDC;
+
tmp_result = smu7_update_avfs(hwmgr);
PP_ASSERT_WITH_CODE((0 == tmp_result),
"Failed to update avfs voltages!",
diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
index 8120e7587585..ac9758305ab3 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
@@ -261,7 +261,6 @@ struct smu_table_context
struct smu_table *tables;
struct smu_table memory_pool;
uint8_t thermal_controller_type;
- uint16_t TDPODLimit;
void *overdrive_table;
};
@@ -391,6 +390,7 @@ struct smu_context
uint32_t smc_if_version;
+ bool uploading_custom_pp_table;
};
struct i2c_adapter;
@@ -497,8 +497,8 @@ struct pptable_funcs {
int (*notify_memory_pool_location)(struct smu_context *smu);
int (*set_last_dcef_min_deep_sleep_clk)(struct smu_context *smu);
int (*system_features_control)(struct smu_context *smu, bool en);
- int (*send_smc_msg)(struct smu_context *smu, uint16_t msg);
- int (*send_smc_msg_with_param)(struct smu_context *smu, uint16_t msg, uint32_t param);
+ int (*send_smc_msg_with_param)(struct smu_context *smu,
+ enum smu_message_type msg, uint32_t param);
int (*read_smc_arg)(struct smu_context *smu, uint32_t *arg);
int (*init_display_count)(struct smu_context *smu, uint32_t count);
int (*set_allowed_mask)(struct smu_context *smu);
@@ -548,6 +548,7 @@ struct pptable_funcs {
int (*get_dpm_ultimate_freq)(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t *min, uint32_t *max);
int (*set_soft_freq_limited_range)(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t min, uint32_t max);
int (*override_pcie_parameters)(struct smu_context *smu);
+ uint32_t (*get_pptable_power_limit)(struct smu_context *smu);
};
int smu_load_microcode(struct smu_context *smu);
@@ -717,4 +718,6 @@ int smu_get_uclk_dpm_states(struct smu_context *smu,
int smu_get_dpm_clock_table(struct smu_context *smu,
struct dpm_clocks *clock_table);
+uint32_t smu_get_pptable_power_limit(struct smu_context *smu);
+
#endif
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h
index fd6ec9033d06..719844257713 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h
@@ -48,6 +48,8 @@
#define SMU11_TOOL_SIZE 0x19000
+#define MAX_PCIE_CONF 2
+
#define CLK_MAP(clk, index) \
[SMU_##clk] = {1, (index)}
@@ -88,6 +90,11 @@ struct smu_11_0_dpm_table {
uint32_t max; /* MHz */
};
+struct smu_11_0_pcie_table {
+ uint8_t pcie_gen[MAX_PCIE_CONF];
+ uint8_t pcie_lane[MAX_PCIE_CONF];
+};
+
struct smu_11_0_dpm_tables {
struct smu_11_0_dpm_table soc_table;
struct smu_11_0_dpm_table gfx_table;
@@ -100,6 +107,7 @@ struct smu_11_0_dpm_tables {
struct smu_11_0_dpm_table display_table;
struct smu_11_0_dpm_table phy_table;
struct smu_11_0_dpm_table fclk_table;
+ struct smu_11_0_pcie_table pcie_table;
};
struct smu_11_0_dpm_context {
@@ -169,10 +177,9 @@ int smu_v11_0_notify_memory_pool_location(struct smu_context *smu);
int smu_v11_0_system_features_control(struct smu_context *smu,
bool en);
-int smu_v11_0_send_msg(struct smu_context *smu, uint16_t msg);
-
int
-smu_v11_0_send_msg_with_param(struct smu_context *smu, uint16_t msg,
+smu_v11_0_send_msg_with_param(struct smu_context *smu,
+ enum smu_message_type msg,
uint32_t param);
int smu_v11_0_read_arg(struct smu_context *smu, uint32_t *arg);
@@ -250,4 +257,8 @@ int smu_v11_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_
int smu_v11_0_override_pcie_parameters(struct smu_context *smu);
+int smu_v11_0_set_default_od_settings(struct smu_context *smu, bool initialize, size_t overdrive_table_size);
+
+uint32_t smu_v11_0_get_max_power_limit(struct smu_context *smu);
+
#endif
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h
index 86cdc3393eac..b2f96a101124 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h
@@ -141,7 +141,9 @@ struct smu_11_0_powerplay_table
struct smu_11_0_power_saving_clock_table power_saving_clock;
struct smu_11_0_overdrive_table overdrive_table;
+#ifndef SMU_11_0_PARTIAL_PPTABLE
PPTable_t smc_pptable; //PPTable_t in smu11_driver_if.h
+#endif
} __attribute__((packed));
#endif
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v12_0.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v12_0.h
index 9b9f5df0911c..9d81d789c713 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu_v12_0.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v12_0.h
@@ -44,10 +44,9 @@ int smu_v12_0_read_arg(struct smu_context *smu, uint32_t *arg);
int smu_v12_0_wait_for_response(struct smu_context *smu);
-int smu_v12_0_send_msg(struct smu_context *smu, uint16_t msg);
-
int
-smu_v12_0_send_msg_with_param(struct smu_context *smu, uint16_t msg,
+smu_v12_0_send_msg_with_param(struct smu_context *smu,
+ enum smu_message_type msg,
uint32_t param);
int smu_v12_0_check_fw_status(struct smu_context *smu);
diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
index 354f70978f82..4a14fd1f9fd5 100644
--- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
@@ -36,6 +36,7 @@
#include "navi10_ppt.h"
#include "smu_v11_0_pptable.h"
#include "smu_v11_0_ppsmc.h"
+#include "nbio/nbio_7_4_sh_mask.h"
#include "asic_reg/mp/mp_11_0_sh_mask.h"
@@ -599,6 +600,7 @@ static int navi10_set_default_dpm_table(struct smu_context *smu)
struct smu_table_context *table_context = &smu->smu_table;
struct smu_11_0_dpm_context *dpm_context = smu_dpm->dpm_context;
PPTable_t *driver_ppt = NULL;
+ int i;
driver_ppt = table_context->driver_pptable;
@@ -629,6 +631,11 @@ static int navi10_set_default_dpm_table(struct smu_context *smu)
dpm_context->dpm_tables.phy_table.min = driver_ppt->FreqTablePhyclk[0];
dpm_context->dpm_tables.phy_table.max = driver_ppt->FreqTablePhyclk[NUM_PHYCLK_DPM_LEVELS - 1];
+ for (i = 0; i < MAX_PCIE_CONF; i++) {
+ dpm_context->dpm_tables.pcie_table.pcie_gen[i] = driver_ppt->PcieGenSpeed[i];
+ dpm_context->dpm_tables.pcie_table.pcie_lane[i] = driver_ppt->PcieLaneCount[i];
+ }
+
return 0;
}
@@ -691,13 +698,29 @@ static bool navi10_is_support_fine_grained_dpm(struct smu_context *smu, enum smu
return dpm_desc->SnapToDiscrete == 0 ? true : false;
}
+static inline bool navi10_od_feature_is_supported(struct smu_11_0_overdrive_table *od_table, enum SMU_11_0_ODFEATURE_ID feature)
+{
+ return od_table->cap[feature];
+}
+
+
static int navi10_print_clk_levels(struct smu_context *smu,
enum smu_clk_type clk_type, char *buf)
{
+ uint16_t *curve_settings;
int i, size = 0, ret = 0;
uint32_t cur_value = 0, value = 0, count = 0;
uint32_t freq_values[3] = {0};
uint32_t mark_index = 0;
+ struct smu_table_context *table_context = &smu->smu_table;
+ uint32_t gen_speed, lane_width;
+ struct smu_dpm_context *smu_dpm = &smu->smu_dpm;
+ struct smu_11_0_dpm_context *dpm_context = smu_dpm->dpm_context;
+ struct amdgpu_device *adev = smu->adev;
+ PPTable_t *pptable = (PPTable_t *)table_context->driver_pptable;
+ OverDriveTable_t *od_table =
+ (OverDriveTable_t *)table_context->overdrive_table;
+ struct smu_11_0_overdrive_table *od_settings = smu->od_settings;
switch (clk_type) {
case SMU_GFXCLK:
@@ -748,6 +771,69 @@ static int navi10_print_clk_levels(struct smu_context *smu,
}
break;
+ case SMU_PCIE:
+ gen_speed = (RREG32_PCIE(smnPCIE_LC_SPEED_CNTL) &
+ PSWUSP0_PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK)
+ >> PSWUSP0_PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT;
+ lane_width = (RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL) &
+ PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK)
+ >> PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT;
+ for (i = 0; i < NUM_LINK_LEVELS; i++)
+ size += sprintf(buf + size, "%d: %s %s %dMhz %s\n", i,
+ (dpm_context->dpm_tables.pcie_table.pcie_gen[i] == 0) ? "2.5GT/s," :
+ (dpm_context->dpm_tables.pcie_table.pcie_gen[i] == 1) ? "5.0GT/s," :
+ (dpm_context->dpm_tables.pcie_table.pcie_gen[i] == 2) ? "8.0GT/s," :
+ (dpm_context->dpm_tables.pcie_table.pcie_gen[i] == 3) ? "16.0GT/s," : "",
+ (dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 1) ? "x1" :
+ (dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 2) ? "x2" :
+ (dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 3) ? "x4" :
+ (dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 4) ? "x8" :
+ (dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 5) ? "x12" :
+ (dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 6) ? "x16" : "",
+ pptable->LclkFreq[i],
+ (gen_speed == dpm_context->dpm_tables.pcie_table.pcie_gen[i]) &&
+ (lane_width == dpm_context->dpm_tables.pcie_table.pcie_lane[i]) ?
+ "*" : "");
+ break;
+ case SMU_OD_SCLK:
+ if (!smu->od_enabled || !od_table || !od_settings)
+ break;
+ if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_LIMITS))
+ break;
+ size += sprintf(buf + size, "OD_SCLK:\n");
+ size += sprintf(buf + size, "0: %uMhz\n1: %uMhz\n", od_table->GfxclkFmin, od_table->GfxclkFmax);
+ break;
+ case SMU_OD_MCLK:
+ if (!smu->od_enabled || !od_table || !od_settings)
+ break;
+ if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_UCLK_MAX))
+ break;
+ size += sprintf(buf + size, "OD_MCLK:\n");
+ size += sprintf(buf + size, "0: %uMHz\n", od_table->UclkFmax);
+ break;
+ case SMU_OD_VDDC_CURVE:
+ if (!smu->od_enabled || !od_table || !od_settings)
+ break;
+ if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_CURVE))
+ break;
+ size += sprintf(buf + size, "OD_VDDC_CURVE:\n");
+ for (i = 0; i < 3; i++) {
+ switch (i) {
+ case 0:
+ curve_settings = &od_table->GfxclkFreq1;
+ break;
+ case 1:
+ curve_settings = &od_table->GfxclkFreq2;
+ break;
+ case 2:
+ curve_settings = &od_table->GfxclkFreq3;
+ break;
+ default:
+ break;
+ }
+ size += sprintf(buf + size, "%d: %uMHz @ %umV\n", i, curve_settings[0], curve_settings[1] / NAVI10_VOLTAGE_SCALE);
+ }
+ break;
default:
break;
}
@@ -773,6 +859,12 @@ static int navi10_force_clk_levels(struct smu_context *smu,
case SMU_UCLK:
case SMU_DCEFCLK:
case SMU_FCLK:
+ /* There is only 2 levels for fine grained DPM */
+ if (navi10_is_support_fine_grained_dpm(smu, clk_type)) {
+ soft_max_level = (soft_max_level >= 1 ? 1 : 0);
+ soft_min_level = (soft_min_level >= 1 ? 1 : 0);
+ }
+
ret = smu_get_dpm_freq_by_index(smu, clk_type, soft_min_level, &min_freq);
if (ret)
return size;
@@ -1582,17 +1674,22 @@ static int navi10_display_disable_memory_clock_switch(struct smu_context *smu,
return ret;
}
+static uint32_t navi10_get_pptable_power_limit(struct smu_context *smu)
+{
+ PPTable_t *pptable = smu->smu_table.driver_pptable;
+ return pptable->SocketPowerLimitAc[PPT_THROTTLER_PPT0];
+}
+
static int navi10_get_power_limit(struct smu_context *smu,
uint32_t *limit,
- bool asic_default)
+ bool cap)
{
PPTable_t *pptable = smu->smu_table.driver_pptable;
uint32_t asic_default_power_limit = 0;
int ret = 0;
int power_src;
- if (!smu->default_power_limit ||
- !smu->power_limit) {
+ if (!smu->power_limit) {
if (smu_feature_is_enabled(smu, SMU_FEATURE_PPT_BIT)) {
power_src = smu_power_get_index(smu, SMU_POWER_SOURCE_AC);
if (power_src < 0)
@@ -1615,17 +1712,11 @@ static int navi10_get_power_limit(struct smu_context *smu,
pptable->SocketPowerLimitAc[PPT_THROTTLER_PPT0];
}
- if (smu->od_enabled) {
- asic_default_power_limit *= (100 + smu->smu_table.TDPODLimit);
- asic_default_power_limit /= 100;
- }
-
- smu->default_power_limit = asic_default_power_limit;
smu->power_limit = asic_default_power_limit;
}
- if (asic_default)
- *limit = smu->default_power_limit;
+ if (cap)
+ *limit = smu_v11_0_get_max_power_limit(smu);
else
*limit = smu->power_limit;
@@ -1640,6 +1731,9 @@ static int navi10_update_pcie_parameters(struct smu_context *smu,
int ret, i;
uint32_t smu_pcie_arg;
+ struct smu_dpm_context *smu_dpm = &smu->smu_dpm;
+ struct smu_11_0_dpm_context *dpm_context = smu_dpm->dpm_context;
+
for (i = 0; i < NUM_LINK_LEVELS; i++) {
smu_pcie_arg = (i << 16) |
((pptable->PcieGenSpeed[i] <= pcie_gen_cap) ? (pptable->PcieGenSpeed[i] << 8) :
@@ -1648,11 +1742,260 @@ static int navi10_update_pcie_parameters(struct smu_context *smu,
ret = smu_send_smc_msg_with_param(smu,
SMU_MSG_OverridePcieParameters,
smu_pcie_arg);
+
+ if (ret)
+ return ret;
+
+ if (pptable->PcieGenSpeed[i] > pcie_gen_cap)
+ dpm_context->dpm_tables.pcie_table.pcie_gen[i] = pcie_gen_cap;
+ if (pptable->PcieLaneCount[i] > pcie_width_cap)
+ dpm_context->dpm_tables.pcie_table.pcie_lane[i] = pcie_width_cap;
+ }
+
+ return 0;
+}
+
+static inline void navi10_dump_od_table(OverDriveTable_t *od_table) {
+ pr_debug("OD: Gfxclk: (%d, %d)\n", od_table->GfxclkFmin, od_table->GfxclkFmax);
+ pr_debug("OD: Gfx1: (%d, %d)\n", od_table->GfxclkFreq1, od_table->GfxclkVolt1);
+ pr_debug("OD: Gfx2: (%d, %d)\n", od_table->GfxclkFreq2, od_table->GfxclkVolt2);
+ pr_debug("OD: Gfx3: (%d, %d)\n", od_table->GfxclkFreq3, od_table->GfxclkVolt3);
+ pr_debug("OD: UclkFmax: %d\n", od_table->UclkFmax);
+ pr_debug("OD: OverDrivePct: %d\n", od_table->OverDrivePct);
+}
+
+static int navi10_od_setting_check_range(struct smu_11_0_overdrive_table *od_table, enum SMU_11_0_ODSETTING_ID setting, uint32_t value)
+{
+ if (value < od_table->min[setting]) {
+ pr_warn("OD setting (%d, %d) is less than the minimum allowed (%d)\n", setting, value, od_table->min[setting]);
+ return -EINVAL;
+ }
+ if (value > od_table->max[setting]) {
+ pr_warn("OD setting (%d, %d) is greater than the maximum allowed (%d)\n", setting, value, od_table->max[setting]);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int navi10_setup_od_limits(struct smu_context *smu) {
+ struct smu_11_0_overdrive_table *overdrive_table = NULL;
+ struct smu_11_0_powerplay_table *powerplay_table = NULL;
+
+ if (!smu->smu_table.power_play_table) {
+ pr_err("powerplay table uninitialized!\n");
+ return -ENOENT;
+ }
+ powerplay_table = (struct smu_11_0_powerplay_table *)smu->smu_table.power_play_table;
+ overdrive_table = &powerplay_table->overdrive_table;
+ if (!smu->od_settings) {
+ smu->od_settings = kmemdup(overdrive_table, sizeof(struct smu_11_0_overdrive_table), GFP_KERNEL);
+ } else {
+ memcpy(smu->od_settings, overdrive_table, sizeof(struct smu_11_0_overdrive_table));
+ }
+ return 0;
+}
+
+static int navi10_set_default_od_settings(struct smu_context *smu, bool initialize) {
+ OverDriveTable_t *od_table;
+ int ret = 0;
+
+ ret = smu_v11_0_set_default_od_settings(smu, initialize, sizeof(OverDriveTable_t));
+ if (ret)
+ return ret;
+
+ if (initialize) {
+ ret = navi10_setup_od_limits(smu);
+ if (ret) {
+ pr_err("Failed to retrieve board OD limits\n");
+ return ret;
+ }
+
}
+ od_table = (OverDriveTable_t *)smu->smu_table.overdrive_table;
+ if (od_table) {
+ navi10_dump_od_table(od_table);
+ }
+
+ return ret;
+}
+
+static int navi10_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_TABLE_COMMAND type, long input[], uint32_t size) {
+ int i;
+ int ret = 0;
+ struct smu_table_context *table_context = &smu->smu_table;
+ OverDriveTable_t *od_table;
+ struct smu_11_0_overdrive_table *od_settings;
+ enum SMU_11_0_ODSETTING_ID freq_setting, voltage_setting;
+ uint16_t *freq_ptr, *voltage_ptr;
+ od_table = (OverDriveTable_t *)table_context->overdrive_table;
+
+ if (!smu->od_enabled) {
+ pr_warn("OverDrive is not enabled!\n");
+ return -EINVAL;
+ }
+
+ if (!smu->od_settings) {
+ pr_err("OD board limits are not set!\n");
+ return -ENOENT;
+ }
+
+ od_settings = smu->od_settings;
+
+ switch (type) {
+ case PP_OD_EDIT_SCLK_VDDC_TABLE:
+ if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_LIMITS)) {
+ pr_warn("GFXCLK_LIMITS not supported!\n");
+ return -ENOTSUPP;
+ }
+ if (!table_context->overdrive_table) {
+ pr_err("Overdrive is not initialized\n");
+ return -EINVAL;
+ }
+ for (i = 0; i < size; i += 2) {
+ if (i + 2 > size) {
+ pr_info("invalid number of input parameters %d\n", size);
+ return -EINVAL;
+ }
+ switch (input[i]) {
+ case 0:
+ freq_setting = SMU_11_0_ODSETTING_GFXCLKFMIN;
+ freq_ptr = &od_table->GfxclkFmin;
+ if (input[i + 1] > od_table->GfxclkFmax) {
+ pr_info("GfxclkFmin (%ld) must be <= GfxclkFmax (%u)!\n",
+ input[i + 1],
+ od_table->GfxclkFmin);
+ return -EINVAL;
+ }
+ break;
+ case 1:
+ freq_setting = SMU_11_0_ODSETTING_GFXCLKFMAX;
+ freq_ptr = &od_table->GfxclkFmax;
+ if (input[i + 1] < od_table->GfxclkFmin) {
+ pr_info("GfxclkFmax (%ld) must be >= GfxclkFmin (%u)!\n",
+ input[i + 1],
+ od_table->GfxclkFmax);
+ return -EINVAL;
+ }
+ break;
+ default:
+ pr_info("Invalid SCLK_VDDC_TABLE index: %ld\n", input[i]);
+ pr_info("Supported indices: [0:min,1:max]\n");
+ return -EINVAL;
+ }
+ ret = navi10_od_setting_check_range(od_settings, freq_setting, input[i + 1]);
+ if (ret)
+ return ret;
+ *freq_ptr = input[i + 1];
+ }
+ break;
+ case PP_OD_EDIT_MCLK_VDDC_TABLE:
+ if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_UCLK_MAX)) {
+ pr_warn("UCLK_MAX not supported!\n");
+ return -ENOTSUPP;
+ }
+ if (size < 2) {
+ pr_info("invalid number of parameters: %d\n", size);
+ return -EINVAL;
+ }
+ if (input[0] != 1) {
+ pr_info("Invalid MCLK_VDDC_TABLE index: %ld\n", input[0]);
+ pr_info("Supported indices: [1:max]\n");
+ return -EINVAL;
+ }
+ ret = navi10_od_setting_check_range(od_settings, SMU_11_0_ODSETTING_UCLKFMAX, input[1]);
+ if (ret)
+ return ret;
+ od_table->UclkFmax = input[1];
+ break;
+ case PP_OD_COMMIT_DPM_TABLE:
+ navi10_dump_od_table(od_table);
+ ret = smu_update_table(smu, SMU_TABLE_OVERDRIVE, 0, (void *)od_table, true);
+ if (ret) {
+ pr_err("Failed to import overdrive table!\n");
+ return ret;
+ }
+ // no lock needed because smu_od_edit_dpm_table has it
+ ret = smu_handle_task(smu, smu->smu_dpm.dpm_level,
+ AMD_PP_TASK_READJUST_POWER_STATE,
+ false);
+ if (ret) {
+ return ret;
+ }
+ break;
+ case PP_OD_EDIT_VDDC_CURVE:
+ if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_CURVE)) {
+ pr_warn("GFXCLK_CURVE not supported!\n");
+ return -ENOTSUPP;
+ }
+ if (size < 3) {
+ pr_info("invalid number of parameters: %d\n", size);
+ return -EINVAL;
+ }
+ if (!od_table) {
+ pr_info("Overdrive is not initialized\n");
+ return -EINVAL;
+ }
+
+ switch (input[0]) {
+ case 0:
+ freq_setting = SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P1;
+ voltage_setting = SMU_11_0_ODSETTING_VDDGFXCURVEVOLTAGE_P1;
+ freq_ptr = &od_table->GfxclkFreq1;
+ voltage_ptr = &od_table->GfxclkVolt1;
+ break;
+ case 1:
+ freq_setting = SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P2;
+ voltage_setting = SMU_11_0_ODSETTING_VDDGFXCURVEVOLTAGE_P2;
+ freq_ptr = &od_table->GfxclkFreq2;
+ voltage_ptr = &od_table->GfxclkVolt2;
+ break;
+ case 2:
+ freq_setting = SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P3;
+ voltage_setting = SMU_11_0_ODSETTING_VDDGFXCURVEVOLTAGE_P3;
+ freq_ptr = &od_table->GfxclkFreq3;
+ voltage_ptr = &od_table->GfxclkVolt3;
+ break;
+ default:
+ pr_info("Invalid VDDC_CURVE index: %ld\n", input[0]);
+ pr_info("Supported indices: [0, 1, 2]\n");
+ return -EINVAL;
+ }
+ ret = navi10_od_setting_check_range(od_settings, freq_setting, input[1]);
+ if (ret)
+ return ret;
+ // Allow setting zero to disable the OverDrive VDDC curve
+ if (input[2] != 0) {
+ ret = navi10_od_setting_check_range(od_settings, voltage_setting, input[2]);
+ if (ret)
+ return ret;
+ *freq_ptr = input[1];
+ *voltage_ptr = ((uint16_t)input[2]) * NAVI10_VOLTAGE_SCALE;
+ pr_debug("OD: set curve %ld: (%d, %d)\n", input[0], *freq_ptr, *voltage_ptr);
+ } else {
+ // If setting 0, disable all voltage curve settings
+ od_table->GfxclkVolt1 = 0;
+ od_table->GfxclkVolt2 = 0;
+ od_table->GfxclkVolt3 = 0;
+ }
+ navi10_dump_od_table(od_table);
+ break;
+ default:
+ return -ENOSYS;
+ }
return ret;
}
+static int navi10_run_btc(struct smu_context *smu)
+{
+ int ret = 0;
+
+ ret = smu_send_smc_msg(smu, SMU_MSG_RunBtc);
+ if (ret)
+ pr_err("RunBtc failed!\n");
+
+ return ret;
+}
static const struct pptable_funcs navi10_ppt_funcs = {
.tables_init = navi10_tables_init,
@@ -1712,7 +2055,6 @@ static const struct pptable_funcs navi10_ppt_funcs = {
.set_tool_table_location = smu_v11_0_set_tool_table_location,
.notify_memory_pool_location = smu_v11_0_notify_memory_pool_location,
.system_features_control = smu_v11_0_system_features_control,
- .send_smc_msg = smu_v11_0_send_msg,
.send_smc_msg_with_param = smu_v11_0_send_msg_with_param,
.read_smc_arg = smu_v11_0_read_arg,
.init_display_count = smu_v11_0_init_display_count,
@@ -1742,6 +2084,10 @@ static const struct pptable_funcs navi10_ppt_funcs = {
.get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq,
.set_soft_freq_limited_range = smu_v11_0_set_soft_freq_limited_range,
.override_pcie_parameters = smu_v11_0_override_pcie_parameters,
+ .set_default_od_settings = navi10_set_default_od_settings,
+ .od_edit_dpm_table = navi10_od_edit_dpm_table,
+ .get_pptable_power_limit = navi10_get_pptable_power_limit,
+ .run_btc = navi10_run_btc,
};
void navi10_set_ppt_funcs(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.h b/drivers/gpu/drm/amd/powerplay/navi10_ppt.h
index a37e37c5f105..ec03c7992f6d 100644
--- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.h
+++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.h
@@ -33,6 +33,11 @@
#define NAVI14_UMD_PSTATE_PEAK_XTX_GFXCLK (1717)
#define NAVI14_UMD_PSTATE_PEAK_XL_GFXCLK (1448)
+#define NAVI10_VOLTAGE_SCALE (4)
+
+#define smnPCIE_LC_SPEED_CNTL 0x11140290
+#define smnPCIE_LC_LINK_WIDTH_CNTL 0x11140288
+
extern void navi10_set_ppt_funcs(struct smu_context *smu);
#endif
diff --git a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
index 04daf7e9fe05..977bdd962e98 100644
--- a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
@@ -697,7 +697,6 @@ static const struct pptable_funcs renoir_ppt_funcs = {
.check_fw_version = smu_v12_0_check_fw_version,
.powergate_sdma = smu_v12_0_powergate_sdma,
.powergate_vcn = smu_v12_0_powergate_vcn,
- .send_smc_msg = smu_v12_0_send_msg,
.send_smc_msg_with_param = smu_v12_0_send_msg_with_param,
.read_smc_arg = smu_v12_0_read_arg,
.set_gfx_cgpg = smu_v12_0_set_gfx_cgpg,
diff --git a/drivers/gpu/drm/amd/powerplay/smu_internal.h b/drivers/gpu/drm/amd/powerplay/smu_internal.h
index 8bcda7871309..8872f8b2d502 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_internal.h
+++ b/drivers/gpu/drm/amd/powerplay/smu_internal.h
@@ -75,8 +75,8 @@
#define smu_set_default_od_settings(smu, initialize) \
((smu)->ppt_funcs->set_default_od_settings ? (smu)->ppt_funcs->set_default_od_settings((smu), (initialize)) : 0)
-#define smu_send_smc_msg(smu, msg) \
- ((smu)->ppt_funcs->send_smc_msg? (smu)->ppt_funcs->send_smc_msg((smu), (msg)) : 0)
+int smu_send_smc_msg(struct smu_context *smu, enum smu_message_type msg);
+
#define smu_send_smc_msg_with_param(smu, msg, param) \
((smu)->ppt_funcs->send_smc_msg_with_param? (smu)->ppt_funcs->send_smc_msg_with_param((smu), (msg), (param)) : 0)
#define smu_read_smc_arg(smu, arg) \
diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
index e859bb1132ac..e4268a627eff 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
@@ -24,6 +24,8 @@
#include <linux/module.h>
#include <linux/pci.h>
+#define SMU_11_0_PARTIAL_PPTABLE
+
#include "pp_debug.h"
#include "amdgpu.h"
#include "amdgpu_smu.h"
@@ -31,6 +33,7 @@
#include "atomfirmware.h"
#include "amdgpu_atomfirmware.h"
#include "smu_v11_0.h"
+#include "smu_v11_0_pptable.h"
#include "soc15_common.h"
#include "atom.h"
#include "amd_pcie.h"
@@ -87,36 +90,11 @@ static int smu_v11_0_wait_for_response(struct smu_context *smu)
return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90) == 0x1 ? 0 : -EIO;
}
-int smu_v11_0_send_msg(struct smu_context *smu, uint16_t msg)
-{
- struct amdgpu_device *adev = smu->adev;
- int ret = 0, index = 0;
-
- index = smu_msg_get_index(smu, msg);
- if (index < 0)
- return index;
-
- smu_v11_0_wait_for_response(smu);
-
- WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);
-
- smu_v11_0_send_msg_without_waiting(smu, (uint16_t)index);
-
- ret = smu_v11_0_wait_for_response(smu);
-
- if (ret)
- pr_err("failed send message: %10s (%d) response %#x\n",
- smu_get_message_name(smu, msg), index, ret);
-
- return ret;
-
-}
-
int
-smu_v11_0_send_msg_with_param(struct smu_context *smu, uint16_t msg,
+smu_v11_0_send_msg_with_param(struct smu_context *smu,
+ enum smu_message_type msg,
uint32_t param)
{
-
struct amdgpu_device *adev = smu->adev;
int ret = 0, index = 0;
@@ -1045,13 +1023,44 @@ int smu_v11_0_init_max_sustainable_clocks(struct smu_context *smu)
return 0;
}
+uint32_t smu_v11_0_get_max_power_limit(struct smu_context *smu) {
+ uint32_t od_limit, max_power_limit;
+ struct smu_11_0_powerplay_table *powerplay_table = NULL;
+ struct smu_table_context *table_context = &smu->smu_table;
+ powerplay_table = table_context->power_play_table;
+
+ max_power_limit = smu_get_pptable_power_limit(smu);
+
+ if (!max_power_limit) {
+ // If we couldn't get the table limit, fall back on first-read value
+ if (!smu->default_power_limit)
+ smu->default_power_limit = smu->power_limit;
+ max_power_limit = smu->default_power_limit;
+ }
+
+ if (smu->od_enabled) {
+ od_limit = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_ODSETTING_POWERPERCENTAGE]);
+
+ pr_debug("ODSETTING_POWERPERCENTAGE: %d (default: %d)\n", od_limit, smu->default_power_limit);
+
+ max_power_limit *= (100 + od_limit);
+ max_power_limit /= 100;
+ }
+
+ return max_power_limit;
+}
+
int smu_v11_0_set_power_limit(struct smu_context *smu, uint32_t n)
{
int ret = 0;
+ uint32_t max_power_limit;
+
+ max_power_limit = smu_v11_0_get_max_power_limit(smu);
- if (n > smu->default_power_limit) {
- pr_err("New power limit is over the max allowed %d\n",
- smu->default_power_limit);
+ if (n > max_power_limit) {
+ pr_err("New power limit (%d) is over the max allowed %d\n",
+ n,
+ max_power_limit);
return -EINVAL;
}
@@ -1779,3 +1788,30 @@ int smu_v11_0_override_pcie_parameters(struct smu_context *smu)
return ret;
}
+
+int smu_v11_0_set_default_od_settings(struct smu_context *smu, bool initialize, size_t overdrive_table_size)
+{
+ struct smu_table_context *table_context = &smu->smu_table;
+ int ret = 0;
+
+ if (initialize) {
+ if (table_context->overdrive_table) {
+ return -EINVAL;
+ }
+ table_context->overdrive_table = kzalloc(overdrive_table_size, GFP_KERNEL);
+ if (!table_context->overdrive_table) {
+ return -ENOMEM;
+ }
+ ret = smu_update_table(smu, SMU_TABLE_OVERDRIVE, 0, table_context->overdrive_table, false);
+ if (ret) {
+ pr_err("Failed to export overdrive table!\n");
+ return ret;
+ }
+ }
+ ret = smu_update_table(smu, SMU_TABLE_OVERDRIVE, 0, table_context->overdrive_table, true);
+ if (ret) {
+ pr_err("Failed to import overdrive table!\n");
+ return ret;
+ }
+ return ret;
+}
diff --git a/drivers/gpu/drm/amd/powerplay/smu_v12_0.c b/drivers/gpu/drm/amd/powerplay/smu_v12_0.c
index 139dd737eaa5..094cfc46adac 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v12_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v12_0.c
@@ -77,33 +77,9 @@ int smu_v12_0_wait_for_response(struct smu_context *smu)
return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90) == 0x1 ? 0 : -EIO;
}
-int smu_v12_0_send_msg(struct smu_context *smu, uint16_t msg)
-{
- struct amdgpu_device *adev = smu->adev;
- int ret = 0, index = 0;
-
- index = smu_msg_get_index(smu, msg);
- if (index < 0)
- return index;
-
- smu_v12_0_wait_for_response(smu);
-
- WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);
-
- smu_v12_0_send_msg_without_waiting(smu, (uint16_t)index);
-
- ret = smu_v12_0_wait_for_response(smu);
-
- if (ret)
- pr_err("Failed to send message 0x%x, response 0x%x\n", index,
- ret);
-
- return ret;
-
-}
-
int
-smu_v12_0_send_msg_with_param(struct smu_context *smu, uint16_t msg,
+smu_v12_0_send_msg_with_param(struct smu_context *smu,
+ enum smu_message_type msg,
uint32_t param)
{
struct amdgpu_device *adev = smu->adev;
diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
index 5b21386f558d..60b9ff097142 100644
--- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
@@ -466,7 +466,6 @@ static int vega20_store_powerplay_table(struct smu_context *smu)
sizeof(PPTable_t));
table_context->thermal_controller_type = powerplay_table->ucThermalControllerType;
- table_context->TDPODLimit = le32_to_cpu(powerplay_table->OverDrive8Table.ODSettingsMax[ATOM_VEGA20_ODSETTING_POWERPERCENTAGE]);
return 0;
}
@@ -3232,7 +3231,6 @@ static const struct pptable_funcs vega20_ppt_funcs = {
.set_tool_table_location = smu_v11_0_set_tool_table_location,
.notify_memory_pool_location = smu_v11_0_notify_memory_pool_location,
.system_features_control = smu_v11_0_system_features_control,
- .send_smc_msg = smu_v11_0_send_msg,
.send_smc_msg_with_param = smu_v11_0_send_msg_with_param,
.read_smc_arg = smu_v11_0_read_arg,
.init_display_count = smu_v11_0_init_display_count,
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c
index 20f4f92dd866..d7e65c869415 100644
--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c
+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c
@@ -160,12 +160,23 @@ static int dw_hdmi_i2s_get_dai_id(struct snd_soc_component *component,
return -EINVAL;
}
+static int dw_hdmi_i2s_hook_plugged_cb(struct device *dev, void *data,
+ hdmi_codec_plugged_cb fn,
+ struct device *codec_dev)
+{
+ struct dw_hdmi_i2s_audio_data *audio = data;
+ struct dw_hdmi *hdmi = audio->hdmi;
+
+ return dw_hdmi_set_plugged_cb(hdmi, fn, codec_dev);
+}
+
static struct hdmi_codec_ops dw_hdmi_i2s_ops = {
.hw_params = dw_hdmi_i2s_hw_params,
.audio_startup = dw_hdmi_i2s_audio_startup,
.audio_shutdown = dw_hdmi_i2s_audio_shutdown,
.get_eld = dw_hdmi_i2s_get_eld,
.get_dai_id = dw_hdmi_i2s_get_dai_id,
+ .hook_plugged_cb = dw_hdmi_i2s_hook_plugged_cb,
};
static int snd_dw_hdmi_probe(struct platform_device *pdev)
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
index dbe38a54870b..67fca439bbfb 100644
--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
@@ -194,6 +194,10 @@ struct dw_hdmi {
struct mutex cec_notifier_mutex;
struct cec_notifier *cec_notifier;
+
+ hdmi_codec_plugged_cb plugged_cb;
+ struct device *codec_dev;
+ enum drm_connector_status last_connector_result;
};
#define HDMI_IH_PHY_STAT0_RX_SENSE \
@@ -218,6 +222,28 @@ static inline u8 hdmi_readb(struct dw_hdmi *hdmi, int offset)
return val;
}
+static void handle_plugged_change(struct dw_hdmi *hdmi, bool plugged)
+{
+ if (hdmi->plugged_cb && hdmi->codec_dev)
+ hdmi->plugged_cb(hdmi->codec_dev, plugged);
+}
+
+int dw_hdmi_set_plugged_cb(struct dw_hdmi *hdmi, hdmi_codec_plugged_cb fn,
+ struct device *codec_dev)
+{
+ bool plugged;
+
+ mutex_lock(&hdmi->mutex);
+ hdmi->plugged_cb = fn;
+ hdmi->codec_dev = codec_dev;
+ plugged = hdmi->last_connector_result == connector_status_connected;
+ handle_plugged_change(hdmi, plugged);
+ mutex_unlock(&hdmi->mutex);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dw_hdmi_set_plugged_cb);
+
static void hdmi_modb(struct dw_hdmi *hdmi, u8 data, u8 mask, unsigned reg)
{
regmap_update_bits(hdmi->regm, reg << hdmi->reg_shift, mask, data);
@@ -2229,6 +2255,7 @@ dw_hdmi_connector_detect(struct drm_connector *connector, bool force)
{
struct dw_hdmi *hdmi = container_of(connector, struct dw_hdmi,
connector);
+ enum drm_connector_status result;
mutex_lock(&hdmi->mutex);
hdmi->force = DRM_FORCE_UNSPECIFIED;
@@ -2236,7 +2263,18 @@ dw_hdmi_connector_detect(struct drm_connector *connector, bool force)
dw_hdmi_update_phy_mask(hdmi);
mutex_unlock(&hdmi->mutex);
- return hdmi->phy.ops->read_hpd(hdmi, hdmi->phy.data);
+ result = hdmi->phy.ops->read_hpd(hdmi, hdmi->phy.data);
+
+ mutex_lock(&hdmi->mutex);
+ if (result != hdmi->last_connector_result) {
+ dev_dbg(hdmi->dev, "read_hpd result: %d", result);
+ handle_plugged_change(hdmi,
+ result == connector_status_connected);
+ hdmi->last_connector_result = result;
+ }
+ mutex_unlock(&hdmi->mutex);
+
+ return result;
}
static int dw_hdmi_connector_get_modes(struct drm_connector *connector)
@@ -2731,6 +2769,7 @@ __dw_hdmi_probe(struct platform_device *pdev,
hdmi->rxsense = true;
hdmi->phy_mask = (u8)~(HDMI_PHY_HPD | HDMI_PHY_RX_SENSE);
hdmi->mc_clkdis = 0x7f;
+ hdmi->last_connector_result = connector_status_disconnected;
mutex_init(&hdmi->mutex);
mutex_init(&hdmi->audio_mutex);
diff --git a/drivers/gpu/drm/bridge/ti-tfp410.c b/drivers/gpu/drm/bridge/ti-tfp410.c
index aa3198dc9903..6f6d6d1e60ae 100644
--- a/drivers/gpu/drm/bridge/ti-tfp410.c
+++ b/drivers/gpu/drm/bridge/ti-tfp410.c
@@ -285,8 +285,8 @@ static int tfp410_get_connector_properties(struct tfp410 *dvi)
else
dvi->connector_type = DRM_MODE_CONNECTOR_DVID;
- dvi->hpd = fwnode_get_named_gpiod(&connector_node->fwnode,
- "hpd-gpios", 0, GPIOD_IN, "hpd");
+ dvi->hpd = fwnode_gpiod_get_index(&connector_node->fwnode,
+ "hpd", 0, GPIOD_IN, "hpd");
if (IS_ERR(dvi->hpd)) {
ret = PTR_ERR(dvi->hpd);
dvi->hpd = NULL;
diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index 0965632008a9..2166000ed057 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -712,7 +712,7 @@ void drm_connector_list_iter_end(struct drm_connector_list_iter *iter)
__drm_connector_put_safe(iter->conn);
spin_unlock_irqrestore(&config->connector_list_lock, flags);
}
- lock_release(&connector_list_iter_dep_map, 0, _RET_IP_);
+ lock_release(&connector_list_iter_dep_map, _RET_IP_);
}
EXPORT_SYMBOL(drm_connector_list_iter_end);
diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index ae5809a1f19a..273dd80fabf3 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -3176,9 +3176,11 @@ int drm_dp_update_payload_part1(struct drm_dp_mst_topology_mgr *mgr)
drm_dp_mst_topology_put_port(port);
}
- for (i = 0; i < mgr->max_payloads; i++) {
- if (mgr->payloads[i].payload_state != DP_PAYLOAD_DELETE_LOCAL)
+ for (i = 0; i < mgr->max_payloads; /* do nothing */) {
+ if (mgr->payloads[i].payload_state != DP_PAYLOAD_DELETE_LOCAL) {
+ i++;
continue;
+ }
DRM_DEBUG_KMS("removing payload %d\n", i);
for (j = i; j < mgr->max_payloads - 1; j++) {
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 2f2b889096b0..000fa4a1899f 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -1105,21 +1105,33 @@ int drm_gem_mmap_obj(struct drm_gem_object *obj, unsigned long obj_size,
if (obj_size < vma->vm_end - vma->vm_start)
return -EINVAL;
+ /* Take a ref for this mapping of the object, so that the fault
+ * handler can dereference the mmap offset's pointer to the object.
+ * This reference is cleaned up by the corresponding vm_close
+ * (which should happen whether the vma was created by this call, or
+ * by a vm_open due to mremap or partial unmap or whatever).
+ */
+ drm_gem_object_get(obj);
+
if (obj->funcs && obj->funcs->mmap) {
/* Remove the fake offset */
vma->vm_pgoff -= drm_vma_node_start(&obj->vma_node);
ret = obj->funcs->mmap(obj, vma);
- if (ret)
+ if (ret) {
+ drm_gem_object_put_unlocked(obj);
return ret;
+ }
WARN_ON(!(vma->vm_flags & VM_DONTEXPAND));
} else {
if (obj->funcs && obj->funcs->vm_ops)
vma->vm_ops = obj->funcs->vm_ops;
else if (dev->driver->gem_vm_ops)
vma->vm_ops = dev->driver->gem_vm_ops;
- else
+ else {
+ drm_gem_object_put_unlocked(obj);
return -EINVAL;
+ }
vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
@@ -1128,14 +1140,6 @@ int drm_gem_mmap_obj(struct drm_gem_object *obj, unsigned long obj_size,
vma->vm_private_data = obj;
- /* Take a ref for this mapping of the object, so that the fault
- * handler can dereference the mmap offset's pointer to the object.
- * This reference is cleaned up by the corresponding vm_close
- * (which should happen whether the vma was created by this call, or
- * by a vm_open due to mremap or partial unmap or whatever).
- */
- drm_gem_object_get(obj);
-
return 0;
}
EXPORT_SYMBOL(drm_gem_mmap_obj);
diff --git a/drivers/gpu/drm/drm_gem_ttm_helper.c b/drivers/gpu/drm/drm_gem_ttm_helper.c
index 7412bfc5c05a..605a8a3da7f9 100644
--- a/drivers/gpu/drm/drm_gem_ttm_helper.c
+++ b/drivers/gpu/drm/drm_gem_ttm_helper.c
@@ -64,8 +64,19 @@ int drm_gem_ttm_mmap(struct drm_gem_object *gem,
struct vm_area_struct *vma)
{
struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gem);
+ int ret;
- return ttm_bo_mmap_obj(vma, bo);
+ ret = ttm_bo_mmap_obj(vma, bo);
+ if (ret < 0)
+ return ret;
+
+ /*
+ * ttm has its own object refcounting, so drop gem reference
+ * to avoid double accounting counting.
+ */
+ drm_gem_object_put_unlocked(gem);
+
+ return 0;
}
EXPORT_SYMBOL(drm_gem_ttm_mmap);
diff --git a/drivers/gpu/drm/drm_property.c b/drivers/gpu/drm/drm_property.c
index 892ce636ef72..6ee04803c362 100644
--- a/drivers/gpu/drm/drm_property.c
+++ b/drivers/gpu/drm/drm_property.c
@@ -561,7 +561,7 @@ drm_property_create_blob(struct drm_device *dev, size_t length,
struct drm_property_blob *blob;
int ret;
- if (!length || length > ULONG_MAX - sizeof(struct drm_property_blob))
+ if (!length || length > INT_MAX - sizeof(struct drm_property_blob))
return ERR_PTR(-EINVAL);
blob = kvzalloc(sizeof(struct drm_property_blob)+length, GFP_KERNEL);
diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index a1cf437d2e99..1cb28c20807c 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -7,7 +7,6 @@ config DRM_I915_WERROR
# We use the dependency on !COMPILE_TEST to not be enabled in
# allmodconfig or allyesconfig configurations
depends on !COMPILE_TEST
- select HEADER_TEST
default n
help
Add -Werror to the build flags for (and only for) i915.ko.
@@ -22,7 +21,6 @@ config DRM_I915_DEBUG
depends on DRM_I915
select DEBUG_FS
select PREEMPT_COUNT
- select REFCOUNT_FULL
select I2C_CHARDEV
select STACKDEPOT
select DRM_DP_AUX_CHARDEV
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index ae5cae1fe503..46b4d1d643f8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1738,7 +1738,7 @@ replace:
i915_gem_context_set_user_engines(ctx);
else
i915_gem_context_clear_user_engines(ctx);
- rcu_swap_protected(ctx->engines, set.engines, 1);
+ set.engines = rcu_replace_pointer(ctx->engines, set.engines, 1);
mutex_unlock(&ctx->engines_mutex);
call_rcu(&set.engines->rcu, free_engines_rcu);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index 066b3df677e8..f7e4b39c734f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -436,12 +436,12 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
fs_reclaim_acquire(GFP_KERNEL);
mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_);
- mutex_release(&mutex->dep_map, 0, _RET_IP_);
+ mutex_release(&mutex->dep_map, _RET_IP_);
fs_reclaim_release(GFP_KERNEL);
if (unlock)
- mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
+ mutex_release(&i915->drm.struct_mutex.dep_map, _RET_IP_);
}
#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 21183ad87368..889eb37e386a 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -55,7 +55,7 @@ static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
static inline void __timeline_mark_unlock(struct intel_context *ce,
unsigned long flags)
{
- mutex_release(&ce->timeline->mutex.dep_map, 0, _THIS_IP_);
+ mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_);
local_irq_restore(flags);
}
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index 6a3ac8cde95d..21a176cd8acc 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -1599,9 +1599,9 @@ static int cmd_handler_mi_op_2f(struct parser_exec_state *s)
if (!(cmd_val(s, 0) & (1 << 22)))
return ret;
- /* check if QWORD */
- if (DWORD_FIELD(0, 20, 19) == 1)
- valid_len += 8;
+ /* check inline data */
+ if (cmd_val(s, 0) & BIT(18))
+ valid_len = CMD_LEN(9);
ret = gvt_check_valid_cmd_length(cmd_length(s),
valid_len);
if (ret)
diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c
index a816aef6142b..e451298d11c3 100644
--- a/drivers/gpu/drm/i915/gvt/dmabuf.c
+++ b/drivers/gpu/drm/i915/gvt/dmabuf.c
@@ -499,8 +499,6 @@ int intel_vgpu_get_dmabuf(struct intel_vgpu *vgpu, unsigned int dmabuf_id)
goto out_free_gem;
}
- i915_gem_object_put(obj);
-
ret = dma_buf_fd(dmabuf, DRM_CLOEXEC | DRM_RDWR);
if (ret < 0) {
gvt_vgpu_err("create dma-buf fd failed ret:%d\n", ret);
@@ -525,6 +523,8 @@ int intel_vgpu_get_dmabuf(struct intel_vgpu *vgpu, unsigned int dmabuf_id)
file_count(dmabuf->file),
kref_read(&obj->base.refcount));
+ i915_gem_object_put(obj);
+
return dmabuf_fd;
out_free_dmabuf:
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index afd7f66bdc2d..bb9fe6bf5275 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -460,6 +460,7 @@ static int pipeconf_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
static i915_reg_t force_nonpriv_white_list[] = {
GEN9_CS_DEBUG_MODE1, //_MMIO(0x20ec)
GEN9_CTX_PREEMPT_REG,//_MMIO(0x2248)
+ PS_INVOCATION_COUNT,//_MMIO(0x2348)
GEN8_CS_CHICKEN1,//_MMIO(0x2580)
_MMIO(0x2690),
_MMIO(0x2694),
@@ -508,7 +509,7 @@ static inline bool in_whitelist(unsigned int reg)
static int force_nonpriv_write(struct intel_vgpu *vgpu,
unsigned int offset, void *p_data, unsigned int bytes)
{
- u32 reg_nonpriv = *(u32 *)p_data;
+ u32 reg_nonpriv = (*(u32 *)p_data) & REG_GENMASK(25, 2);
int ring_id = intel_gvt_render_mmio_to_ring_id(vgpu->gvt, offset);
u32 ring_base;
struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
@@ -528,7 +529,7 @@ static int force_nonpriv_write(struct intel_vgpu *vgpu,
bytes);
} else
gvt_err("vgpu(%d) Invalid FORCE_NONPRIV write %x at offset %x\n",
- vgpu->id, reg_nonpriv, offset);
+ vgpu->id, *(u32 *)p_data, offset);
return 0;
}
@@ -3420,6 +3421,10 @@ int intel_gvt_for_each_tracked_mmio(struct intel_gvt *gvt,
}
for (i = 0; i < gvt->mmio.num_mmio_block; i++, block++) {
+ /* pvinfo data doesn't come from hw mmio */
+ if (i915_mmio_reg_offset(block->offset) == VGT_PVINFO_PAGE)
+ continue;
+
for (j = 0; j < block->size; j += 4) {
ret = handler(gvt,
i915_mmio_reg_offset(block->offset) + j,
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 3fa1650975b8..ddc6c311349c 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1519,7 +1519,7 @@ long i915_request_wait(struct i915_request *rq,
dma_fence_remove_callback(&rq->fence, &wait.cb);
out:
- mutex_release(&rq->engine->gt->reset.mutex.dep_map, 0, _THIS_IP_);
+ mutex_release(&rq->engine->gt->reset.mutex.dep_map, _THIS_IP_);
trace_i915_request_wait_end(rq);
return timeout;
}
diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.c b/drivers/gpu/drm/mgag200/mgag200_drv.c
index 397f8b0a9af8..d43951caeea0 100644
--- a/drivers/gpu/drm/mgag200/mgag200_drv.c
+++ b/drivers/gpu/drm/mgag200/mgag200_drv.c
@@ -30,6 +30,8 @@ module_param_named(modeset, mgag200_modeset, int, 0400);
static struct drm_driver driver;
static const struct pci_device_id pciidlist[] = {
+ { PCI_VENDOR_ID_MATROX, 0x522, PCI_VENDOR_ID_SUN, 0x4852, 0, 0,
+ G200_SE_A | MGAG200_FLAG_HW_BUG_NO_STARTADD},
{ PCI_VENDOR_ID_MATROX, 0x522, PCI_ANY_ID, PCI_ANY_ID, 0, 0, G200_SE_A },
{ PCI_VENDOR_ID_MATROX, 0x524, PCI_ANY_ID, PCI_ANY_ID, 0, 0, G200_SE_B },
{ PCI_VENDOR_ID_MATROX, 0x530, PCI_ANY_ID, PCI_ANY_ID, 0, 0, G200_EV },
@@ -60,6 +62,35 @@ static void mga_pci_remove(struct pci_dev *pdev)
DEFINE_DRM_GEM_FOPS(mgag200_driver_fops);
+static bool mgag200_pin_bo_at_0(const struct mga_device *mdev)
+{
+ return mdev->flags & MGAG200_FLAG_HW_BUG_NO_STARTADD;
+}
+
+int mgag200_driver_dumb_create(struct drm_file *file,
+ struct drm_device *dev,
+ struct drm_mode_create_dumb *args)
+{
+ struct mga_device *mdev = dev->dev_private;
+ unsigned long pg_align;
+
+ if (WARN_ONCE(!dev->vram_mm, "VRAM MM not initialized"))
+ return -EINVAL;
+
+ pg_align = 0ul;
+
+ /*
+ * Aligning scanout buffers to the size of the video ram forces
+ * placement at offset 0. Works around a bug where HW does not
+ * respect 'startadd' field.
+ */
+ if (mgag200_pin_bo_at_0(mdev))
+ pg_align = PFN_UP(mdev->mc.vram_size);
+
+ return drm_gem_vram_fill_create_dumb(file, dev, &dev->vram_mm->bdev,
+ pg_align, false, args);
+}
+
static struct drm_driver driver = {
.driver_features = DRIVER_GEM | DRIVER_MODESET,
.load = mgag200_driver_load,
@@ -71,7 +102,10 @@ static struct drm_driver driver = {
.major = DRIVER_MAJOR,
.minor = DRIVER_MINOR,
.patchlevel = DRIVER_PATCHLEVEL,
- DRM_GEM_VRAM_DRIVER
+ .debugfs_init = drm_vram_mm_debugfs_init,
+ .dumb_create = mgag200_driver_dumb_create,
+ .dumb_map_offset = drm_gem_vram_driver_dumb_mmap_offset,
+ .gem_prime_mmap = drm_gem_prime_mmap,
};
static struct pci_driver mgag200_pci_driver = {
diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.h b/drivers/gpu/drm/mgag200/mgag200_drv.h
index 0ea9a525e57d..aa32aad222c2 100644
--- a/drivers/gpu/drm/mgag200/mgag200_drv.h
+++ b/drivers/gpu/drm/mgag200/mgag200_drv.h
@@ -150,6 +150,12 @@ enum mga_type {
G200_EW3,
};
+/* HW does not handle 'startadd' field correct. */
+#define MGAG200_FLAG_HW_BUG_NO_STARTADD (1ul << 8)
+
+#define MGAG200_TYPE_MASK (0x000000ff)
+#define MGAG200_FLAG_MASK (0x00ffff00)
+
#define IS_G200_SE(mdev) (mdev->type == G200_SE_A || mdev->type == G200_SE_B)
struct mga_device {
@@ -181,6 +187,18 @@ struct mga_device {
u32 unique_rev_id;
};
+static inline enum mga_type
+mgag200_type_from_driver_data(kernel_ulong_t driver_data)
+{
+ return (enum mga_type)(driver_data & MGAG200_TYPE_MASK);
+}
+
+static inline unsigned long
+mgag200_flags_from_driver_data(kernel_ulong_t driver_data)
+{
+ return driver_data & MGAG200_FLAG_MASK;
+}
+
/* mgag200_mode.c */
int mgag200_modeset_init(struct mga_device *mdev);
void mgag200_modeset_fini(struct mga_device *mdev);
diff --git a/drivers/gpu/drm/mgag200/mgag200_main.c b/drivers/gpu/drm/mgag200/mgag200_main.c
index 5f74aabcd3df..e1bc5b0aa774 100644
--- a/drivers/gpu/drm/mgag200/mgag200_main.c
+++ b/drivers/gpu/drm/mgag200/mgag200_main.c
@@ -94,7 +94,8 @@ static int mgag200_device_init(struct drm_device *dev,
struct mga_device *mdev = dev->dev_private;
int ret, option;
- mdev->type = flags;
+ mdev->flags = mgag200_flags_from_driver_data(flags);
+ mdev->type = mgag200_type_from_driver_data(flags);
/* Hardcode the number of CRTCs to 1 */
mdev->num_crtc = 1;
diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
index e9160ce39cbb..6deaa7d01654 100644
--- a/drivers/gpu/drm/msm/Kconfig
+++ b/drivers/gpu/drm/msm/Kconfig
@@ -7,6 +7,7 @@ config DRM_MSM
depends on OF && COMMON_CLK
depends on MMU
depends on INTERCONNECT || !INTERCONNECT
+ depends on QCOM_OCMEM || QCOM_OCMEM=n
select QCOM_MDT_LOADER if ARCH_QCOM
select REGULATOR
select DRM_KMS_HELPER
diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
index 5f7e98028eaf..7ad14937fcdf 100644
--- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
@@ -6,10 +6,6 @@
* Copyright (c) 2014 The Linux Foundation. All rights reserved.
*/
-#ifdef CONFIG_MSM_OCMEM
-# include <mach/ocmem.h>
-#endif
-
#include "a3xx_gpu.h"
#define A3XX_INT0_MASK \
@@ -195,9 +191,9 @@ static int a3xx_hw_init(struct msm_gpu *gpu)
gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);
/* Set the OCMEM base address for A330, etc */
- if (a3xx_gpu->ocmem_hdl) {
+ if (a3xx_gpu->ocmem.hdl) {
gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
- (unsigned int)(a3xx_gpu->ocmem_base >> 14));
+ (unsigned int)(a3xx_gpu->ocmem.base >> 14));
}
/* Turn on performance counters: */
@@ -318,10 +314,7 @@ static void a3xx_destroy(struct msm_gpu *gpu)
adreno_gpu_cleanup(adreno_gpu);
-#ifdef CONFIG_MSM_OCMEM
- if (a3xx_gpu->ocmem_base)
- ocmem_free(OCMEM_GRAPHICS, a3xx_gpu->ocmem_hdl);
-#endif
+ adreno_gpu_ocmem_cleanup(&a3xx_gpu->ocmem);
kfree(a3xx_gpu);
}
@@ -494,17 +487,10 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
/* if needed, allocate gmem: */
if (adreno_is_a330(adreno_gpu)) {
-#ifdef CONFIG_MSM_OCMEM
- /* TODO this is different/missing upstream: */
- struct ocmem_buf *ocmem_hdl =
- ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem);
-
- a3xx_gpu->ocmem_hdl = ocmem_hdl;
- a3xx_gpu->ocmem_base = ocmem_hdl->addr;
- adreno_gpu->gmem = ocmem_hdl->len;
- DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024,
- a3xx_gpu->ocmem_base);
-#endif
+ ret = adreno_gpu_ocmem_init(&adreno_gpu->base.pdev->dev,
+ adreno_gpu, &a3xx_gpu->ocmem);
+ if (ret)
+ goto fail;
}
if (!gpu->aspace) {
diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.h b/drivers/gpu/drm/msm/adreno/a3xx_gpu.h
index 5dc33e5ea53b..c555fb13e0d7 100644
--- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.h
@@ -19,8 +19,7 @@ struct a3xx_gpu {
struct adreno_gpu base;
/* if OCMEM is used for GMEM: */
- uint32_t ocmem_base;
- void *ocmem_hdl;
+ struct adreno_ocmem ocmem;
};
#define to_a3xx_gpu(x) container_of(x, struct a3xx_gpu, base)
diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
index ab2b752566d8..b01388a9e89e 100644
--- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
@@ -2,9 +2,6 @@
/* Copyright (c) 2014 The Linux Foundation. All rights reserved.
*/
#include "a4xx_gpu.h"
-#ifdef CONFIG_MSM_OCMEM
-# include <soc/qcom/ocmem.h>
-#endif
#define A4XX_INT0_MASK \
(A4XX_INT0_RBBM_AHB_ERROR | \
@@ -188,7 +185,7 @@ static int a4xx_hw_init(struct msm_gpu *gpu)
(1 << 30) | 0xFFFF);
gpu_write(gpu, REG_A4XX_RB_GMEM_BASE_ADDR,
- (unsigned int)(a4xx_gpu->ocmem_base >> 14));
+ (unsigned int)(a4xx_gpu->ocmem.base >> 14));
/* Turn on performance counters: */
gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01);
@@ -318,10 +315,7 @@ static void a4xx_destroy(struct msm_gpu *gpu)
adreno_gpu_cleanup(adreno_gpu);
-#ifdef CONFIG_MSM_OCMEM
- if (a4xx_gpu->ocmem_base)
- ocmem_free(OCMEM_GRAPHICS, a4xx_gpu->ocmem_hdl);
-#endif
+ adreno_gpu_ocmem_cleanup(&a4xx_gpu->ocmem);
kfree(a4xx_gpu);
}
@@ -578,17 +572,10 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
/* if needed, allocate gmem: */
if (adreno_is_a4xx(adreno_gpu)) {
-#ifdef CONFIG_MSM_OCMEM
- /* TODO this is different/missing upstream: */
- struct ocmem_buf *ocmem_hdl =
- ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem);
-
- a4xx_gpu->ocmem_hdl = ocmem_hdl;
- a4xx_gpu->ocmem_base = ocmem_hdl->addr;
- adreno_gpu->gmem = ocmem_hdl->len;
- DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024,
- a4xx_gpu->ocmem_base);
-#endif
+ ret = adreno_gpu_ocmem_init(dev->dev, adreno_gpu,
+ &a4xx_gpu->ocmem);
+ if (ret)
+ goto fail;
}
if (!gpu->aspace) {
diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.h b/drivers/gpu/drm/msm/adreno/a4xx_gpu.h
index d506311ee240..a01448cba2ea 100644
--- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.h
@@ -16,8 +16,7 @@ struct a4xx_gpu {
struct adreno_gpu base;
/* if OCMEM is used for GMEM: */
- uint32_t ocmem_base;
- void *ocmem_hdl;
+ struct adreno_ocmem ocmem;
};
#define to_a4xx_gpu(x) container_of(x, struct a4xx_gpu, base)
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index e9c55d1d6c04..b02e2042547f 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -353,6 +353,9 @@ static int a5xx_me_init(struct msm_gpu *gpu)
* 2D mode 3 draw
*/
OUT_RING(ring, 0x0000000B);
+ } else if (adreno_is_a510(adreno_gpu)) {
+ /* Workaround for token and syncs */
+ OUT_RING(ring, 0x00000001);
} else {
/* No workarounds enabled */
OUT_RING(ring, 0x00000000);
@@ -568,15 +571,24 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
0x00100000 + adreno_gpu->gmem - 1);
gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
- gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
- if (adreno_is_a530(adreno_gpu))
- gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
- if (adreno_is_a540(adreno_gpu))
- gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
- gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
- gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
-
- gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22));
+ if (adreno_is_a510(adreno_gpu)) {
+ gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
+ gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
+ gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
+ gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
+ gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
+ (0x200 << 11 | 0x200 << 22));
+ } else {
+ gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
+ if (adreno_is_a530(adreno_gpu))
+ gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
+ if (adreno_is_a540(adreno_gpu))
+ gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
+ gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
+ gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
+ gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
+ (0x400 << 11 | 0x300 << 22));
+ }
if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
@@ -589,6 +601,19 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
/* Enable ME/PFP split notification */
gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
+ /*
+ * In A5x, CCU can send context_done event of a particular context to
+ * UCHE which ultimately reaches CP even when there is valid
+ * transaction of that context inside CCU. This can let CP to program
+ * config registers, which will make the "valid transaction" inside
+ * CCU to be interpreted differently. This can cause gpu fault. This
+ * bug is fixed in latest A510 revision. To enable this bug fix -
+ * bit[11] of RB_DBG_ECO_CNTL need to be set to 0, default is 1
+ * (disable). For older A510 version this bit is unused.
+ */
+ if (adreno_is_a510(adreno_gpu))
+ gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0);
+
/* Enable HWCG */
a5xx_set_hwcg(gpu, true);
@@ -635,7 +660,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
/* UCHE */
gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
- if (adreno_is_a530(adreno_gpu))
+ if (adreno_is_a530(adreno_gpu) || adreno_is_a510(adreno_gpu))
gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
ADRENO_PROTECT_RW(0x10000, 0x8000));
@@ -679,7 +704,8 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
a5xx_preempt_hw_init(gpu);
- a5xx_gpmu_ucode_init(gpu);
+ if (!adreno_is_a510(adreno_gpu))
+ a5xx_gpmu_ucode_init(gpu);
ret = a5xx_ucode_init(gpu);
if (ret)
@@ -712,7 +738,8 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
}
/*
- * Try to load a zap shader into the secure world. If successful
+ * If the chip that we are using does support loading one, then
+ * try to load a zap shader into the secure world. If successful
* we can use the CP to switch out of secure mode. If not then we
* have no resource but to try to switch ourselves out manually. If we
* guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
@@ -1066,6 +1093,7 @@ static void a5xx_dump(struct msm_gpu *gpu)
static int a5xx_pm_resume(struct msm_gpu *gpu)
{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
int ret;
/* Turn on the core power */
@@ -1073,6 +1101,15 @@ static int a5xx_pm_resume(struct msm_gpu *gpu)
if (ret)
return ret;
+ if (adreno_is_a510(adreno_gpu)) {
+ /* Halt the sp_input_clk at HM level */
+ gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
+ a5xx_set_hwcg(gpu, true);
+ /* Turn on sp_input_clk at HM level */
+ gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0);
+ return 0;
+ }
+
/* Turn the RBCCU domain first to limit the chances of voltage droop */
gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
@@ -1101,9 +1138,17 @@ static int a5xx_pm_resume(struct msm_gpu *gpu)
static int a5xx_pm_suspend(struct msm_gpu *gpu)
{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ u32 mask = 0xf;
+
+ /* A510 has 3 XIN ports in VBIF */
+ if (adreno_is_a510(adreno_gpu))
+ mask = 0x7;
+
/* Clear the VBIF pipe before shutting down */
- gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF);
- spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF);
+ gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask);
+ spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) &
+ mask) == mask);
gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
@@ -1289,7 +1334,7 @@ static void a5xx_gpu_state_destroy(struct kref *kref)
kfree(a5xx_state);
}
-int a5xx_gpu_state_put(struct msm_gpu_state *state)
+static int a5xx_gpu_state_put(struct msm_gpu_state *state)
{
if (IS_ERR_OR_NULL(state))
return 1;
@@ -1299,8 +1344,8 @@ int a5xx_gpu_state_put(struct msm_gpu_state *state)
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
-void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
- struct drm_printer *p)
+static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
+ struct drm_printer *p)
{
int i, j;
u32 pos = 0;
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_power.c b/drivers/gpu/drm/msm/adreno/a5xx_power.c
index a3a06db675ba..321a8061fd32 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_power.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_power.c
@@ -297,6 +297,10 @@ int a5xx_power_init(struct msm_gpu *gpu)
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
int ret;
+ /* Not all A5xx chips have a GPMU */
+ if (adreno_is_a510(adreno_gpu))
+ return 0;
+
/* Set up the limits management */
if (adreno_is_a530(adreno_gpu))
a530_lm_setup(gpu);
@@ -326,6 +330,9 @@ void a5xx_gpmu_ucode_init(struct msm_gpu *gpu)
unsigned int *data, *ptr, *cmds;
unsigned int cmds_size;
+ if (adreno_is_a510(adreno_gpu))
+ return;
+
if (a5xx_gpu->gpmu_bo)
return;
diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
index 0888e0df660d..fbbdf86504f5 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -115,6 +115,21 @@ static const struct adreno_info gpulist[] = {
.inactive_period = DRM_MSM_INACTIVE_PERIOD,
.init = a4xx_gpu_init,
}, {
+ .rev = ADRENO_REV(5, 1, 0, ANY_ID),
+ .revn = 510,
+ .name = "A510",
+ .fw = {
+ [ADRENO_FW_PM4] = "a530_pm4.fw",
+ [ADRENO_FW_PFP] = "a530_pfp.fw",
+ },
+ .gmem = SZ_256K,
+ /*
+ * Increase inactive period to 250 to avoid bouncing
+ * the GDSC which appears to make it grumpy
+ */
+ .inactive_period = 250,
+ .init = a5xx_gpu_init,
+ }, {
.rev = ADRENO_REV(5, 3, 0, 2),
.revn = 530,
.name = "A530",
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 048c8be426f3..0783e4b5486a 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -14,6 +14,7 @@
#include <linux/pm_opp.h>
#include <linux/slab.h>
#include <linux/soc/qcom/mdt_loader.h>
+#include <soc/qcom/ocmem.h>
#include "adreno_gpu.h"
#include "msm_gem.h"
#include "msm_mmu.h"
@@ -893,6 +894,45 @@ static int adreno_get_pwrlevels(struct device *dev,
return 0;
}
+int adreno_gpu_ocmem_init(struct device *dev, struct adreno_gpu *adreno_gpu,
+ struct adreno_ocmem *adreno_ocmem)
+{
+ struct ocmem_buf *ocmem_hdl;
+ struct ocmem *ocmem;
+
+ ocmem = of_get_ocmem(dev);
+ if (IS_ERR(ocmem)) {
+ if (PTR_ERR(ocmem) == -ENODEV) {
+ /*
+ * Return success since either the ocmem property was
+ * not specified in device tree, or ocmem support is
+ * not compiled into the kernel.
+ */
+ return 0;
+ }
+
+ return PTR_ERR(ocmem);
+ }
+
+ ocmem_hdl = ocmem_allocate(ocmem, OCMEM_GRAPHICS, adreno_gpu->gmem);
+ if (IS_ERR(ocmem_hdl))
+ return PTR_ERR(ocmem_hdl);
+
+ adreno_ocmem->ocmem = ocmem;
+ adreno_ocmem->base = ocmem_hdl->addr;
+ adreno_ocmem->hdl = ocmem_hdl;
+ adreno_gpu->gmem = ocmem_hdl->len;
+
+ return 0;
+}
+
+void adreno_gpu_ocmem_cleanup(struct adreno_ocmem *adreno_ocmem)
+{
+ if (adreno_ocmem && adreno_ocmem->base)
+ ocmem_free(adreno_ocmem->ocmem, OCMEM_GRAPHICS,
+ adreno_ocmem->hdl);
+}
+
int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
struct adreno_gpu *adreno_gpu,
const struct adreno_gpu_funcs *funcs, int nr_rings)
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index c7441fb8313e..e71a7570ef72 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -126,6 +126,12 @@ struct adreno_gpu {
};
#define to_adreno_gpu(x) container_of(x, struct adreno_gpu, base)
+struct adreno_ocmem {
+ struct ocmem *ocmem;
+ unsigned long base;
+ void *hdl;
+};
+
/* platform config data (ie. from DT, or pdata) */
struct adreno_platform_config {
struct adreno_rev rev;
@@ -206,6 +212,11 @@ static inline int adreno_is_a430(struct adreno_gpu *gpu)
return gpu->revn == 430;
}
+static inline int adreno_is_a510(struct adreno_gpu *gpu)
+{
+ return gpu->revn == 510;
+}
+
static inline int adreno_is_a530(struct adreno_gpu *gpu)
{
return gpu->revn == 530;
@@ -236,6 +247,10 @@ void adreno_dump(struct msm_gpu *gpu);
void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords);
struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu);
+int adreno_gpu_ocmem_init(struct device *dev, struct adreno_gpu *adreno_gpu,
+ struct adreno_ocmem *ocmem);
+void adreno_gpu_ocmem_cleanup(struct adreno_ocmem *ocmem);
+
int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs,
int nr_rings);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.c
index cdbea38b8697..f1bc6a1af7a7 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.c
@@ -55,8 +55,7 @@ static void dpu_core_irq_callback_handler(void *arg, int irq_idx)
int dpu_core_irq_idx_lookup(struct dpu_kms *dpu_kms,
enum dpu_intr_type intr_type, u32 instance_idx)
{
- if (!dpu_kms || !dpu_kms->hw_intr ||
- !dpu_kms->hw_intr->ops.irq_idx_lookup)
+ if (!dpu_kms->hw_intr || !dpu_kms->hw_intr->ops.irq_idx_lookup)
return -EINVAL;
return dpu_kms->hw_intr->ops.irq_idx_lookup(intr_type,
@@ -73,7 +72,7 @@ static int _dpu_core_irq_enable(struct dpu_kms *dpu_kms, int irq_idx)
unsigned long irq_flags;
int ret = 0, enable_count;
- if (!dpu_kms || !dpu_kms->hw_intr ||
+ if (!dpu_kms->hw_intr ||
!dpu_kms->irq_obj.enable_counts ||
!dpu_kms->irq_obj.irq_counts) {
DPU_ERROR("invalid params\n");
@@ -114,7 +113,7 @@ int dpu_core_irq_enable(struct dpu_kms *dpu_kms, int *irq_idxs, u32 irq_count)
{
int i, ret = 0, counts;
- if (!dpu_kms || !irq_idxs || !irq_count) {
+ if (!irq_idxs || !irq_count) {
DPU_ERROR("invalid params\n");
return -EINVAL;
}
@@ -138,7 +137,7 @@ static int _dpu_core_irq_disable(struct dpu_kms *dpu_kms, int irq_idx)
{
int ret = 0, enable_count;
- if (!dpu_kms || !dpu_kms->hw_intr || !dpu_kms->irq_obj.enable_counts) {
+ if (!dpu_kms->hw_intr || !dpu_kms->irq_obj.enable_counts) {
DPU_ERROR("invalid params\n");
return -EINVAL;
}
@@ -169,7 +168,7 @@ int dpu_core_irq_disable(struct dpu_kms *dpu_kms, int *irq_idxs, u32 irq_count)
{
int i, ret = 0, counts;
- if (!dpu_kms || !irq_idxs || !irq_count) {
+ if (!irq_idxs || !irq_count) {
DPU_ERROR("invalid params\n");
return -EINVAL;
}
@@ -186,7 +185,7 @@ int dpu_core_irq_disable(struct dpu_kms *dpu_kms, int *irq_idxs, u32 irq_count)
u32 dpu_core_irq_read(struct dpu_kms *dpu_kms, int irq_idx, bool clear)
{
- if (!dpu_kms || !dpu_kms->hw_intr ||
+ if (!dpu_kms->hw_intr ||
!dpu_kms->hw_intr->ops.get_interrupt_status)
return 0;
@@ -205,7 +204,7 @@ int dpu_core_irq_register_callback(struct dpu_kms *dpu_kms, int irq_idx,
{
unsigned long irq_flags;
- if (!dpu_kms || !dpu_kms->irq_obj.irq_cb_tbl) {
+ if (!dpu_kms->irq_obj.irq_cb_tbl) {
DPU_ERROR("invalid params\n");
return -EINVAL;
}
@@ -240,7 +239,7 @@ int dpu_core_irq_unregister_callback(struct dpu_kms *dpu_kms, int irq_idx,
{
unsigned long irq_flags;
- if (!dpu_kms || !dpu_kms->irq_obj.irq_cb_tbl) {
+ if (!dpu_kms->irq_obj.irq_cb_tbl) {
DPU_ERROR("invalid params\n");
return -EINVAL;
}
@@ -274,8 +273,7 @@ int dpu_core_irq_unregister_callback(struct dpu_kms *dpu_kms, int irq_idx,
static void dpu_clear_all_irqs(struct dpu_kms *dpu_kms)
{
- if (!dpu_kms || !dpu_kms->hw_intr ||
- !dpu_kms->hw_intr->ops.clear_all_irqs)
+ if (!dpu_kms->hw_intr || !dpu_kms->hw_intr->ops.clear_all_irqs)
return;
dpu_kms->hw_intr->ops.clear_all_irqs(dpu_kms->hw_intr);
@@ -283,8 +281,7 @@ static void dpu_clear_all_irqs(struct dpu_kms *dpu_kms)
static void dpu_disable_all_irqs(struct dpu_kms *dpu_kms)
{
- if (!dpu_kms || !dpu_kms->hw_intr ||
- !dpu_kms->hw_intr->ops.disable_all_irqs)
+ if (!dpu_kms->hw_intr || !dpu_kms->hw_intr->ops.disable_all_irqs)
return;
dpu_kms->hw_intr->ops.disable_all_irqs(dpu_kms->hw_intr);
@@ -343,18 +340,8 @@ void dpu_debugfs_core_irq_init(struct dpu_kms *dpu_kms,
void dpu_core_irq_preinstall(struct dpu_kms *dpu_kms)
{
- struct msm_drm_private *priv;
int i;
- if (!dpu_kms->dev) {
- DPU_ERROR("invalid drm device\n");
- return;
- } else if (!dpu_kms->dev->dev_private) {
- DPU_ERROR("invalid device private\n");
- return;
- }
- priv = dpu_kms->dev->dev_private;
-
pm_runtime_get_sync(&dpu_kms->pdev->dev);
dpu_clear_all_irqs(dpu_kms);
dpu_disable_all_irqs(dpu_kms);
@@ -379,18 +366,8 @@ void dpu_core_irq_preinstall(struct dpu_kms *dpu_kms)
void dpu_core_irq_uninstall(struct dpu_kms *dpu_kms)
{
- struct msm_drm_private *priv;
int i;
- if (!dpu_kms->dev) {
- DPU_ERROR("invalid drm device\n");
- return;
- } else if (!dpu_kms->dev->dev_private) {
- DPU_ERROR("invalid device private\n");
- return;
- }
- priv = dpu_kms->dev->dev_private;
-
pm_runtime_get_sync(&dpu_kms->pdev->dev);
for (i = 0; i < dpu_kms->irq_obj.total_irqs; i++)
if (atomic_read(&dpu_kms->irq_obj.enable_counts[i]) ||
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
index 09a49b59bb5b..11f2bebe3869 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
@@ -32,18 +32,7 @@ enum dpu_perf_mode {
static struct dpu_kms *_dpu_crtc_get_kms(struct drm_crtc *crtc)
{
struct msm_drm_private *priv;
-
- if (!crtc->dev || !crtc->dev->dev_private) {
- DPU_ERROR("invalid device\n");
- return NULL;
- }
-
priv = crtc->dev->dev_private;
- if (!priv || !priv->kms) {
- DPU_ERROR("invalid kms\n");
- return NULL;
- }
-
return to_dpu_kms(priv->kms);
}
@@ -116,7 +105,7 @@ int dpu_core_perf_crtc_check(struct drm_crtc *crtc,
}
kms = _dpu_crtc_get_kms(crtc);
- if (!kms || !kms->catalog) {
+ if (!kms->catalog) {
DPU_ERROR("invalid parameters\n");
return 0;
}
@@ -215,7 +204,6 @@ static int _dpu_core_perf_crtc_update_bus(struct dpu_kms *kms,
void dpu_core_perf_crtc_release_bw(struct drm_crtc *crtc)
{
struct dpu_crtc *dpu_crtc;
- struct dpu_crtc_state *dpu_cstate;
struct dpu_kms *kms;
if (!crtc) {
@@ -224,13 +212,12 @@ void dpu_core_perf_crtc_release_bw(struct drm_crtc *crtc)
}
kms = _dpu_crtc_get_kms(crtc);
- if (!kms || !kms->catalog) {
+ if (!kms->catalog) {
DPU_ERROR("invalid kms\n");
return;
}
dpu_crtc = to_dpu_crtc(crtc);
- dpu_cstate = to_dpu_crtc_state(crtc->state);
if (atomic_dec_return(&kms->bandwidth_ref) > 0)
return;
@@ -287,7 +274,6 @@ int dpu_core_perf_crtc_update(struct drm_crtc *crtc,
u64 clk_rate = 0;
struct dpu_crtc *dpu_crtc;
struct dpu_crtc_state *dpu_cstate;
- struct msm_drm_private *priv;
struct dpu_kms *kms;
int ret;
@@ -297,11 +283,10 @@ int dpu_core_perf_crtc_update(struct drm_crtc *crtc,
}
kms = _dpu_crtc_get_kms(crtc);
- if (!kms || !kms->catalog) {
+ if (!kms->catalog) {
DPU_ERROR("invalid kms\n");
return -EINVAL;
}
- priv = kms->dev->dev_private;
dpu_crtc = to_dpu_crtc(crtc);
dpu_cstate = to_dpu_crtc_state(crtc->state);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
index ce59adff06aa..f197dce54576 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
@@ -266,11 +266,20 @@ enum dpu_intf_mode dpu_crtc_get_intf_mode(struct drm_crtc *crtc)
{
struct drm_encoder *encoder;
- if (!crtc || !crtc->dev) {
+ if (!crtc) {
DPU_ERROR("invalid crtc\n");
return INTF_MODE_NONE;
}
+ /*
+ * TODO: This function is called from dpu debugfs and as part of atomic
+ * check. When called from debugfs, the crtc->mutex must be held to
+ * read crtc->state. However reading crtc->state from atomic check isn't
+ * allowed (unless you have a good reason, a big comment, and a deep
+ * understanding of how the atomic/modeset locks work (<- and this is
+ * probably not possible)). So we'll keep the WARN_ON here for now, but
+ * really we need to figure out a better way to track our operating mode
+ */
WARN_ON(!drm_modeset_is_locked(&crtc->mutex));
/* TODO: Returns the first INTF_MODE, could there be multiple values? */
@@ -694,7 +703,7 @@ static void dpu_crtc_disable(struct drm_crtc *crtc,
unsigned long flags;
bool release_bandwidth = false;
- if (!crtc || !crtc->dev || !crtc->dev->dev_private || !crtc->state) {
+ if (!crtc || !crtc->state) {
DPU_ERROR("invalid crtc\n");
return;
}
@@ -766,7 +775,7 @@ static void dpu_crtc_enable(struct drm_crtc *crtc,
struct msm_drm_private *priv;
bool request_bandwidth;
- if (!crtc || !crtc->dev || !crtc->dev->dev_private) {
+ if (!crtc) {
DPU_ERROR("invalid crtc\n");
return;
}
@@ -1288,13 +1297,8 @@ struct drm_crtc *dpu_crtc_init(struct drm_device *dev, struct drm_plane *plane,
{
struct drm_crtc *crtc = NULL;
struct dpu_crtc *dpu_crtc = NULL;
- struct msm_drm_private *priv = NULL;
- struct dpu_kms *kms = NULL;
int i;
- priv = dev->dev_private;
- kms = to_dpu_kms(priv->kms);
-
dpu_crtc = kzalloc(sizeof(*dpu_crtc), GFP_KERNEL);
if (!dpu_crtc)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
index d82ea994063f..f96e142c4361 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
@@ -645,11 +645,6 @@ static void _dpu_encoder_update_vsync_source(struct dpu_encoder_virt *dpu_enc,
priv = drm_enc->dev->dev_private;
dpu_kms = to_dpu_kms(priv->kms);
- if (!dpu_kms) {
- DPU_ERROR("invalid dpu_kms\n");
- return;
- }
-
hw_mdptop = dpu_kms->hw_mdp;
if (!hw_mdptop) {
DPU_ERROR("invalid mdptop\n");
@@ -735,8 +730,7 @@ static int dpu_encoder_resource_control(struct drm_encoder *drm_enc,
struct msm_drm_private *priv;
bool is_vid_mode = false;
- if (!drm_enc || !drm_enc->dev || !drm_enc->dev->dev_private ||
- !drm_enc->crtc) {
+ if (!drm_enc || !drm_enc->dev || !drm_enc->crtc) {
DPU_ERROR("invalid parameters\n");
return -EINVAL;
}
@@ -1092,17 +1086,13 @@ static void _dpu_encoder_virt_enable_helper(struct drm_encoder *drm_enc)
struct msm_drm_private *priv;
struct dpu_kms *dpu_kms;
- if (!drm_enc || !drm_enc->dev || !drm_enc->dev->dev_private) {
+ if (!drm_enc || !drm_enc->dev) {
DPU_ERROR("invalid parameters\n");
return;
}
priv = drm_enc->dev->dev_private;
dpu_kms = to_dpu_kms(priv->kms);
- if (!dpu_kms) {
- DPU_ERROR("invalid dpu_kms\n");
- return;
- }
dpu_enc = to_dpu_encoder_virt(drm_enc);
if (!dpu_enc || !dpu_enc->cur_master) {
@@ -1184,7 +1174,6 @@ static void dpu_encoder_virt_disable(struct drm_encoder *drm_enc)
struct dpu_encoder_virt *dpu_enc = NULL;
struct msm_drm_private *priv;
struct dpu_kms *dpu_kms;
- struct drm_display_mode *mode;
int i = 0;
if (!drm_enc) {
@@ -1193,9 +1182,6 @@ static void dpu_encoder_virt_disable(struct drm_encoder *drm_enc)
} else if (!drm_enc->dev) {
DPU_ERROR("invalid dev\n");
return;
- } else if (!drm_enc->dev->dev_private) {
- DPU_ERROR("invalid dev_private\n");
- return;
}
dpu_enc = to_dpu_encoder_virt(drm_enc);
@@ -1204,8 +1190,6 @@ static void dpu_encoder_virt_disable(struct drm_encoder *drm_enc)
mutex_lock(&dpu_enc->enc_lock);
dpu_enc->enabled = false;
- mode = &drm_enc->crtc->state->adjusted_mode;
-
priv = drm_enc->dev->dev_private;
dpu_kms = to_dpu_kms(priv->kms);
@@ -1734,8 +1718,7 @@ static void dpu_encoder_vsync_event_handler(struct timer_list *t)
struct msm_drm_private *priv;
struct msm_drm_thread *event_thread;
- if (!drm_enc->dev || !drm_enc->dev->dev_private ||
- !drm_enc->crtc) {
+ if (!drm_enc->dev || !drm_enc->crtc) {
DPU_ERROR("invalid parameters\n");
return;
}
@@ -1914,8 +1897,6 @@ static int _dpu_encoder_debugfs_status_open(struct inode *inode,
static int _dpu_encoder_init_debugfs(struct drm_encoder *drm_enc)
{
struct dpu_encoder_virt *dpu_enc = to_dpu_encoder_virt(drm_enc);
- struct msm_drm_private *priv;
- struct dpu_kms *dpu_kms;
int i;
static const struct file_operations debugfs_status_fops = {
@@ -1927,14 +1908,11 @@ static int _dpu_encoder_init_debugfs(struct drm_encoder *drm_enc)
char name[DPU_NAME_SIZE];
- if (!drm_enc->dev || !drm_enc->dev->dev_private) {
+ if (!drm_enc->dev) {
DPU_ERROR("invalid encoder or kms\n");
return -EINVAL;
}
- priv = drm_enc->dev->dev_private;
- dpu_kms = to_dpu_kms(priv->kms);
-
snprintf(name, DPU_NAME_SIZE, "encoder%u", drm_enc->base.id);
/* create overall sub-directory for the encoder */
@@ -2042,9 +2020,8 @@ static int dpu_encoder_setup_display(struct dpu_encoder_virt *dpu_enc,
enum dpu_intf_type intf_type;
struct dpu_enc_phys_init_params phys_params;
- if (!dpu_enc || !dpu_kms) {
- DPU_ERROR("invalid arg(s), enc %d kms %d\n",
- dpu_enc != 0, dpu_kms != 0);
+ if (!dpu_enc) {
+ DPU_ERROR("invalid arg(s), enc %d\n", dpu_enc != 0);
return -EINVAL;
}
@@ -2133,14 +2110,12 @@ static void dpu_encoder_frame_done_timeout(struct timer_list *t)
struct dpu_encoder_virt *dpu_enc = from_timer(dpu_enc, t,
frame_done_timer);
struct drm_encoder *drm_enc = &dpu_enc->base;
- struct msm_drm_private *priv;
u32 event;
- if (!drm_enc->dev || !drm_enc->dev->dev_private) {
+ if (!drm_enc->dev) {
DPU_ERROR("invalid parameters\n");
return;
}
- priv = drm_enc->dev->dev_private;
if (!dpu_enc->frame_busy_mask[0] || !dpu_enc->crtc_frame_event_cb) {
DRM_DEBUG_KMS("id:%u invalid timeout frame_busy_mask=%lu\n",
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c
index 2923b63d95fe..047960949fbb 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c
@@ -124,13 +124,11 @@ static void dpu_encoder_phys_cmd_pp_rd_ptr_irq(void *arg, int irq_idx)
static void dpu_encoder_phys_cmd_ctl_start_irq(void *arg, int irq_idx)
{
struct dpu_encoder_phys *phys_enc = arg;
- struct dpu_encoder_phys_cmd *cmd_enc;
if (!phys_enc || !phys_enc->hw_ctl)
return;
DPU_ATRACE_BEGIN("ctl_start_irq");
- cmd_enc = to_dpu_encoder_phys_cmd(phys_enc);
atomic_add_unless(&phys_enc->pending_ctlstart_cnt, -1, 0);
@@ -316,13 +314,9 @@ end:
static void dpu_encoder_phys_cmd_irq_control(struct dpu_encoder_phys *phys_enc,
bool enable)
{
- struct dpu_encoder_phys_cmd *cmd_enc;
-
if (!phys_enc)
return;
- cmd_enc = to_dpu_encoder_phys_cmd(phys_enc);
-
trace_dpu_enc_phys_cmd_irq_ctrl(DRMID(phys_enc->parent),
phys_enc->hw_pp->idx - PINGPONG_0,
enable, atomic_read(&phys_enc->vblank_refcount));
@@ -355,7 +349,6 @@ static void dpu_encoder_phys_cmd_tearcheck_config(
struct drm_display_mode *mode;
bool tc_enable = true;
u32 vsync_hz;
- struct msm_drm_private *priv;
struct dpu_kms *dpu_kms;
if (!phys_enc || !phys_enc->hw_pp) {
@@ -373,11 +366,6 @@ static void dpu_encoder_phys_cmd_tearcheck_config(
}
dpu_kms = phys_enc->dpu_kms;
- if (!dpu_kms || !dpu_kms->dev || !dpu_kms->dev->dev_private) {
- DPU_ERROR("invalid device\n");
- return;
- }
- priv = dpu_kms->dev->dev_private;
/*
* TE default: dsi byte clock calculated base on 70 fps;
@@ -650,13 +638,10 @@ static int dpu_encoder_phys_cmd_wait_for_tx_complete(
struct dpu_encoder_phys *phys_enc)
{
int rc;
- struct dpu_encoder_phys_cmd *cmd_enc;
if (!phys_enc)
return -EINVAL;
- cmd_enc = to_dpu_encoder_phys_cmd(phys_enc);
-
rc = _dpu_encoder_phys_cmd_wait_for_idle(phys_enc);
if (rc) {
DRM_ERROR("failed wait_for_idle: id:%u ret:%d intf:%d\n",
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
index b9c84fb4d4a1..3123ef873cdf 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
@@ -374,7 +374,7 @@ static void dpu_encoder_phys_vid_mode_set(
struct drm_display_mode *mode,
struct drm_display_mode *adj_mode)
{
- if (!phys_enc || !phys_enc->dpu_kms) {
+ if (!phys_enc) {
DPU_ERROR("invalid encoder/kms\n");
return;
}
@@ -566,16 +566,13 @@ static void dpu_encoder_phys_vid_prepare_for_kickoff(
static void dpu_encoder_phys_vid_disable(struct dpu_encoder_phys *phys_enc)
{
- struct msm_drm_private *priv;
unsigned long lock_flags;
int ret;
- if (!phys_enc || !phys_enc->parent || !phys_enc->parent->dev ||
- !phys_enc->parent->dev->dev_private) {
+ if (!phys_enc || !phys_enc->parent || !phys_enc->parent->dev) {
DPU_ERROR("invalid encoder/device\n");
return;
}
- priv = phys_enc->parent->dev->dev_private;
if (!phys_enc->hw_intf || !phys_enc->hw_ctl) {
DPU_ERROR("invalid hw_intf %d hw_ctl %d\n",
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
index 58b0485dc375..6c92f0fbeac9 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
@@ -30,10 +30,6 @@
#define CREATE_TRACE_POINTS
#include "dpu_trace.h"
-static const char * const iommu_ports[] = {
- "mdp_0",
-};
-
/*
* To enable overall DRM driver logging
* # echo 0x2 > /sys/module/drm/parameters/debug
@@ -68,16 +64,14 @@ static int _dpu_danger_signal_status(struct seq_file *s,
bool danger_status)
{
struct dpu_kms *kms = (struct dpu_kms *)s->private;
- struct msm_drm_private *priv;
struct dpu_danger_safe_status status;
int i;
- if (!kms->dev || !kms->dev->dev_private || !kms->hw_mdp) {
+ if (!kms->hw_mdp) {
DPU_ERROR("invalid arg(s)\n");
return 0;
}
- priv = kms->dev->dev_private;
memset(&status, 0, sizeof(struct dpu_danger_safe_status));
pm_runtime_get_sync(&kms->pdev->dev);
@@ -153,13 +147,7 @@ static int _dpu_debugfs_show_regset32(struct seq_file *s, void *data)
return 0;
dev = dpu_kms->dev;
- if (!dev)
- return 0;
-
priv = dev->dev_private;
- if (!priv)
- return 0;
-
base = dpu_kms->mmio + regset->offset;
/* insert padding spaces, if needed */
@@ -280,7 +268,6 @@ static void dpu_kms_prepare_commit(struct msm_kms *kms,
struct drm_atomic_state *state)
{
struct dpu_kms *dpu_kms;
- struct msm_drm_private *priv;
struct drm_device *dev;
struct drm_crtc *crtc;
struct drm_crtc_state *crtc_state;
@@ -292,10 +279,6 @@ static void dpu_kms_prepare_commit(struct msm_kms *kms,
dpu_kms = to_dpu_kms(kms);
dev = dpu_kms->dev;
- if (!dev || !dev->dev_private)
- return;
- priv = dev->dev_private;
-
/* Call prepare_commit for all affected encoders */
for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
drm_for_each_encoder_mask(encoder, crtc->dev,
@@ -333,7 +316,6 @@ void dpu_kms_encoder_enable(struct drm_encoder *encoder)
if (funcs && funcs->commit)
funcs->commit(encoder);
- WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
drm_for_each_crtc(crtc, dev) {
if (!(crtc->state->encoder_mask & drm_encoder_mask(encoder)))
continue;
@@ -464,16 +446,6 @@ static void _dpu_kms_drm_obj_destroy(struct dpu_kms *dpu_kms)
struct msm_drm_private *priv;
int i;
- if (!dpu_kms) {
- DPU_ERROR("invalid dpu_kms\n");
- return;
- } else if (!dpu_kms->dev) {
- DPU_ERROR("invalid dev\n");
- return;
- } else if (!dpu_kms->dev->dev_private) {
- DPU_ERROR("invalid dev_private\n");
- return;
- }
priv = dpu_kms->dev->dev_private;
for (i = 0; i < priv->num_crtcs; i++)
@@ -505,7 +477,6 @@ static int _dpu_kms_drm_obj_init(struct dpu_kms *dpu_kms)
int primary_planes_idx = 0, cursor_planes_idx = 0, i, ret;
int max_crtc_count;
-
dev = dpu_kms->dev;
priv = dev->dev_private;
catalog = dpu_kms->catalog;
@@ -585,8 +556,6 @@ static void _dpu_kms_hw_destroy(struct dpu_kms *dpu_kms)
int i;
dev = dpu_kms->dev;
- if (!dev)
- return;
if (dpu_kms->hw_intr)
dpu_hw_intr_destroy(dpu_kms->hw_intr);
@@ -725,8 +694,7 @@ static void _dpu_kms_mmu_destroy(struct dpu_kms *dpu_kms)
mmu = dpu_kms->base.aspace->mmu;
- mmu->funcs->detach(mmu, (const char **)iommu_ports,
- ARRAY_SIZE(iommu_ports));
+ mmu->funcs->detach(mmu);
msm_gem_address_space_put(dpu_kms->base.aspace);
dpu_kms->base.aspace = NULL;
@@ -752,8 +720,7 @@ static int _dpu_kms_mmu_init(struct dpu_kms *dpu_kms)
return PTR_ERR(aspace);
}
- ret = aspace->mmu->funcs->attach(aspace->mmu, iommu_ports,
- ARRAY_SIZE(iommu_ports));
+ ret = aspace->mmu->funcs->attach(aspace->mmu);
if (ret) {
DPU_ERROR("failed to attach iommu %d\n", ret);
msm_gem_address_space_put(aspace);
@@ -803,16 +770,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms)
dpu_kms = to_dpu_kms(kms);
dev = dpu_kms->dev;
- if (!dev) {
- DPU_ERROR("invalid device\n");
- return rc;
- }
-
priv = dev->dev_private;
- if (!priv) {
- DPU_ERROR("invalid private data\n");
- return rc;
- }
atomic_set(&dpu_kms->bandwidth_ref, 0);
@@ -974,7 +932,7 @@ struct msm_kms *dpu_kms_init(struct drm_device *dev)
struct dpu_kms *dpu_kms;
int irq;
- if (!dev || !dev->dev_private) {
+ if (!dev) {
DPU_ERROR("drm device node invalid\n");
return ERR_PTR(-EINVAL);
}
@@ -1064,11 +1022,6 @@ static int __maybe_unused dpu_runtime_suspend(struct device *dev)
struct dss_module_power *mp = &dpu_kms->mp;
ddev = dpu_kms->dev;
- if (!ddev) {
- DPU_ERROR("invalid drm_device\n");
- return rc;
- }
-
rc = msm_dss_enable_clk(mp->clk_config, mp->num_clk, false);
if (rc)
DPU_ERROR("clock disable failed rc:%d\n", rc);
@@ -1086,11 +1039,6 @@ static int __maybe_unused dpu_runtime_resume(struct device *dev)
struct dss_module_power *mp = &dpu_kms->mp;
ddev = dpu_kms->dev;
- if (!ddev) {
- DPU_ERROR("invalid drm_device\n");
- return rc;
- }
-
rc = msm_dss_enable_clk(mp->clk_config, mp->num_clk, true);
if (rc) {
DPU_ERROR("clock enable failed rc:%d\n", rc);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
index 959d03e007fa..c6169e7df19d 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h
@@ -139,10 +139,6 @@ struct vsync_info {
#define to_dpu_kms(x) container_of(x, struct dpu_kms, base)
-/* get struct msm_kms * from drm_device * */
-#define ddev_to_msm_kms(D) ((D) && (D)->dev_private ? \
- ((struct msm_drm_private *)((D)->dev_private))->kms : NULL)
-
/**
* Debugfs functions - extra helper functions for debugfs support
*
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c
index 8d24b79fd400..991f4c8f8a12 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_vbif.c
@@ -154,10 +154,6 @@ void dpu_vbif_set_ot_limit(struct dpu_kms *dpu_kms,
u32 ot_lim;
int ret, i;
- if (!dpu_kms) {
- DPU_ERROR("invalid arguments\n");
- return;
- }
mdp = dpu_kms->hw_mdp;
for (i = 0; i < ARRAY_SIZE(dpu_kms->hw_vbif); i++) {
@@ -214,7 +210,7 @@ void dpu_vbif_set_qos_remap(struct dpu_kms *dpu_kms,
const struct dpu_vbif_qos_tbl *qos_tbl;
int i;
- if (!dpu_kms || !params || !dpu_kms->hw_mdp) {
+ if (!params || !dpu_kms->hw_mdp) {
DPU_ERROR("invalid arguments\n");
return;
}
diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
index 50711ccc8691..dda05436f716 100644
--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
+++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
@@ -157,10 +157,6 @@ static long mdp4_round_pixclk(struct msm_kms *kms, unsigned long rate,
}
}
-static const char * const iommu_ports[] = {
- "mdp_port0_cb0", "mdp_port1_cb0",
-};
-
static void mdp4_destroy(struct msm_kms *kms)
{
struct mdp4_kms *mdp4_kms = to_mdp4_kms(to_mdp_kms(kms));
@@ -172,8 +168,7 @@ static void mdp4_destroy(struct msm_kms *kms)
drm_gem_object_put_unlocked(mdp4_kms->blank_cursor_bo);
if (aspace) {
- aspace->mmu->funcs->detach(aspace->mmu,
- iommu_ports, ARRAY_SIZE(iommu_ports));
+ aspace->mmu->funcs->detach(aspace->mmu);
msm_gem_address_space_put(aspace);
}
@@ -524,8 +519,7 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev)
kms->aspace = aspace;
- ret = aspace->mmu->funcs->attach(aspace->mmu, iommu_ports,
- ARRAY_SIZE(iommu_ports));
+ ret = aspace->mmu->funcs->attach(aspace->mmu);
if (ret)
goto fail;
} else {
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c
index f6e71ff539ca..1f48f64539a2 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c
@@ -14,7 +14,7 @@ struct mdp5_cfg_handler {
/* mdp5_cfg must be exposed (used in mdp5.xml.h) */
const struct mdp5_cfg_hw *mdp5_cfg = NULL;
-const struct mdp5_cfg_hw msm8x74v1_config = {
+static const struct mdp5_cfg_hw msm8x74v1_config = {
.name = "msm8x74v1",
.mdp = {
.count = 1,
@@ -98,7 +98,7 @@ const struct mdp5_cfg_hw msm8x74v1_config = {
.max_clk = 200000000,
};
-const struct mdp5_cfg_hw msm8x74v2_config = {
+static const struct mdp5_cfg_hw msm8x74v2_config = {
.name = "msm8x74",
.mdp = {
.count = 1,
@@ -180,7 +180,7 @@ const struct mdp5_cfg_hw msm8x74v2_config = {
.max_clk = 200000000,
};
-const struct mdp5_cfg_hw apq8084_config = {
+static const struct mdp5_cfg_hw apq8084_config = {
.name = "apq8084",
.mdp = {
.count = 1,
@@ -275,7 +275,7 @@ const struct mdp5_cfg_hw apq8084_config = {
.max_clk = 320000000,
};
-const struct mdp5_cfg_hw msm8x16_config = {
+static const struct mdp5_cfg_hw msm8x16_config = {
.name = "msm8x16",
.mdp = {
.count = 1,
@@ -342,7 +342,7 @@ const struct mdp5_cfg_hw msm8x16_config = {
.max_clk = 320000000,
};
-const struct mdp5_cfg_hw msm8x94_config = {
+static const struct mdp5_cfg_hw msm8x94_config = {
.name = "msm8x94",
.mdp = {
.count = 1,
@@ -437,7 +437,7 @@ const struct mdp5_cfg_hw msm8x94_config = {
.max_clk = 400000000,
};
-const struct mdp5_cfg_hw msm8x96_config = {
+static const struct mdp5_cfg_hw msm8x96_config = {
.name = "msm8x96",
.mdp = {
.count = 1,
@@ -545,7 +545,104 @@ const struct mdp5_cfg_hw msm8x96_config = {
.max_clk = 412500000,
};
-const struct mdp5_cfg_hw msm8917_config = {
+const struct mdp5_cfg_hw msm8x76_config = {
+ .name = "msm8x76",
+ .mdp = {
+ .count = 1,
+ .caps = MDP_CAP_SMP |
+ MDP_CAP_DSC |
+ MDP_CAP_SRC_SPLIT |
+ 0,
+ },
+ .ctl = {
+ .count = 3,
+ .base = { 0x01000, 0x01200, 0x01400 },
+ .flush_hw_mask = 0xffffffff,
+ },
+ .smp = {
+ .mmb_count = 10,
+ .mmb_size = 10240,
+ .clients = {
+ [SSPP_VIG0] = 1, [SSPP_VIG1] = 9,
+ [SSPP_DMA0] = 4,
+ [SSPP_RGB0] = 7, [SSPP_RGB1] = 8,
+ },
+ },
+ .pipe_vig = {
+ .count = 2,
+ .base = { 0x04000, 0x06000 },
+ .caps = MDP_PIPE_CAP_HFLIP |
+ MDP_PIPE_CAP_VFLIP |
+ MDP_PIPE_CAP_SCALE |
+ MDP_PIPE_CAP_CSC |
+ MDP_PIPE_CAP_DECIMATION |
+ MDP_PIPE_CAP_SW_PIX_EXT |
+ 0,
+ },
+ .pipe_rgb = {
+ .count = 2,
+ .base = { 0x14000, 0x16000 },
+ .caps = MDP_PIPE_CAP_HFLIP |
+ MDP_PIPE_CAP_VFLIP |
+ MDP_PIPE_CAP_DECIMATION |
+ MDP_PIPE_CAP_SW_PIX_EXT |
+ 0,
+ },
+ .pipe_dma = {
+ .count = 1,
+ .base = { 0x24000 },
+ .caps = MDP_PIPE_CAP_HFLIP |
+ MDP_PIPE_CAP_VFLIP |
+ MDP_PIPE_CAP_SW_PIX_EXT |
+ 0,
+ },
+ .pipe_cursor = {
+ .count = 1,
+ .base = { 0x440DC },
+ .caps = MDP_PIPE_CAP_HFLIP |
+ MDP_PIPE_CAP_VFLIP |
+ MDP_PIPE_CAP_SW_PIX_EXT |
+ MDP_PIPE_CAP_CURSOR |
+ 0,
+ },
+ .lm = {
+ .count = 2,
+ .base = { 0x44000, 0x45000 },
+ .instances = {
+ { .id = 0, .pp = 0, .dspp = 0,
+ .caps = MDP_LM_CAP_DISPLAY, },
+ { .id = 1, .pp = -1, .dspp = -1,
+ .caps = MDP_LM_CAP_WB },
+ },
+ .nb_stages = 8,
+ .max_width = 2560,
+ .max_height = 0xFFFF,
+ },
+ .dspp = {
+ .count = 1,
+ .base = { 0x54000 },
+
+ },
+ .pp = {
+ .count = 3,
+ .base = { 0x70000, 0x70800, 0x72000 },
+ },
+ .dsc = {
+ .count = 2,
+ .base = { 0x80000, 0x80400 },
+ },
+ .intf = {
+ .base = { 0x6a000, 0x6a800, 0x6b000 },
+ .connect = {
+ [0] = INTF_DISABLED,
+ [1] = INTF_DSI,
+ [2] = INTF_DSI,
+ },
+ },
+ .max_clk = 360000000,
+};
+
+static const struct mdp5_cfg_hw msm8917_config = {
.name = "msm8917",
.mdp = {
.count = 1,
@@ -630,7 +727,7 @@ const struct mdp5_cfg_hw msm8917_config = {
.max_clk = 320000000,
};
-const struct mdp5_cfg_hw msm8998_config = {
+static const struct mdp5_cfg_hw msm8998_config = {
.name = "msm8998",
.mdp = {
.count = 1,
@@ -745,6 +842,7 @@ static const struct mdp5_cfg_handler cfg_handlers_v1[] = {
{ .revision = 6, .config = { .hw = &msm8x16_config } },
{ .revision = 9, .config = { .hw = &msm8x94_config } },
{ .revision = 7, .config = { .hw = &msm8x96_config } },
+ { .revision = 11, .config = { .hw = &msm8x76_config } },
{ .revision = 15, .config = { .hw = &msm8917_config } },
};
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
index eb0b4b7dc7cc..05cc04f729d6 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
@@ -214,7 +214,6 @@ static void blend_setup(struct drm_crtc *crtc)
struct mdp5_pipeline *pipeline = &mdp5_cstate->pipeline;
struct mdp5_kms *mdp5_kms = get_kms(crtc);
struct drm_plane *plane;
- const struct mdp5_cfg_hw *hw_cfg;
struct mdp5_plane_state *pstate, *pstates[STAGE_MAX + 1] = {NULL};
const struct mdp_format *format;
struct mdp5_hw_mixer *mixer = pipeline->mixer;
@@ -232,8 +231,6 @@ static void blend_setup(struct drm_crtc *crtc)
u32 val;
#define blender(stage) ((stage) - STAGE0)
- hw_cfg = mdp5_cfg_get_hw_config(mdp5_kms->cfg);
-
spin_lock_irqsave(&mdp5_crtc->lm_lock, flags);
/* ctl could be released already when we are shutting down: */
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
index 91cd76a2bab1..e43ecd4be10a 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
@@ -19,10 +19,6 @@
#include "msm_mmu.h"
#include "mdp5_kms.h"
-static const char *iommu_ports[] = {
- "mdp_0",
-};
-
static int mdp5_hw_init(struct msm_kms *kms)
{
struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms));
@@ -233,8 +229,7 @@ static void mdp5_kms_destroy(struct msm_kms *kms)
mdp5_pipe_destroy(mdp5_kms->hwpipes[i]);
if (aspace) {
- aspace->mmu->funcs->detach(aspace->mmu,
- iommu_ports, ARRAY_SIZE(iommu_ports));
+ aspace->mmu->funcs->detach(aspace->mmu);
msm_gem_address_space_put(aspace);
}
}
@@ -314,6 +309,10 @@ int mdp5_disable(struct mdp5_kms *mdp5_kms)
mdp5_kms->enable_count--;
WARN_ON(mdp5_kms->enable_count < 0);
+ if (mdp5_kms->tbu_rt_clk)
+ clk_disable_unprepare(mdp5_kms->tbu_rt_clk);
+ if (mdp5_kms->tbu_clk)
+ clk_disable_unprepare(mdp5_kms->tbu_clk);
clk_disable_unprepare(mdp5_kms->ahb_clk);
clk_disable_unprepare(mdp5_kms->axi_clk);
clk_disable_unprepare(mdp5_kms->core_clk);
@@ -334,6 +333,10 @@ int mdp5_enable(struct mdp5_kms *mdp5_kms)
clk_prepare_enable(mdp5_kms->core_clk);
if (mdp5_kms->lut_clk)
clk_prepare_enable(mdp5_kms->lut_clk);
+ if (mdp5_kms->tbu_clk)
+ clk_prepare_enable(mdp5_kms->tbu_clk);
+ if (mdp5_kms->tbu_rt_clk)
+ clk_prepare_enable(mdp5_kms->tbu_rt_clk);
return 0;
}
@@ -466,14 +469,11 @@ static int modeset_init(struct mdp5_kms *mdp5_kms)
{
struct drm_device *dev = mdp5_kms->dev;
struct msm_drm_private *priv = dev->dev_private;
- const struct mdp5_cfg_hw *hw_cfg;
unsigned int num_crtcs;
int i, ret, pi = 0, ci = 0;
struct drm_plane *primary[MAX_BASES] = { NULL };
struct drm_plane *cursor[MAX_BASES] = { NULL };
- hw_cfg = mdp5_cfg_get_hw_config(mdp5_kms->cfg);
-
/*
* Construct encoders and modeset initialize connector devices
* for each external display interface.
@@ -737,8 +737,7 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev)
kms->aspace = aspace;
- ret = aspace->mmu->funcs->attach(aspace->mmu, iommu_ports,
- ARRAY_SIZE(iommu_ports));
+ ret = aspace->mmu->funcs->attach(aspace->mmu);
if (ret) {
DRM_DEV_ERROR(&pdev->dev, "failed to attach iommu: %d\n",
ret);
@@ -974,6 +973,8 @@ static int mdp5_init(struct platform_device *pdev, struct drm_device *dev)
/* optional clocks: */
get_clk(pdev, &mdp5_kms->lut_clk, "lut", false);
+ get_clk(pdev, &mdp5_kms->tbu_clk, "tbu", false);
+ get_clk(pdev, &mdp5_kms->tbu_rt_clk, "tbu_rt", false);
/* we need to set a default rate before enabling. Set a safe
* rate first, then figure out hw revision, and then set a
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h
index d1bf4fdfc815..128866742593 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h
@@ -53,6 +53,8 @@ struct mdp5_kms {
struct clk *ahb_clk;
struct clk *core_clk;
struct clk *lut_clk;
+ struct clk *tbu_clk;
+ struct clk *tbu_rt_clk;
struct clk *vsync_clk;
/*
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c
index b31cfb554fa2..d7fa2c49e741 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c
@@ -121,7 +121,6 @@ uint32_t mdp5_smp_calculate(struct mdp5_smp *smp,
struct mdp5_kms *mdp5_kms = get_kms(smp);
int rev = mdp5_cfg_get_hw_rev(mdp5_kms->cfg);
int i, hsub, nplanes, nlines;
- u32 fmt = format->base.pixel_format;
uint32_t blkcfg = 0;
nplanes = info->num_planes;
@@ -135,7 +134,6 @@ uint32_t mdp5_smp_calculate(struct mdp5_smp *smp,
* them together, writes to SMP using a single client.
*/
if ((rev > 0) && (format->chroma_sample > CHROMA_FULL)) {
- fmt = DRM_FORMAT_NV24;
nplanes = 2;
/* if decimation is enabled, HW decimates less on the
diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.c b/drivers/gpu/drm/msm/dsi/dsi_cfg.c
index b7b7c1a9164a..86ad3fdf207d 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_cfg.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.c
@@ -66,6 +66,26 @@ static const struct msm_dsi_config msm8916_dsi_cfg = {
.num_dsi = 1,
};
+static const char * const dsi_8976_bus_clk_names[] = {
+ "mdp_core", "iface", "bus",
+};
+
+static const struct msm_dsi_config msm8976_dsi_cfg = {
+ .io_offset = DSI_6G_REG_SHIFT,
+ .reg_cfg = {
+ .num = 3,
+ .regs = {
+ {"gdsc", -1, -1},
+ {"vdda", 100000, 100}, /* 1.2 V */
+ {"vddio", 100000, 100}, /* 1.8 V */
+ },
+ },
+ .bus_clk_names = dsi_8976_bus_clk_names,
+ .num_bus_clks = ARRAY_SIZE(dsi_8976_bus_clk_names),
+ .io_start = { 0x1a94000, 0x1a96000 },
+ .num_dsi = 2,
+};
+
static const struct msm_dsi_config msm8994_dsi_cfg = {
.io_offset = DSI_6G_REG_SHIFT,
.reg_cfg = {
@@ -147,7 +167,7 @@ static const struct msm_dsi_config sdm845_dsi_cfg = {
.num_dsi = 2,
};
-const static struct msm_dsi_host_cfg_ops msm_dsi_v2_host_ops = {
+static const struct msm_dsi_host_cfg_ops msm_dsi_v2_host_ops = {
.link_clk_enable = dsi_link_clk_enable_v2,
.link_clk_disable = dsi_link_clk_disable_v2,
.clk_init_ver = dsi_clk_init_v2,
@@ -158,7 +178,7 @@ const static struct msm_dsi_host_cfg_ops msm_dsi_v2_host_ops = {
.calc_clk_rate = dsi_calc_clk_rate_v2,
};
-const static struct msm_dsi_host_cfg_ops msm_dsi_6g_host_ops = {
+static const struct msm_dsi_host_cfg_ops msm_dsi_6g_host_ops = {
.link_clk_enable = dsi_link_clk_enable_6g,
.link_clk_disable = dsi_link_clk_disable_6g,
.clk_init_ver = NULL,
@@ -169,7 +189,7 @@ const static struct msm_dsi_host_cfg_ops msm_dsi_6g_host_ops = {
.calc_clk_rate = dsi_calc_clk_rate_6g,
};
-const static struct msm_dsi_host_cfg_ops msm_dsi_6g_v2_host_ops = {
+static const struct msm_dsi_host_cfg_ops msm_dsi_6g_v2_host_ops = {
.link_clk_enable = dsi_link_clk_enable_6g,
.link_clk_disable = dsi_link_clk_disable_6g,
.clk_init_ver = dsi_clk_init_6g_v2,
@@ -197,6 +217,8 @@ static const struct msm_dsi_cfg_handler dsi_cfg_handlers[] = {
&msm8916_dsi_cfg, &msm_dsi_6g_host_ops},
{MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V1_4_1,
&msm8996_dsi_cfg, &msm_dsi_6g_host_ops},
+ {MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V1_4_2,
+ &msm8976_dsi_cfg, &msm_dsi_6g_host_ops},
{MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V2_2_0,
&msm8998_dsi_cfg, &msm_dsi_6g_v2_host_ops},
{MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V2_2_1,
diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.h b/drivers/gpu/drm/msm/dsi/dsi_cfg.h
index e2b7a7dfbe49..50a37ceb6a25 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_cfg.h
+++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.h
@@ -17,6 +17,7 @@
#define MSM_DSI_6G_VER_MINOR_V1_3 0x10030000
#define MSM_DSI_6G_VER_MINOR_V1_3_1 0x10030001
#define MSM_DSI_6G_VER_MINOR_V1_4_1 0x10040001
+#define MSM_DSI_6G_VER_MINOR_V1_4_2 0x10040002
#define MSM_DSI_6G_VER_MINOR_V2_2_0 0x20000000
#define MSM_DSI_6G_VER_MINOR_V2_2_1 0x20020001
diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c
index 1e7b1be25bb0..458cec82ae13 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_host.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_host.c
@@ -1293,14 +1293,13 @@ static int dsi_cmd_dma_tx(struct msm_dsi_host *msm_host, int len)
static int dsi_cmd_dma_rx(struct msm_dsi_host *msm_host,
u8 *buf, int rx_byte, int pkt_size)
{
- u32 *lp, *temp, data;
+ u32 *temp, data;
int i, j = 0, cnt;
u32 read_cnt;
u8 reg[16];
int repeated_bytes = 0;
int buf_offset = buf - msm_host->rx_buf;
- lp = (u32 *)buf;
temp = (u32 *)reg;
cnt = (rx_byte + 3) >> 2;
if (cnt > 4)
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
index 3522863a4984..b0cfa67d2a57 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
@@ -145,7 +145,7 @@ int msm_dsi_dphy_timing_calc_v2(struct msm_dsi_dphy_timing *timing,
{
const unsigned long bit_rate = clk_req->bitclk_rate;
const unsigned long esc_rate = clk_req->escclk_rate;
- s32 ui, ui_x8, lpx;
+ s32 ui, ui_x8;
s32 tmax, tmin;
s32 pcnt0 = 50;
s32 pcnt1 = 50;
@@ -175,7 +175,6 @@ int msm_dsi_dphy_timing_calc_v2(struct msm_dsi_dphy_timing *timing,
ui = mult_frac(NSEC_PER_MSEC, coeff, bit_rate / 1000);
ui_x8 = ui << 3;
- lpx = mult_frac(NSEC_PER_MSEC, coeff, esc_rate / 1000);
temp = S_DIV_ROUND_UP(38 * coeff - val_ckln * ui, ui_x8);
tmin = max_t(s32, temp, 0);
@@ -262,7 +261,7 @@ int msm_dsi_dphy_timing_calc_v3(struct msm_dsi_dphy_timing *timing,
{
const unsigned long bit_rate = clk_req->bitclk_rate;
const unsigned long esc_rate = clk_req->escclk_rate;
- s32 ui, ui_x8, lpx;
+ s32 ui, ui_x8;
s32 tmax, tmin;
s32 pcnt0 = 50;
s32 pcnt1 = 50;
@@ -284,7 +283,6 @@ int msm_dsi_dphy_timing_calc_v3(struct msm_dsi_dphy_timing *timing,
ui = mult_frac(NSEC_PER_MSEC, coeff, bit_rate / 1000);
ui_x8 = ui << 3;
- lpx = mult_frac(NSEC_PER_MSEC, coeff, esc_rate / 1000);
temp = S_DIV_ROUND_UP(38 * coeff, ui_x8);
tmin = max_t(s32, temp, 0);
@@ -485,6 +483,8 @@ static const struct of_device_id dsi_phy_dt_match[] = {
#ifdef CONFIG_DRM_MSM_DSI_28NM_PHY
{ .compatible = "qcom,dsi-phy-28nm-hpm",
.data = &dsi_phy_28nm_hpm_cfgs },
+ { .compatible = "qcom,dsi-phy-28nm-hpm-fam-b",
+ .data = &dsi_phy_28nm_hpm_famb_cfgs },
{ .compatible = "qcom,dsi-phy-28nm-lp",
.data = &dsi_phy_28nm_lp_cfgs },
#endif
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
index c4069ce6afe6..24b294ed3059 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
@@ -40,6 +40,7 @@ struct msm_dsi_phy_cfg {
};
extern const struct msm_dsi_phy_cfg dsi_phy_28nm_hpm_cfgs;
+extern const struct msm_dsi_phy_cfg dsi_phy_28nm_hpm_famb_cfgs;
extern const struct msm_dsi_phy_cfg dsi_phy_28nm_lp_cfgs;
extern const struct msm_dsi_phy_cfg dsi_phy_20nm_cfgs;
extern const struct msm_dsi_phy_cfg dsi_phy_28nm_8960_cfgs;
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c
index b3f678f6c2aa..c3c580cfd8b1 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c
@@ -39,15 +39,10 @@ static void dsi_28nm_dphy_set_timing(struct msm_dsi_phy *phy,
DSI_28nm_PHY_TIMING_CTRL_11_TRIG3_CMD(0));
}
-static void dsi_28nm_phy_regulator_ctrl(struct msm_dsi_phy *phy, bool enable)
+static void dsi_28nm_phy_regulator_enable_dcdc(struct msm_dsi_phy *phy)
{
void __iomem *base = phy->reg_base;
- if (!enable) {
- dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CAL_PWR_CFG, 0);
- return;
- }
-
dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_0, 0x0);
dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CAL_PWR_CFG, 1);
dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_5, 0);
@@ -56,6 +51,39 @@ static void dsi_28nm_phy_regulator_ctrl(struct msm_dsi_phy *phy, bool enable)
dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_1, 0x9);
dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_0, 0x7);
dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_4, 0x20);
+ dsi_phy_write(phy->base + REG_DSI_28nm_PHY_LDO_CNTRL, 0x00);
+}
+
+static void dsi_28nm_phy_regulator_enable_ldo(struct msm_dsi_phy *phy)
+{
+ void __iomem *base = phy->reg_base;
+
+ dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_0, 0x0);
+ dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CAL_PWR_CFG, 0);
+ dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_5, 0x7);
+ dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_3, 0);
+ dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_2, 0x1);
+ dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_1, 0x1);
+ dsi_phy_write(base + REG_DSI_28nm_PHY_REGULATOR_CTRL_4, 0x20);
+
+ if (phy->cfg->type == MSM_DSI_PHY_28NM_LP)
+ dsi_phy_write(phy->base + REG_DSI_28nm_PHY_LDO_CNTRL, 0x05);
+ else
+ dsi_phy_write(phy->base + REG_DSI_28nm_PHY_LDO_CNTRL, 0x0d);
+}
+
+static void dsi_28nm_phy_regulator_ctrl(struct msm_dsi_phy *phy, bool enable)
+{
+ if (!enable) {
+ dsi_phy_write(phy->reg_base +
+ REG_DSI_28nm_PHY_REGULATOR_CAL_PWR_CFG, 0);
+ return;
+ }
+
+ if (phy->regulator_ldo_mode)
+ dsi_28nm_phy_regulator_enable_ldo(phy);
+ else
+ dsi_28nm_phy_regulator_enable_dcdc(phy);
}
static int dsi_28nm_phy_enable(struct msm_dsi_phy *phy, int src_pll_id,
@@ -77,8 +105,6 @@ static int dsi_28nm_phy_enable(struct msm_dsi_phy *phy, int src_pll_id,
dsi_28nm_phy_regulator_ctrl(phy, true);
- dsi_phy_write(base + REG_DSI_28nm_PHY_LDO_CNTRL, 0x00);
-
dsi_28nm_dphy_set_timing(phy, timing);
dsi_phy_write(base + REG_DSI_28nm_PHY_CTRL_1, 0x00);
@@ -142,6 +168,24 @@ const struct msm_dsi_phy_cfg dsi_phy_28nm_hpm_cfgs = {
.num_dsi_phy = 2,
};
+const struct msm_dsi_phy_cfg dsi_phy_28nm_hpm_famb_cfgs = {
+ .type = MSM_DSI_PHY_28NM_HPM,
+ .src_pll_truthtable = { {true, true}, {false, true} },
+ .reg_cfg = {
+ .num = 1,
+ .regs = {
+ {"vddio", 100000, 100},
+ },
+ },
+ .ops = {
+ .enable = dsi_28nm_phy_enable,
+ .disable = dsi_28nm_phy_disable,
+ .init = msm_dsi_phy_init_common,
+ },
+ .io_start = { 0x1a94400, 0x1a96400 },
+ .num_dsi_phy = 2,
+};
+
const struct msm_dsi_phy_cfg dsi_phy_28nm_lp_cfgs = {
.type = MSM_DSI_PHY_28NM_LP,
.src_pll_truthtable = { {true, true}, {true, true} },
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy.c
index 1697e61f9c2f..8a38d4b95102 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi_phy.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy.c
@@ -29,8 +29,12 @@ static int msm_hdmi_phy_resource_init(struct hdmi_phy *phy)
reg = devm_regulator_get(dev, cfg->reg_names[i]);
if (IS_ERR(reg)) {
ret = PTR_ERR(reg);
- DRM_DEV_ERROR(dev, "failed to get phy regulator: %s (%d)\n",
- cfg->reg_names[i], ret);
+ if (ret != -EPROBE_DEFER) {
+ DRM_DEV_ERROR(dev,
+ "failed to get phy regulator: %s (%d)\n",
+ cfg->reg_names[i], ret);
+ }
+
return ret;
}
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index a052364a5d74..18f3a5c53ffb 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -16,6 +16,7 @@
#include <linux/pm_opp.h>
#include <linux/devfreq.h>
#include <linux/devcoredump.h>
+#include <linux/sched/task.h>
/*
* Power Management:
@@ -838,7 +839,7 @@ msm_gpu_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev,
return ERR_CAST(aspace);
}
- ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
+ ret = aspace->mmu->funcs->attach(aspace->mmu);
if (ret) {
msm_gem_address_space_put(aspace);
return ERR_PTR(ret);
@@ -995,8 +996,7 @@ void msm_gpu_cleanup(struct msm_gpu *gpu)
msm_gem_kernel_put(gpu->memptrs_bo, gpu->aspace, false);
if (!IS_ERR_OR_NULL(gpu->aspace)) {
- gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu,
- NULL, 0);
+ gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu);
msm_gem_address_space_put(gpu->aspace);
}
}
diff --git a/drivers/gpu/drm/msm/msm_gpummu.c b/drivers/gpu/drm/msm/msm_gpummu.c
index 34f643a0c28a..34980d8eb7ad 100644
--- a/drivers/gpu/drm/msm/msm_gpummu.c
+++ b/drivers/gpu/drm/msm/msm_gpummu.c
@@ -21,14 +21,12 @@ struct msm_gpummu {
#define GPUMMU_PAGE_SIZE SZ_4K
#define TABLE_SIZE (sizeof(uint32_t) * GPUMMU_VA_RANGE / GPUMMU_PAGE_SIZE)
-static int msm_gpummu_attach(struct msm_mmu *mmu, const char * const *names,
- int cnt)
+static int msm_gpummu_attach(struct msm_mmu *mmu)
{
return 0;
}
-static void msm_gpummu_detach(struct msm_mmu *mmu, const char * const *names,
- int cnt)
+static void msm_gpummu_detach(struct msm_mmu *mmu)
{
}
diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
index 8c95c31e2b12..ad58cfe5998e 100644
--- a/drivers/gpu/drm/msm/msm_iommu.c
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -23,16 +23,14 @@ static int msm_fault_handler(struct iommu_domain *domain, struct device *dev,
return 0;
}
-static int msm_iommu_attach(struct msm_mmu *mmu, const char * const *names,
- int cnt)
+static int msm_iommu_attach(struct msm_mmu *mmu)
{
struct msm_iommu *iommu = to_msm_iommu(mmu);
return iommu_attach_device(iommu->domain, mmu->dev);
}
-static void msm_iommu_detach(struct msm_mmu *mmu, const char * const *names,
- int cnt)
+static void msm_iommu_detach(struct msm_mmu *mmu)
{
struct msm_iommu *iommu = to_msm_iommu(mmu);
diff --git a/drivers/gpu/drm/msm/msm_mmu.h b/drivers/gpu/drm/msm/msm_mmu.h
index 871d56303697..67a623f14319 100644
--- a/drivers/gpu/drm/msm/msm_mmu.h
+++ b/drivers/gpu/drm/msm/msm_mmu.h
@@ -10,8 +10,8 @@
#include <linux/iommu.h>
struct msm_mmu_funcs {
- int (*attach)(struct msm_mmu *mmu, const char * const *names, int cnt);
- void (*detach)(struct msm_mmu *mmu, const char * const *names, int cnt);
+ int (*attach)(struct msm_mmu *mmu);
+ void (*detach)(struct msm_mmu *mmu);
int (*map)(struct msm_mmu *mmu, uint64_t iova, struct sg_table *sgt,
unsigned len, int prot);
int (*unmap)(struct msm_mmu *mmu, uint64_t iova, unsigned len);
diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c
index c7832a951039..af7ceb246c7c 100644
--- a/drivers/gpu/drm/msm/msm_rd.c
+++ b/drivers/gpu/drm/msm/msm_rd.c
@@ -298,7 +298,7 @@ void msm_rd_debugfs_cleanup(struct msm_drm_private *priv)
static void snapshot_buf(struct msm_rd_state *rd,
struct msm_gem_submit *submit, int idx,
- uint64_t iova, uint32_t size)
+ uint64_t iova, uint32_t size, bool full)
{
struct msm_gem_object *obj = submit->bos[idx].obj;
unsigned offset = 0;
@@ -318,6 +318,9 @@ static void snapshot_buf(struct msm_rd_state *rd,
rd_write_section(rd, RD_GPUADDR,
(uint32_t[3]){ iova, size, iova >> 32 }, 12);
+ if (!full)
+ return;
+
/* But only dump the contents of buffers marked READ */
if (!(submit->bos[idx].flags & MSM_SUBMIT_BO_READ))
return;
@@ -381,18 +384,21 @@ void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit,
rd_write_section(rd, RD_CMD, msg, ALIGN(n, 4));
for (i = 0; i < submit->nr_bos; i++)
- if (should_dump(submit, i))
- snapshot_buf(rd, submit, i, 0, 0);
+ snapshot_buf(rd, submit, i, 0, 0, should_dump(submit, i));
for (i = 0; i < submit->nr_cmds; i++) {
- uint64_t iova = submit->cmd[i].iova;
uint32_t szd = submit->cmd[i].size; /* in dwords */
/* snapshot cmdstream bo's (if we haven't already): */
if (!should_dump(submit, i)) {
snapshot_buf(rd, submit, submit->cmd[i].idx,
- submit->cmd[i].iova, szd * 4);
+ submit->cmd[i].iova, szd * 4, true);
}
+ }
+
+ for (i = 0; i < submit->nr_cmds; i++) {
+ uint64_t iova = submit->cmd[i].iova;
+ uint32_t szd = submit->cmd[i].size; /* in dwords */
switch (submit->cmd[i].type) {
case MSM_SUBMIT_CMD_IB_TARGET_BUF:
diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c
index 668d4bd0c118..df9bf1fd1bc0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -88,6 +88,7 @@ nouveau_ivmm_find(struct nouveau_svm *svm, u64 inst)
}
struct nouveau_svmm {
+ struct mmu_notifier notifier;
struct nouveau_vmm *vmm;
struct {
unsigned long start;
@@ -95,9 +96,6 @@ struct nouveau_svmm {
} unmanaged;
struct mutex mutex;
-
- struct mm_struct *mm;
- struct hmm_mirror mirror;
};
#define SVMM_DBG(s,f,a...) \
@@ -251,10 +249,11 @@ nouveau_svmm_invalidate(struct nouveau_svmm *svmm, u64 start, u64 limit)
}
static int
-nouveau_svmm_sync_cpu_device_pagetables(struct hmm_mirror *mirror,
- const struct mmu_notifier_range *update)
+nouveau_svmm_invalidate_range_start(struct mmu_notifier *mn,
+ const struct mmu_notifier_range *update)
{
- struct nouveau_svmm *svmm = container_of(mirror, typeof(*svmm), mirror);
+ struct nouveau_svmm *svmm =
+ container_of(mn, struct nouveau_svmm, notifier);
unsigned long start = update->start;
unsigned long limit = update->end;
@@ -264,6 +263,9 @@ nouveau_svmm_sync_cpu_device_pagetables(struct hmm_mirror *mirror,
SVMM_DBG(svmm, "invalidate %016lx-%016lx", start, limit);
mutex_lock(&svmm->mutex);
+ if (unlikely(!svmm->vmm))
+ goto out;
+
if (limit > svmm->unmanaged.start && start < svmm->unmanaged.limit) {
if (start < svmm->unmanaged.start) {
nouveau_svmm_invalidate(svmm, start,
@@ -273,19 +275,20 @@ nouveau_svmm_sync_cpu_device_pagetables(struct hmm_mirror *mirror,
}
nouveau_svmm_invalidate(svmm, start, limit);
+
+out:
mutex_unlock(&svmm->mutex);
return 0;
}
-static void
-nouveau_svmm_release(struct hmm_mirror *mirror)
+static void nouveau_svmm_free_notifier(struct mmu_notifier *mn)
{
+ kfree(container_of(mn, struct nouveau_svmm, notifier));
}
-static const struct hmm_mirror_ops
-nouveau_svmm = {
- .sync_cpu_device_pagetables = nouveau_svmm_sync_cpu_device_pagetables,
- .release = nouveau_svmm_release,
+static const struct mmu_notifier_ops nouveau_mn_ops = {
+ .invalidate_range_start = nouveau_svmm_invalidate_range_start,
+ .free_notifier = nouveau_svmm_free_notifier,
};
void
@@ -293,8 +296,10 @@ nouveau_svmm_fini(struct nouveau_svmm **psvmm)
{
struct nouveau_svmm *svmm = *psvmm;
if (svmm) {
- hmm_mirror_unregister(&svmm->mirror);
- kfree(*psvmm);
+ mutex_lock(&svmm->mutex);
+ svmm->vmm = NULL;
+ mutex_unlock(&svmm->mutex);
+ mmu_notifier_put(&svmm->notifier);
*psvmm = NULL;
}
}
@@ -320,7 +325,7 @@ nouveau_svmm_init(struct drm_device *dev, void *data,
mutex_lock(&cli->mutex);
if (cli->svm.cli) {
ret = -EBUSY;
- goto done;
+ goto out_free;
}
/* Allocate a new GPU VMM that can support SVM (managed by the
@@ -335,24 +340,26 @@ nouveau_svmm_init(struct drm_device *dev, void *data,
.fault_replay = true,
}, sizeof(struct gp100_vmm_v0), &cli->svm.vmm);
if (ret)
- goto done;
-
- /* Enable HMM mirroring of CPU address-space to VMM. */
- svmm->mm = get_task_mm(current);
- down_write(&svmm->mm->mmap_sem);
- svmm->mirror.ops = &nouveau_svmm;
- ret = hmm_mirror_register(&svmm->mirror, svmm->mm);
- if (ret == 0) {
- cli->svm.svmm = svmm;
- cli->svm.cli = cli;
- }
- up_write(&svmm->mm->mmap_sem);
- mmput(svmm->mm);
+ goto out_free;
-done:
+ down_write(&current->mm->mmap_sem);
+ svmm->notifier.ops = &nouveau_mn_ops;
+ ret = __mmu_notifier_register(&svmm->notifier, current->mm);
if (ret)
- nouveau_svmm_fini(&svmm);
+ goto out_mm_unlock;
+ /* Note, ownership of svmm transfers to mmu_notifier */
+
+ cli->svm.svmm = svmm;
+ cli->svm.cli = cli;
+ up_write(&current->mm->mmap_sem);
mutex_unlock(&cli->mutex);
+ return 0;
+
+out_mm_unlock:
+ up_write(&current->mm->mmap_sem);
+out_free:
+ mutex_unlock(&cli->mutex);
+ kfree(svmm);
return ret;
}
@@ -475,43 +482,90 @@ nouveau_svm_fault_cache(struct nouveau_svm *svm,
fault->inst, fault->addr, fault->access);
}
-static inline bool
-nouveau_range_done(struct hmm_range *range)
+struct svm_notifier {
+ struct mmu_interval_notifier notifier;
+ struct nouveau_svmm *svmm;
+};
+
+static bool nouveau_svm_range_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
{
- bool ret = hmm_range_valid(range);
+ struct svm_notifier *sn =
+ container_of(mni, struct svm_notifier, notifier);
- hmm_range_unregister(range);
- return ret;
+ /*
+ * serializes the update to mni->invalidate_seq done by caller and
+ * prevents invalidation of the PTE from progressing while HW is being
+ * programmed. This is very hacky and only works because the normal
+ * notifier that does invalidation is always called after the range
+ * notifier.
+ */
+ if (mmu_notifier_range_blockable(range))
+ mutex_lock(&sn->svmm->mutex);
+ else if (!mutex_trylock(&sn->svmm->mutex))
+ return false;
+ mmu_interval_set_seq(mni, cur_seq);
+ mutex_unlock(&sn->svmm->mutex);
+ return true;
}
-static int
-nouveau_range_fault(struct nouveau_svmm *svmm, struct hmm_range *range)
+static const struct mmu_interval_notifier_ops nouveau_svm_mni_ops = {
+ .invalidate = nouveau_svm_range_invalidate,
+};
+
+static int nouveau_range_fault(struct nouveau_svmm *svmm,
+ struct nouveau_drm *drm, void *data, u32 size,
+ u64 *pfns, struct svm_notifier *notifier)
{
+ unsigned long timeout =
+ jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+ /* Have HMM fault pages within the fault window to the GPU. */
+ struct hmm_range range = {
+ .notifier = &notifier->notifier,
+ .start = notifier->notifier.interval_tree.start,
+ .end = notifier->notifier.interval_tree.last + 1,
+ .pfns = pfns,
+ .flags = nouveau_svm_pfn_flags,
+ .values = nouveau_svm_pfn_values,
+ .pfn_shift = NVIF_VMM_PFNMAP_V0_ADDR_SHIFT,
+ };
+ struct mm_struct *mm = notifier->notifier.mm;
long ret;
- range->default_flags = 0;
- range->pfn_flags_mask = -1UL;
+ while (true) {
+ if (time_after(jiffies, timeout))
+ return -EBUSY;
+
+ range.notifier_seq = mmu_interval_read_begin(range.notifier);
+ range.default_flags = 0;
+ range.pfn_flags_mask = -1UL;
+ down_read(&mm->mmap_sem);
+ ret = hmm_range_fault(&range, 0);
+ up_read(&mm->mmap_sem);
+ if (ret <= 0) {
+ if (ret == 0 || ret == -EBUSY)
+ continue;
+ return ret;
+ }
- ret = hmm_range_register(range, &svmm->mirror);
- if (ret) {
- up_read(&svmm->mm->mmap_sem);
- return (int)ret;
+ mutex_lock(&svmm->mutex);
+ if (mmu_interval_read_retry(range.notifier,
+ range.notifier_seq)) {
+ mutex_unlock(&svmm->mutex);
+ continue;
+ }
+ break;
}
- if (!hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT)) {
- up_read(&svmm->mm->mmap_sem);
- return -EBUSY;
- }
+ nouveau_dmem_convert_pfn(drm, &range);
- ret = hmm_range_fault(range, 0);
- if (ret <= 0) {
- if (ret == 0)
- ret = -EBUSY;
- up_read(&svmm->mm->mmap_sem);
- hmm_range_unregister(range);
- return ret;
- }
- return 0;
+ svmm->vmm->vmm.object.client->super = true;
+ ret = nvif_object_ioctl(&svmm->vmm->vmm.object, data, size, NULL);
+ svmm->vmm->vmm.object.client->super = false;
+ mutex_unlock(&svmm->mutex);
+
+ return ret;
}
static int
@@ -531,7 +585,6 @@ nouveau_svm_fault(struct nvif_notify *notify)
} i;
u64 phys[16];
} args;
- struct hmm_range range;
struct vm_area_struct *vma;
u64 inst, start, limit;
int fi, fn, pi, fill;
@@ -587,6 +640,9 @@ nouveau_svm_fault(struct nvif_notify *notify)
args.i.p.version = 0;
for (fi = 0; fn = fi + 1, fi < buffer->fault_nr; fi = fn) {
+ struct svm_notifier notifier;
+ struct mm_struct *mm;
+
/* Cancel any faults from non-SVM channels. */
if (!(svmm = buffer->fault[fi]->svmm)) {
nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
@@ -606,24 +662,32 @@ nouveau_svm_fault(struct nvif_notify *notify)
start = max_t(u64, start, svmm->unmanaged.limit);
SVMM_DBG(svmm, "wndw %016llx-%016llx", start, limit);
+ mm = svmm->notifier.mm;
+ if (!mmget_not_zero(mm)) {
+ nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
+ continue;
+ }
+
/* Intersect fault window with the CPU VMA, cancelling
* the fault if the address is invalid.
*/
- down_read(&svmm->mm->mmap_sem);
- vma = find_vma_intersection(svmm->mm, start, limit);
+ down_read(&mm->mmap_sem);
+ vma = find_vma_intersection(mm, start, limit);
if (!vma) {
SVMM_ERR(svmm, "wndw %016llx-%016llx", start, limit);
- up_read(&svmm->mm->mmap_sem);
+ up_read(&mm->mmap_sem);
+ mmput(mm);
nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
continue;
}
start = max_t(u64, start, vma->vm_start);
limit = min_t(u64, limit, vma->vm_end);
+ up_read(&mm->mmap_sem);
SVMM_DBG(svmm, "wndw %016llx-%016llx", start, limit);
if (buffer->fault[fi]->addr != start) {
SVMM_ERR(svmm, "addr %016llx", buffer->fault[fi]->addr);
- up_read(&svmm->mm->mmap_sem);
+ mmput(mm);
nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
continue;
}
@@ -679,33 +743,19 @@ nouveau_svm_fault(struct nvif_notify *notify)
args.i.p.addr,
args.i.p.addr + args.i.p.size, fn - fi);
- /* Have HMM fault pages within the fault window to the GPU. */
- range.start = args.i.p.addr;
- range.end = args.i.p.addr + args.i.p.size;
- range.pfns = args.phys;
- range.flags = nouveau_svm_pfn_flags;
- range.values = nouveau_svm_pfn_values;
- range.pfn_shift = NVIF_VMM_PFNMAP_V0_ADDR_SHIFT;
-again:
- ret = nouveau_range_fault(svmm, &range);
- if (ret == 0) {
- mutex_lock(&svmm->mutex);
- if (!nouveau_range_done(&range)) {
- mutex_unlock(&svmm->mutex);
- goto again;
- }
-
- nouveau_dmem_convert_pfn(svm->drm, &range);
-
- svmm->vmm->vmm.object.client->super = true;
- ret = nvif_object_ioctl(&svmm->vmm->vmm.object,
- &args, sizeof(args.i) +
- pi * sizeof(args.phys[0]),
- NULL);
- svmm->vmm->vmm.object.client->super = false;
- mutex_unlock(&svmm->mutex);
- up_read(&svmm->mm->mmap_sem);
+ notifier.svmm = svmm;
+ ret = mmu_interval_notifier_insert(&notifier.notifier,
+ svmm->notifier.mm,
+ args.i.p.addr, args.i.p.size,
+ &nouveau_svm_mni_ops);
+ if (!ret) {
+ ret = nouveau_range_fault(
+ svmm, svm->drm, &args,
+ sizeof(args.i) + pi * sizeof(args.phys[0]),
+ args.phys, &notifier);
+ mmu_interval_notifier_remove(&notifier.notifier);
}
+ mmput(mm);
/* Cancel any faults in the window whose pages didn't manage
* to keep their valid bit, or stay writeable when required.
@@ -714,10 +764,10 @@ again:
*/
while (fi < fn) {
struct nouveau_svm_fault *fault = buffer->fault[fi++];
- pi = (fault->addr - range.start) >> PAGE_SHIFT;
+ pi = (fault->addr - args.i.p.addr) >> PAGE_SHIFT;
if (ret ||
- !(range.pfns[pi] & NVIF_VMM_PFNMAP_V0_V) ||
- (!(range.pfns[pi] & NVIF_VMM_PFNMAP_V0_W) &&
+ !(args.phys[pi] & NVIF_VMM_PFNMAP_V0_V) ||
+ (!(args.phys[pi] & NVIF_VMM_PFNMAP_V0_W) &&
fault->access != 0 && fault->access != 3)) {
nouveau_svm_fault_cancel_fault(svm, fault);
continue;
diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c
index e518d93ca6df..d08ae95ecc0a 100644
--- a/drivers/gpu/drm/omapdrm/omap_gem.c
+++ b/drivers/gpu/drm/omapdrm/omap_gem.c
@@ -843,9 +843,13 @@ fail:
*/
static void omap_gem_unpin_locked(struct drm_gem_object *obj)
{
+ struct omap_drm_private *priv = obj->dev->dev_private;
struct omap_gem_object *omap_obj = to_omap_bo(obj);
int ret;
+ if (omap_gem_is_contiguous(omap_obj) || !priv->has_dmm)
+ return;
+
if (refcount_dec_and_test(&omap_obj->dma_addr_cnt)) {
ret = tiler_unpin(omap_obj->block);
if (ret) {
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index daff9a2af3be..40a7e702c2a9 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -6965,8 +6965,8 @@ static int cik_irq_init(struct radeon_device *rdev)
}
/* setup interrupt control */
- /* XXX this should actually be a bus address, not an MC address. same on older asics */
- WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
+ /* set dummy read address to dummy page address */
+ WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
interrupt_cntl = RREG32(INTERRUPT_CNTL);
/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
* IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
@@ -9500,7 +9500,6 @@ static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
struct pci_dev *root = rdev->pdev->bus->self;
enum pci_bus_speed speed_cap;
- int bridge_pos, gpu_pos;
u32 speed_cntl, current_data_rate;
int i;
u16 tmp16;
@@ -9542,12 +9541,7 @@ static void cik_pcie_gen3_enable(struct radeon_device *rdev)
DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
}
- bridge_pos = pci_pcie_cap(root);
- if (!bridge_pos)
- return;
-
- gpu_pos = pci_pcie_cap(rdev->pdev);
- if (!gpu_pos)
+ if (!pci_is_pcie(root) || !pci_is_pcie(rdev->pdev))
return;
if (speed_cap == PCIE_SPEED_8_0GT) {
@@ -9557,14 +9551,17 @@ static void cik_pcie_gen3_enable(struct radeon_device *rdev)
u16 bridge_cfg2, gpu_cfg2;
u32 max_lw, current_lw, tmp;
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
- pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+ pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL,
+ &gpu_cfg);
tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
- pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL,
+ tmp16);
tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
@@ -9582,15 +9579,23 @@ static void cik_pcie_gen3_enable(struct radeon_device *rdev)
for (i = 0; i < 10; i++) {
/* check status */
- pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
+ pcie_capability_read_word(rdev->pdev,
+ PCI_EXP_DEVSTA,
+ &tmp16);
if (tmp16 & PCI_EXP_DEVSTA_TRPND)
break;
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
- pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+ pcie_capability_read_word(rdev->pdev,
+ PCI_EXP_LNKCTL,
+ &gpu_cfg);
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
- pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &bridge_cfg2);
+ pcie_capability_read_word(rdev->pdev,
+ PCI_EXP_LNKCTL2,
+ &gpu_cfg2);
tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
tmp |= LC_SET_QUIESCE;
@@ -9603,26 +9608,45 @@ static void cik_pcie_gen3_enable(struct radeon_device *rdev)
msleep(100);
/* linkctl */
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &tmp16);
tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(root, PCI_EXP_LNKCTL,
+ tmp16);
- pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
+ pcie_capability_read_word(rdev->pdev,
+ PCI_EXP_LNKCTL,
+ &tmp16);
tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
- pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(rdev->pdev,
+ PCI_EXP_LNKCTL,
+ tmp16);
/* linkctl2 */
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~((1 << 4) | (7 << 9));
- tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
-
- pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~((1 << 4) | (7 << 9));
- tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
- pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (bridge_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(root,
+ PCI_EXP_LNKCTL2,
+ tmp16);
+
+ pcie_capability_read_word(rdev->pdev,
+ PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (gpu_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(rdev->pdev,
+ PCI_EXP_LNKCTL2,
+ tmp16);
tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
tmp &= ~LC_SET_QUIESCE;
@@ -9636,15 +9660,15 @@ static void cik_pcie_gen3_enable(struct radeon_device *rdev)
speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
- pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~0xf;
+ pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL2, &tmp16);
+ tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
if (speed_cap == PCIE_SPEED_8_0GT)
- tmp16 |= 3; /* gen3 */
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
else if (speed_cap == PCIE_SPEED_5_0GT)
- tmp16 |= 2; /* gen2 */
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
else
- tmp16 |= 1; /* gen1 */
- pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
+ pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL2, tmp16);
speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 7089dfc8c2a9..110fb38004b1 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -1826,8 +1826,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
track->textures[i].use_pitch = 1;
} else {
track->textures[i].use_pitch = 0;
- track->textures[i].width = 1 << ((idx_value >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK);
- track->textures[i].height = 1 << ((idx_value >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK);
+ track->textures[i].width = 1 << ((idx_value & RADEON_TXFORMAT_WIDTH_MASK) >> RADEON_TXFORMAT_WIDTH_SHIFT);
+ track->textures[i].height = 1 << ((idx_value & RADEON_TXFORMAT_HEIGHT_MASK) >> RADEON_TXFORMAT_HEIGHT_SHIFT);
}
if (idx_value & RADEON_TXFORMAT_CUBIC_MAP_ENABLE)
track->textures[i].tex_coord_type = 2;
diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c
index 840401413c58..f5f2ffea5ab2 100644
--- a/drivers/gpu/drm/radeon/r200.c
+++ b/drivers/gpu/drm/radeon/r200.c
@@ -476,8 +476,8 @@ int r200_packet0_check(struct radeon_cs_parser *p,
track->textures[i].use_pitch = 1;
} else {
track->textures[i].use_pitch = 0;
- track->textures[i].width = 1 << ((idx_value >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK);
- track->textures[i].height = 1 << ((idx_value >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK);
+ track->textures[i].width = 1 << ((idx_value & RADEON_TXFORMAT_WIDTH_MASK) >> RADEON_TXFORMAT_WIDTH_SHIFT);
+ track->textures[i].height = 1 << ((idx_value & RADEON_TXFORMAT_HEIGHT_MASK) >> RADEON_TXFORMAT_HEIGHT_SHIFT);
}
if (idx_value & R200_TXFORMAT_LOOKUP_DISABLE)
track->textures[i].lookup_disable = true;
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index e937cc01910d..033bc466a862 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -3696,8 +3696,8 @@ int r600_irq_init(struct radeon_device *rdev)
}
/* setup interrupt control */
- /* set dummy read address to ring address */
- WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
+ /* set dummy read address to dummy page address */
+ WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
interrupt_cntl = RREG32(INTERRUPT_CNTL);
/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
* IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index d59b004f6695..30e32adc1fc6 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -68,6 +68,10 @@
#include <linux/hashtable.h>
#include <linux/dma-fence.h>
+#ifdef CONFIG_MMU_NOTIFIER
+#include <linux/mmu_notifier.h>
+#endif
+
#include <drm/ttm/ttm_bo_api.h>
#include <drm/ttm/ttm_bo_driver.h>
#include <drm/ttm/ttm_placement.h>
@@ -509,8 +513,9 @@ struct radeon_bo {
struct ttm_bo_kmap_obj dma_buf_vmap;
pid_t pid;
- struct radeon_mn *mn;
- struct list_head mn_list;
+#ifdef CONFIG_MMU_NOTIFIER
+ struct mmu_interval_notifier notifier;
+#endif
};
#define gem_to_radeon_bo(gobj) container_of((gobj), struct radeon_bo, tbo.base)
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 41f08321a361..fd74e2611185 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -379,10 +379,6 @@ radeon_pci_remove(struct pci_dev *pdev)
static void
radeon_pci_shutdown(struct pci_dev *pdev)
{
-#ifdef CONFIG_PPC64
- struct drm_device *ddev = pci_get_drvdata(pdev);
-#endif
-
/* if we are running in a VM, make sure the device
* torn down properly on reboot/shutdown
*/
@@ -390,13 +386,14 @@ radeon_pci_shutdown(struct pci_dev *pdev)
radeon_pci_remove(pdev);
#ifdef CONFIG_PPC64
- /* Some adapters need to be suspended before a
+ /*
+ * Some adapters need to be suspended before a
* shutdown occurs in order to prevent an error
* during kexec.
* Make this power specific becauase it breaks
* some non-power boards.
*/
- radeon_suspend_kms(ddev, true, true, false);
+ radeon_suspend_kms(pci_get_drvdata(pdev), true, true, false);
#endif
}
diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c
index dbab9a3a969b..f93829f08a4d 100644
--- a/drivers/gpu/drm/radeon/radeon_mn.c
+++ b/drivers/gpu/drm/radeon/radeon_mn.c
@@ -36,131 +36,51 @@
#include "radeon.h"
-struct radeon_mn {
- struct mmu_notifier mn;
-
- /* objects protected by lock */
- struct mutex lock;
- struct rb_root_cached objects;
-};
-
-struct radeon_mn_node {
- struct interval_tree_node it;
- struct list_head bos;
-};
-
/**
- * radeon_mn_invalidate_range_start - callback to notify about mm change
+ * radeon_mn_invalidate - callback to notify about mm change
*
* @mn: our notifier
- * @mn: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
+ * @range: the VMA under invalidation
*
* We block for all BOs between start and end to be idle and
* unmap them by move them into system domain again.
*/
-static int radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
- const struct mmu_notifier_range *range)
+static bool radeon_mn_invalidate(struct mmu_interval_notifier *mn,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
{
- struct radeon_mn *rmn = container_of(mn, struct radeon_mn, mn);
+ struct radeon_bo *bo = container_of(mn, struct radeon_bo, notifier);
struct ttm_operation_ctx ctx = { false, false };
- struct interval_tree_node *it;
- unsigned long end;
- int ret = 0;
-
- /* notification is exclusive, but interval is inclusive */
- end = range->end - 1;
-
- /* TODO we should be able to split locking for interval tree and
- * the tear down.
- */
- if (mmu_notifier_range_blockable(range))
- mutex_lock(&rmn->lock);
- else if (!mutex_trylock(&rmn->lock))
- return -EAGAIN;
-
- it = interval_tree_iter_first(&rmn->objects, range->start, end);
- while (it) {
- struct radeon_mn_node *node;
- struct radeon_bo *bo;
- long r;
-
- if (!mmu_notifier_range_blockable(range)) {
- ret = -EAGAIN;
- goto out_unlock;
- }
-
- node = container_of(it, struct radeon_mn_node, it);
- it = interval_tree_iter_next(it, range->start, end);
+ long r;
- list_for_each_entry(bo, &node->bos, mn_list) {
+ if (!bo->tbo.ttm || bo->tbo.ttm->state != tt_bound)
+ return true;
- if (!bo->tbo.ttm || bo->tbo.ttm->state != tt_bound)
- continue;
+ if (!mmu_notifier_range_blockable(range))
+ return false;
- r = radeon_bo_reserve(bo, true);
- if (r) {
- DRM_ERROR("(%ld) failed to reserve user bo\n", r);
- continue;
- }
-
- r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv,
- true, false, MAX_SCHEDULE_TIMEOUT);
- if (r <= 0)
- DRM_ERROR("(%ld) failed to wait for user bo\n", r);
-
- radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU);
- r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
- if (r)
- DRM_ERROR("(%ld) failed to validate user bo\n", r);
-
- radeon_bo_unreserve(bo);
- }
+ r = radeon_bo_reserve(bo, true);
+ if (r) {
+ DRM_ERROR("(%ld) failed to reserve user bo\n", r);
+ return true;
}
-
-out_unlock:
- mutex_unlock(&rmn->lock);
-
- return ret;
-}
-
-static void radeon_mn_release(struct mmu_notifier *mn, struct mm_struct *mm)
-{
- struct mmu_notifier_range range = {
- .mm = mm,
- .start = 0,
- .end = ULONG_MAX,
- .flags = 0,
- .event = MMU_NOTIFY_UNMAP,
- };
-
- radeon_mn_invalidate_range_start(mn, &range);
-}
-
-static struct mmu_notifier *radeon_mn_alloc_notifier(struct mm_struct *mm)
-{
- struct radeon_mn *rmn;
- rmn = kzalloc(sizeof(*rmn), GFP_KERNEL);
- if (!rmn)
- return ERR_PTR(-ENOMEM);
+ r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, true, false,
+ MAX_SCHEDULE_TIMEOUT);
+ if (r <= 0)
+ DRM_ERROR("(%ld) failed to wait for user bo\n", r);
- mutex_init(&rmn->lock);
- rmn->objects = RB_ROOT_CACHED;
- return &rmn->mn;
-}
+ radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU);
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (r)
+ DRM_ERROR("(%ld) failed to validate user bo\n", r);
-static void radeon_mn_free_notifier(struct mmu_notifier *mn)
-{
- kfree(container_of(mn, struct radeon_mn, mn));
+ radeon_bo_unreserve(bo);
+ return true;
}
-static const struct mmu_notifier_ops radeon_mn_ops = {
- .release = radeon_mn_release,
- .invalidate_range_start = radeon_mn_invalidate_range_start,
- .alloc_notifier = radeon_mn_alloc_notifier,
- .free_notifier = radeon_mn_free_notifier,
+static const struct mmu_interval_notifier_ops radeon_mn_ops = {
+ .invalidate = radeon_mn_invalidate,
};
/**
@@ -174,51 +94,20 @@ static const struct mmu_notifier_ops radeon_mn_ops = {
*/
int radeon_mn_register(struct radeon_bo *bo, unsigned long addr)
{
- unsigned long end = addr + radeon_bo_size(bo) - 1;
- struct mmu_notifier *mn;
- struct radeon_mn *rmn;
- struct radeon_mn_node *node = NULL;
- struct list_head bos;
- struct interval_tree_node *it;
-
- mn = mmu_notifier_get(&radeon_mn_ops, current->mm);
- if (IS_ERR(mn))
- return PTR_ERR(mn);
- rmn = container_of(mn, struct radeon_mn, mn);
-
- INIT_LIST_HEAD(&bos);
-
- mutex_lock(&rmn->lock);
-
- while ((it = interval_tree_iter_first(&rmn->objects, addr, end))) {
- kfree(node);
- node = container_of(it, struct radeon_mn_node, it);
- interval_tree_remove(&node->it, &rmn->objects);
- addr = min(it->start, addr);
- end = max(it->last, end);
- list_splice(&node->bos, &bos);
- }
-
- if (!node) {
- node = kmalloc(sizeof(struct radeon_mn_node), GFP_KERNEL);
- if (!node) {
- mutex_unlock(&rmn->lock);
- return -ENOMEM;
- }
- }
-
- bo->mn = rmn;
-
- node->it.start = addr;
- node->it.last = end;
- INIT_LIST_HEAD(&node->bos);
- list_splice(&bos, &node->bos);
- list_add(&bo->mn_list, &node->bos);
-
- interval_tree_insert(&node->it, &rmn->objects);
-
- mutex_unlock(&rmn->lock);
-
+ int ret;
+
+ ret = mmu_interval_notifier_insert(&bo->notifier, current->mm, addr,
+ radeon_bo_size(bo), &radeon_mn_ops);
+ if (ret)
+ return ret;
+
+ /*
+ * FIXME: radeon appears to allow get_user_pages to run during
+ * invalidate_range_start/end, which is not a safe way to read the
+ * PTEs. It should use the mmu_interval_read_begin() scheme around the
+ * get_user_pages to ensure that the PTEs are read properly
+ */
+ mmu_interval_read_begin(&bo->notifier);
return 0;
}
@@ -231,27 +120,8 @@ int radeon_mn_register(struct radeon_bo *bo, unsigned long addr)
*/
void radeon_mn_unregister(struct radeon_bo *bo)
{
- struct radeon_mn *rmn = bo->mn;
- struct list_head *head;
-
- if (!rmn)
+ if (!bo->notifier.mm)
return;
-
- mutex_lock(&rmn->lock);
- /* save the next list entry for later */
- head = bo->mn_list.next;
-
- list_del(&bo->mn_list);
-
- if (list_empty(head)) {
- struct radeon_mn_node *node;
- node = container_of(head, struct radeon_mn_node, bos);
- interval_tree_remove(&node->it, &rmn->objects);
- kfree(node);
- }
-
- mutex_unlock(&rmn->lock);
-
- mmu_notifier_put(&rmn->mn);
- bo->mn = NULL;
+ mmu_interval_notifier_remove(&bo->notifier);
+ bo->notifier.mm = NULL;
}
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 05894d198a79..d7eea75b2c27 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -3257,7 +3257,7 @@ static void si_gpu_init(struct radeon_device *rdev)
/* XXX what about 12? */
rdev->config.si.tile_config |= (3 << 0);
break;
- }
+ }
switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
case 0: /* four banks */
rdev->config.si.tile_config |= 0 << 4;
@@ -5997,8 +5997,8 @@ static int si_irq_init(struct radeon_device *rdev)
}
/* setup interrupt control */
- /* set dummy read address to ring address */
- WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
+ /* set dummy read address to dummy page address */
+ WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
interrupt_cntl = RREG32(INTERRUPT_CNTL);
/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
* IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
@@ -7087,7 +7087,6 @@ static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
struct pci_dev *root = rdev->pdev->bus->self;
enum pci_bus_speed speed_cap;
- int bridge_pos, gpu_pos;
u32 speed_cntl, current_data_rate;
int i;
u16 tmp16;
@@ -7129,12 +7128,7 @@ static void si_pcie_gen3_enable(struct radeon_device *rdev)
DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
}
- bridge_pos = pci_pcie_cap(root);
- if (!bridge_pos)
- return;
-
- gpu_pos = pci_pcie_cap(rdev->pdev);
- if (!gpu_pos)
+ if (!pci_is_pcie(root) || !pci_is_pcie(rdev->pdev))
return;
if (speed_cap == PCIE_SPEED_8_0GT) {
@@ -7144,14 +7138,17 @@ static void si_pcie_gen3_enable(struct radeon_device *rdev)
u16 bridge_cfg2, gpu_cfg2;
u32 max_lw, current_lw, tmp;
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
- pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+ pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL,
+ &gpu_cfg);
tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
- pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL,
+ tmp16);
tmp = RREG32_PCIE(PCIE_LC_STATUS1);
max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
@@ -7169,15 +7166,23 @@ static void si_pcie_gen3_enable(struct radeon_device *rdev)
for (i = 0; i < 10; i++) {
/* check status */
- pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
+ pcie_capability_read_word(rdev->pdev,
+ PCI_EXP_DEVSTA,
+ &tmp16);
if (tmp16 & PCI_EXP_DEVSTA_TRPND)
break;
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
- pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+ pcie_capability_read_word(rdev->pdev,
+ PCI_EXP_LNKCTL,
+ &gpu_cfg);
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
- pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &bridge_cfg2);
+ pcie_capability_read_word(rdev->pdev,
+ PCI_EXP_LNKCTL2,
+ &gpu_cfg2);
tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
tmp |= LC_SET_QUIESCE;
@@ -7190,26 +7195,46 @@ static void si_pcie_gen3_enable(struct radeon_device *rdev)
msleep(100);
/* linkctl */
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &tmp16);
tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(root,
+ PCI_EXP_LNKCTL,
+ tmp16);
- pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
+ pcie_capability_read_word(rdev->pdev,
+ PCI_EXP_LNKCTL,
+ &tmp16);
tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
- pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(rdev->pdev,
+ PCI_EXP_LNKCTL,
+ tmp16);
/* linkctl2 */
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~((1 << 4) | (7 << 9));
- tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
-
- pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~((1 << 4) | (7 << 9));
- tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
- pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (bridge_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(root,
+ PCI_EXP_LNKCTL2,
+ tmp16);
+
+ pcie_capability_read_word(rdev->pdev,
+ PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (gpu_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(rdev->pdev,
+ PCI_EXP_LNKCTL2,
+ tmp16);
tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
tmp &= ~LC_SET_QUIESCE;
@@ -7223,15 +7248,15 @@ static void si_pcie_gen3_enable(struct radeon_device *rdev)
speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
- pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~0xf;
+ pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL2, &tmp16);
+ tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
if (speed_cap == PCIE_SPEED_8_0GT)
- tmp16 |= 3; /* gen3 */
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
else if (speed_cap == PCIE_SPEED_5_0GT)
- tmp16 |= 2; /* gen2 */
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
else
- tmp16 |= 1; /* gen1 */
- pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
+ pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL2, tmp16);
speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c
index fad72799b8df..42651d737c55 100644
--- a/drivers/gpu/drm/sun4i/sun4i_tcon.c
+++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c
@@ -489,7 +489,7 @@ static void sun4i_tcon0_mode_set_rgb(struct sun4i_tcon *tcon,
WARN_ON(!tcon->quirks->has_channel_0);
- tcon->dclk_min_div = 6;
+ tcon->dclk_min_div = 1;
tcon->dclk_max_div = 127;
sun4i_tcon0_mode_set_common(tcon, mode);
diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index 5b1f9ff97576..714af052fbef 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -837,16 +837,15 @@ static int tegra_cursor_atomic_check(struct drm_plane *plane,
static void tegra_cursor_atomic_update(struct drm_plane *plane,
struct drm_plane_state *old_state)
{
- struct tegra_bo *bo = tegra_fb_get_plane(plane->state->fb, 0);
+ struct tegra_plane_state *state = to_tegra_plane_state(plane->state);
struct tegra_dc *dc = to_tegra_dc(plane->state->crtc);
- struct drm_plane_state *state = plane->state;
u32 value = CURSOR_CLIP_DISPLAY;
/* rien ne va plus */
if (!plane->state->crtc || !plane->state->fb)
return;
- switch (state->crtc_w) {
+ switch (plane->state->crtc_w) {
case 32:
value |= CURSOR_SIZE_32x32;
break;
@@ -864,16 +863,16 @@ static void tegra_cursor_atomic_update(struct drm_plane *plane,
break;
default:
- WARN(1, "cursor size %ux%u not supported\n", state->crtc_w,
- state->crtc_h);
+ WARN(1, "cursor size %ux%u not supported\n",
+ plane->state->crtc_w, plane->state->crtc_h);
return;
}
- value |= (bo->iova >> 10) & 0x3fffff;
+ value |= (state->iova[0] >> 10) & 0x3fffff;
tegra_dc_writel(dc, value, DC_DISP_CURSOR_START_ADDR);
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
- value = (bo->iova >> 32) & 0x3;
+ value = (state->iova[0] >> 32) & 0x3;
tegra_dc_writel(dc, value, DC_DISP_CURSOR_START_ADDR_HI);
#endif
@@ -892,7 +891,8 @@ static void tegra_cursor_atomic_update(struct drm_plane *plane,
tegra_dc_writel(dc, value, DC_DISP_BLEND_CURSOR_CONTROL);
/* position the cursor */
- value = (state->crtc_y & 0x3fff) << 16 | (state->crtc_x & 0x3fff);
+ value = (plane->state->crtc_y & 0x3fff) << 16 |
+ (plane->state->crtc_x & 0x3fff);
tegra_dc_writel(dc, value, DC_DISP_CURSOR_POSITION);
}
@@ -2017,7 +2017,7 @@ static int tegra_dc_init(struct host1x_client *client)
dev_warn(dc->dev, "failed to allocate syncpoint\n");
err = host1x_client_iommu_attach(client);
- if (err < 0) {
+ if (err < 0 && err != -ENODEV) {
dev_err(client->dev, "failed to attach to domain: %d\n", err);
return err;
}
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 56e5e7a5c108..f455ce71e85d 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -920,10 +920,8 @@ int host1x_client_iommu_attach(struct host1x_client *client)
if (tegra->domain) {
group = iommu_group_get(client->dev);
- if (!group) {
- dev_err(client->dev, "failed to get IOMMU group\n");
+ if (!group)
return -ENODEV;
- }
if (domain != tegra->domain) {
err = iommu_attach_group(tegra->domain, group);
@@ -1243,6 +1241,9 @@ static int host1x_drm_remove(struct host1x_device *dev)
drm_atomic_helper_shutdown(drm);
drm_mode_config_cleanup(drm);
+ if (tegra->hub)
+ tegra_display_hub_cleanup(tegra->hub);
+
err = host1x_device_exit(dev);
if (err < 0)
dev_err(&dev->dev, "host1x device cleanup failed: %d\n", err);
diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index 746dae32c484..bc15b430156d 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -27,6 +27,29 @@ static void tegra_bo_put(struct host1x_bo *bo)
drm_gem_object_put_unlocked(&obj->gem);
}
+/* XXX move this into lib/scatterlist.c? */
+static int sg_alloc_table_from_sg(struct sg_table *sgt, struct scatterlist *sg,
+ unsigned int nents, gfp_t gfp_mask)
+{
+ struct scatterlist *dst;
+ unsigned int i;
+ int err;
+
+ err = sg_alloc_table(sgt, nents, gfp_mask);
+ if (err < 0)
+ return err;
+
+ dst = sgt->sgl;
+
+ for (i = 0; i < nents; i++) {
+ sg_set_page(dst, sg_page(sg), sg->length, 0);
+ dst = sg_next(dst);
+ sg = sg_next(sg);
+ }
+
+ return 0;
+}
+
static struct sg_table *tegra_bo_pin(struct device *dev, struct host1x_bo *bo,
dma_addr_t *phys)
{
@@ -52,11 +75,31 @@ static struct sg_table *tegra_bo_pin(struct device *dev, struct host1x_bo *bo,
return ERR_PTR(-ENOMEM);
if (obj->pages) {
+ /*
+ * If the buffer object was allocated from the explicit IOMMU
+ * API code paths, construct an SG table from the pages.
+ */
err = sg_alloc_table_from_pages(sgt, obj->pages, obj->num_pages,
0, obj->gem.size, GFP_KERNEL);
if (err < 0)
goto free;
+ } else if (obj->sgt) {
+ /*
+ * If the buffer object already has an SG table but no pages
+ * were allocated for it, it means the buffer was imported and
+ * the SG table needs to be copied to avoid overwriting any
+ * other potential users of the original SG table.
+ */
+ err = sg_alloc_table_from_sg(sgt, obj->sgt->sgl, obj->sgt->nents,
+ GFP_KERNEL);
+ if (err < 0)
+ goto free;
} else {
+ /*
+ * If the buffer object had no pages allocated and if it was
+ * not imported, it had to be allocated with the DMA API, so
+ * the DMA API helper can be used.
+ */
err = dma_get_sgtable(dev, sgt, obj->vaddr, obj->iova,
obj->gem.size);
if (err < 0)
@@ -397,13 +440,6 @@ static struct tegra_bo *tegra_bo_import(struct drm_device *drm,
err = tegra_bo_iommu_map(tegra, bo);
if (err < 0)
goto detach;
- } else {
- if (bo->sgt->nents > 1) {
- err = -EINVAL;
- goto detach;
- }
-
- bo->iova = sg_dma_address(bo->sgt->sgl);
}
bo->gem.import_attach = attach;
diff --git a/drivers/gpu/drm/tegra/hub.c b/drivers/gpu/drm/tegra/hub.c
index 2b4082d0bc9e..47d985ac7cd7 100644
--- a/drivers/gpu/drm/tegra/hub.c
+++ b/drivers/gpu/drm/tegra/hub.c
@@ -605,11 +605,8 @@ static struct tegra_display_hub_state *
tegra_display_hub_get_state(struct tegra_display_hub *hub,
struct drm_atomic_state *state)
{
- struct drm_device *drm = dev_get_drvdata(hub->client.parent);
struct drm_private_state *priv;
- WARN_ON(!drm_modeset_is_locked(&drm->mode_config.connection_mutex));
-
priv = drm_atomic_get_private_obj_state(state, &hub->base);
if (IS_ERR(priv))
return ERR_CAST(priv);
diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c
index 163b590be224..cadcdd9ea427 100644
--- a/drivers/gpu/drm/tegra/plane.c
+++ b/drivers/gpu/drm/tegra/plane.c
@@ -129,6 +129,17 @@ static int tegra_dc_pin(struct tegra_dc *dc, struct tegra_plane_state *state)
goto unpin;
}
+ /*
+ * The display controller needs contiguous memory, so
+ * fail if the buffer is discontiguous and we fail to
+ * map its SG table to a single contiguous chunk of
+ * I/O virtual memory.
+ */
+ if (err > 1) {
+ err = -EINVAL;
+ goto unpin;
+ }
+
state->iova[i] = sg_dma_address(sgt->sgl);
state->sgt[i] = sgt;
} else {
diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index 615cb319fa8b..a68d3b36b972 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -3912,8 +3912,7 @@ static int tegra_sor_remove(struct platform_device *pdev)
return 0;
}
-#ifdef CONFIG_PM
-static int tegra_sor_suspend(struct device *dev)
+static int tegra_sor_runtime_suspend(struct device *dev)
{
struct tegra_sor *sor = dev_get_drvdata(dev);
int err;
@@ -3935,7 +3934,7 @@ static int tegra_sor_suspend(struct device *dev)
return 0;
}
-static int tegra_sor_resume(struct device *dev)
+static int tegra_sor_runtime_resume(struct device *dev)
{
struct tegra_sor *sor = dev_get_drvdata(dev);
int err;
@@ -3967,10 +3966,39 @@ static int tegra_sor_resume(struct device *dev)
return 0;
}
-#endif
+
+static int tegra_sor_suspend(struct device *dev)
+{
+ struct tegra_sor *sor = dev_get_drvdata(dev);
+ int err;
+
+ if (sor->hdmi_supply) {
+ err = regulator_disable(sor->hdmi_supply);
+ if (err < 0)
+ return err;
+ }
+
+ return 0;
+}
+
+static int tegra_sor_resume(struct device *dev)
+{
+ struct tegra_sor *sor = dev_get_drvdata(dev);
+ int err;
+
+ if (sor->hdmi_supply) {
+ err = regulator_enable(sor->hdmi_supply);
+ if (err < 0)
+ return err;
+ }
+
+ return 0;
+}
static const struct dev_pm_ops tegra_sor_pm_ops = {
- SET_RUNTIME_PM_OPS(tegra_sor_suspend, tegra_sor_resume, NULL)
+ SET_RUNTIME_PM_OPS(tegra_sor_runtime_suspend, tegra_sor_runtime_resume,
+ NULL)
+ SET_SYSTEM_SLEEP_PM_OPS(tegra_sor_suspend, tegra_sor_resume)
};
struct platform_driver tegra_sor_driver = {
diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
index 9444ba183990..3526c2892ddb 100644
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -167,7 +167,7 @@ static int vic_init(struct host1x_client *client)
int err;
err = host1x_client_iommu_attach(client);
- if (err < 0) {
+ if (err < 0 && err != -ENODEV) {
dev_err(vic->dev, "failed to attach to domain: %d\n", err);
return err;
}
@@ -386,13 +386,14 @@ static const struct vic_config vic_t194_config = {
.supports_sid = true,
};
-static const struct of_device_id vic_match[] = {
+static const struct of_device_id tegra_vic_of_match[] = {
{ .compatible = "nvidia,tegra124-vic", .data = &vic_t124_config },
{ .compatible = "nvidia,tegra210-vic", .data = &vic_t210_config },
{ .compatible = "nvidia,tegra186-vic", .data = &vic_t186_config },
{ .compatible = "nvidia,tegra194-vic", .data = &vic_t194_config },
{ },
};
+MODULE_DEVICE_TABLE(of, tegra_vic_of_match);
static int vic_probe(struct platform_device *pdev)
{
@@ -516,7 +517,7 @@ static const struct dev_pm_ops vic_pm_ops = {
struct platform_driver tegra_vic_driver = {
.driver = {
.name = "tegra-vic",
- .of_match_table = vic_match,
+ .of_match_table = tegra_vic_of_match,
.pm = &vic_pm_ops
},
.probe = vic_probe,
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 4b34a278d65b..11863fbdd5d6 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -42,8 +42,6 @@
#include <linux/uaccess.h>
#include <linux/mem_encrypt.h>
-#define TTM_BO_VM_NUM_PREFAULT 16
-
static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
struct vm_fault *vmf)
{
@@ -106,25 +104,30 @@ static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo,
+ page_offset;
}
-static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
+/**
+ * ttm_bo_vm_reserve - Reserve a buffer object in a retryable vm callback
+ * @bo: The buffer object
+ * @vmf: The fault structure handed to the callback
+ *
+ * vm callbacks like fault() and *_mkwrite() allow for the mm_sem to be dropped
+ * during long waits, and after the wait the callback will be restarted. This
+ * is to allow other threads using the same virtual memory space concurrent
+ * access to map(), unmap() completely unrelated buffer objects. TTM buffer
+ * object reservations sometimes wait for GPU and should therefore be
+ * considered long waits. This function reserves the buffer object interruptibly
+ * taking this into account. Starvation is avoided by the vm system not
+ * allowing too many repeated restarts.
+ * This function is intended to be used in customized fault() and _mkwrite()
+ * handlers.
+ *
+ * Return:
+ * 0 on success and the bo was reserved.
+ * VM_FAULT_RETRY if blocking wait.
+ * VM_FAULT_NOPAGE if blocking wait and retrying was not allowed.
+ */
+vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
+ struct vm_fault *vmf)
{
- struct vm_area_struct *vma = vmf->vma;
- struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
- vma->vm_private_data;
- struct ttm_bo_device *bdev = bo->bdev;
- unsigned long page_offset;
- unsigned long page_last;
- unsigned long pfn;
- struct ttm_tt *ttm = NULL;
- struct page *page;
- int err;
- int i;
- vm_fault_t ret = VM_FAULT_NOPAGE;
- unsigned long address = vmf->address;
- struct ttm_mem_type_manager *man =
- &bdev->man[bo->mem.mem_type];
- struct vm_area_struct cvma;
-
/*
* Work around locking order reversal in fault / nopfn
* between mmap_sem and bo_reserve: Perform a trylock operation
@@ -151,14 +154,54 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
return VM_FAULT_NOPAGE;
}
+ return 0;
+}
+EXPORT_SYMBOL(ttm_bo_vm_reserve);
+
+/**
+ * ttm_bo_vm_fault_reserved - TTM fault helper
+ * @vmf: The struct vm_fault given as argument to the fault callback
+ * @prot: The page protection to be used for this memory area.
+ * @num_prefault: Maximum number of prefault pages. The caller may want to
+ * specify this based on madvice settings and the size of the GPU object
+ * backed by the memory.
+ *
+ * This function inserts one or more page table entries pointing to the
+ * memory backing the buffer object, and then returns a return code
+ * instructing the caller to retry the page access.
+ *
+ * Return:
+ * VM_FAULT_NOPAGE on success or pending signal
+ * VM_FAULT_SIGBUS on unspecified error
+ * VM_FAULT_OOM on out-of-memory
+ * VM_FAULT_RETRY if retryable wait
+ */
+vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
+ pgprot_t prot,
+ pgoff_t num_prefault)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct vm_area_struct cvma = *vma;
+ struct ttm_buffer_object *bo = vma->vm_private_data;
+ struct ttm_bo_device *bdev = bo->bdev;
+ unsigned long page_offset;
+ unsigned long page_last;
+ unsigned long pfn;
+ struct ttm_tt *ttm = NULL;
+ struct page *page;
+ int err;
+ pgoff_t i;
+ vm_fault_t ret = VM_FAULT_NOPAGE;
+ unsigned long address = vmf->address;
+ struct ttm_mem_type_manager *man =
+ &bdev->man[bo->mem.mem_type];
+
/*
* Refuse to fault imported pages. This should be handled
* (if at all) by redirecting mmap to the exporter.
*/
- if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG)) {
- ret = VM_FAULT_SIGBUS;
- goto out_unlock;
- }
+ if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG))
+ return VM_FAULT_SIGBUS;
if (bdev->driver->fault_reserve_notify) {
struct dma_fence *moving = dma_fence_get(bo->moving);
@@ -169,11 +212,9 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
break;
case -EBUSY:
case -ERESTARTSYS:
- ret = VM_FAULT_NOPAGE;
- goto out_unlock;
+ return VM_FAULT_NOPAGE;
default:
- ret = VM_FAULT_SIGBUS;
- goto out_unlock;
+ return VM_FAULT_SIGBUS;
}
if (bo->moving != moving) {
@@ -189,21 +230,12 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
* move.
*/
ret = ttm_bo_vm_fault_idle(bo, vmf);
- if (unlikely(ret != 0)) {
- if (ret == VM_FAULT_RETRY &&
- !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
- /* The BO has already been unreserved. */
- return ret;
- }
-
- goto out_unlock;
- }
+ if (unlikely(ret != 0))
+ return ret;
err = ttm_mem_io_lock(man, true);
- if (unlikely(err != 0)) {
- ret = VM_FAULT_NOPAGE;
- goto out_unlock;
- }
+ if (unlikely(err != 0))
+ return VM_FAULT_NOPAGE;
err = ttm_mem_io_reserve_vm(bo);
if (unlikely(err != 0)) {
ret = VM_FAULT_SIGBUS;
@@ -220,18 +252,8 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
goto out_io_unlock;
}
- /*
- * Make a local vma copy to modify the page_prot member
- * and vm_flags if necessary. The vma parameter is protected
- * by mmap_sem in write mode.
- */
- cvma = *vma;
- cvma.vm_page_prot = vm_get_page_prot(cvma.vm_flags);
-
- if (bo->mem.bus.is_iomem) {
- cvma.vm_page_prot = ttm_io_prot(bo->mem.placement,
- cvma.vm_page_prot);
- } else {
+ cvma.vm_page_prot = ttm_io_prot(bo->mem.placement, prot);
+ if (!bo->mem.bus.is_iomem) {
struct ttm_operation_ctx ctx = {
.interruptible = false,
.no_wait_gpu = false,
@@ -240,24 +262,21 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
};
ttm = bo->ttm;
- cvma.vm_page_prot = ttm_io_prot(bo->mem.placement,
- cvma.vm_page_prot);
-
- /* Allocate all page at once, most common usage */
- if (ttm_tt_populate(ttm, &ctx)) {
+ if (ttm_tt_populate(bo->ttm, &ctx)) {
ret = VM_FAULT_OOM;
goto out_io_unlock;
}
+ } else {
+ /* Iomem should not be marked encrypted */
+ cvma.vm_page_prot = pgprot_decrypted(cvma.vm_page_prot);
}
/*
* Speculatively prefault a number of pages. Only error on
* first page.
*/
- for (i = 0; i < TTM_BO_VM_NUM_PREFAULT; ++i) {
+ for (i = 0; i < num_prefault; ++i) {
if (bo->mem.bus.is_iomem) {
- /* Iomem should not be marked encrypted */
- cvma.vm_page_prot = pgprot_decrypted(cvma.vm_page_prot);
pfn = ttm_bo_io_mem_pfn(bo, page_offset);
} else {
page = ttm->pages[page_offset];
@@ -293,28 +312,49 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
ret = VM_FAULT_NOPAGE;
out_io_unlock:
ttm_mem_io_unlock(man);
-out_unlock:
+ return ret;
+}
+EXPORT_SYMBOL(ttm_bo_vm_fault_reserved);
+
+static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ pgprot_t prot;
+ struct ttm_buffer_object *bo = vma->vm_private_data;
+ vm_fault_t ret;
+
+ ret = ttm_bo_vm_reserve(bo, vmf);
+ if (ret)
+ return ret;
+
+ prot = vm_get_page_prot(vma->vm_flags);
+ ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT);
+ if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+ return ret;
+
dma_resv_unlock(bo->base.resv);
+
return ret;
}
-static void ttm_bo_vm_open(struct vm_area_struct *vma)
+void ttm_bo_vm_open(struct vm_area_struct *vma)
{
- struct ttm_buffer_object *bo =
- (struct ttm_buffer_object *)vma->vm_private_data;
+ struct ttm_buffer_object *bo = vma->vm_private_data;
WARN_ON(bo->bdev->dev_mapping != vma->vm_file->f_mapping);
ttm_bo_get(bo);
}
+EXPORT_SYMBOL(ttm_bo_vm_open);
-static void ttm_bo_vm_close(struct vm_area_struct *vma)
+void ttm_bo_vm_close(struct vm_area_struct *vma)
{
- struct ttm_buffer_object *bo = (struct ttm_buffer_object *)vma->vm_private_data;
+ struct ttm_buffer_object *bo = vma->vm_private_data;
ttm_bo_put(bo);
vma->vm_private_data = NULL;
}
+EXPORT_SYMBOL(ttm_bo_vm_close);
static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo,
unsigned long offset,
diff --git a/drivers/gpu/drm/vmwgfx/Kconfig b/drivers/gpu/drm/vmwgfx/Kconfig
index 6b28a326f8bb..15acdf2a7c0f 100644
--- a/drivers/gpu/drm/vmwgfx/Kconfig
+++ b/drivers/gpu/drm/vmwgfx/Kconfig
@@ -8,6 +8,7 @@ config DRM_VMWGFX
select FB_CFB_IMAGEBLIT
select DRM_TTM
select FB
+ select MAPPING_DIRTY_HELPERS
# Only needed for the transitional use of drm_crtc_init - can be removed
# again once vmwgfx sets up the primary plane itself.
select DRM_KMS_HELPER
diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile
index 8841bd30e1e5..c877a21a0739 100644
--- a/drivers/gpu/drm/vmwgfx/Makefile
+++ b/drivers/gpu/drm/vmwgfx/Makefile
@@ -8,7 +8,7 @@ vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \
vmwgfx_cmdbuf_res.o vmwgfx_cmdbuf.o vmwgfx_stdu.o \
vmwgfx_cotable.o vmwgfx_so.o vmwgfx_binding.o vmwgfx_msg.o \
vmwgfx_simple_resource.o vmwgfx_va.o vmwgfx_blit.o \
- vmwgfx_validation.o \
+ vmwgfx_validation.o vmwgfx_page_dirty.o \
ttm_object.o ttm_lock.o
obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o
diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
index f2bfd3d80598..61414f105c67 100644
--- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
+++ b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
@@ -1280,7 +1280,6 @@ svga3dsurface_get_pixel_offset(SVGA3dSurfaceFormat format,
return offset;
}
-
static inline u32
svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format,
surf_size_struct baseLevelSize,
@@ -1375,4 +1374,236 @@ svga3dsurface_is_screen_target_format(SVGA3dSurfaceFormat format)
return svga3dsurface_is_dx_screen_target_format(format);
}
+/**
+ * struct svga3dsurface_mip - Mimpmap level information
+ * @bytes: Bytes required in the backing store of this mipmap level.
+ * @img_stride: Byte stride per image.
+ * @row_stride: Byte stride per block row.
+ * @size: The size of the mipmap.
+ */
+struct svga3dsurface_mip {
+ size_t bytes;
+ size_t img_stride;
+ size_t row_stride;
+ struct drm_vmw_size size;
+
+};
+
+/**
+ * struct svga3dsurface_cache - Cached surface information
+ * @desc: Pointer to the surface descriptor
+ * @mip: Array of mipmap level information. Valid size is @num_mip_levels.
+ * @mip_chain_bytes: Bytes required in the backing store for the whole chain
+ * of mip levels.
+ * @sheet_bytes: Bytes required in the backing store for a sheet
+ * representing a single sample.
+ * @num_mip_levels: Valid size of the @mip array. Number of mipmap levels in
+ * a chain.
+ * @num_layers: Number of slices in an array texture or number of faces in
+ * a cubemap texture.
+ */
+struct svga3dsurface_cache {
+ const struct svga3d_surface_desc *desc;
+ struct svga3dsurface_mip mip[DRM_VMW_MAX_MIP_LEVELS];
+ size_t mip_chain_bytes;
+ size_t sheet_bytes;
+ u32 num_mip_levels;
+ u32 num_layers;
+};
+
+/**
+ * struct svga3dsurface_loc - Surface location
+ * @sub_resource: Surface subresource. Defined as layer * num_mip_levels +
+ * mip_level.
+ * @x: X coordinate.
+ * @y: Y coordinate.
+ * @z: Z coordinate.
+ */
+struct svga3dsurface_loc {
+ u32 sub_resource;
+ u32 x, y, z;
+};
+
+/**
+ * svga3dsurface_subres - Compute the subresource from layer and mipmap.
+ * @cache: Surface layout data.
+ * @mip_level: The mipmap level.
+ * @layer: The surface layer (face or array slice).
+ *
+ * Return: The subresource.
+ */
+static inline u32 svga3dsurface_subres(const struct svga3dsurface_cache *cache,
+ u32 mip_level, u32 layer)
+{
+ return cache->num_mip_levels * layer + mip_level;
+}
+
+/**
+ * svga3dsurface_setup_cache - Build a surface cache entry
+ * @size: The surface base level dimensions.
+ * @format: The surface format.
+ * @num_mip_levels: Number of mipmap levels.
+ * @num_layers: Number of layers.
+ * @cache: Pointer to a struct svga3dsurface_cach object to be filled in.
+ *
+ * Return: Zero on success, -EINVAL on invalid surface layout.
+ */
+static inline int svga3dsurface_setup_cache(const struct drm_vmw_size *size,
+ SVGA3dSurfaceFormat format,
+ u32 num_mip_levels,
+ u32 num_layers,
+ u32 num_samples,
+ struct svga3dsurface_cache *cache)
+{
+ const struct svga3d_surface_desc *desc;
+ u32 i;
+
+ memset(cache, 0, sizeof(*cache));
+ cache->desc = desc = svga3dsurface_get_desc(format);
+ cache->num_mip_levels = num_mip_levels;
+ cache->num_layers = num_layers;
+ for (i = 0; i < cache->num_mip_levels; i++) {
+ struct svga3dsurface_mip *mip = &cache->mip[i];
+
+ mip->size = svga3dsurface_get_mip_size(*size, i);
+ mip->bytes = svga3dsurface_get_image_buffer_size
+ (desc, &mip->size, 0);
+ mip->row_stride =
+ __KERNEL_DIV_ROUND_UP(mip->size.width,
+ desc->block_size.width) *
+ desc->bytes_per_block * num_samples;
+ if (!mip->row_stride)
+ goto invalid_dim;
+
+ mip->img_stride =
+ __KERNEL_DIV_ROUND_UP(mip->size.height,
+ desc->block_size.height) *
+ mip->row_stride;
+ if (!mip->img_stride)
+ goto invalid_dim;
+
+ cache->mip_chain_bytes += mip->bytes;
+ }
+ cache->sheet_bytes = cache->mip_chain_bytes * num_layers;
+ if (!cache->sheet_bytes)
+ goto invalid_dim;
+
+ return 0;
+
+invalid_dim:
+ VMW_DEBUG_USER("Invalid surface layout for dirty tracking.\n");
+ return -EINVAL;
+}
+
+/**
+ * svga3dsurface_get_loc - Get a surface location from an offset into the
+ * backing store
+ * @cache: Surface layout data.
+ * @loc: Pointer to a struct svga3dsurface_loc to be filled in.
+ * @offset: Offset into the surface backing store.
+ */
+static inline void
+svga3dsurface_get_loc(const struct svga3dsurface_cache *cache,
+ struct svga3dsurface_loc *loc,
+ size_t offset)
+{
+ const struct svga3dsurface_mip *mip = &cache->mip[0];
+ const struct svga3d_surface_desc *desc = cache->desc;
+ u32 layer;
+ int i;
+
+ if (offset >= cache->sheet_bytes)
+ offset %= cache->sheet_bytes;
+
+ layer = offset / cache->mip_chain_bytes;
+ offset -= layer * cache->mip_chain_bytes;
+ for (i = 0; i < cache->num_mip_levels; ++i, ++mip) {
+ if (mip->bytes > offset)
+ break;
+ offset -= mip->bytes;
+ }
+
+ loc->sub_resource = svga3dsurface_subres(cache, i, layer);
+ loc->z = offset / mip->img_stride;
+ offset -= loc->z * mip->img_stride;
+ loc->z *= desc->block_size.depth;
+ loc->y = offset / mip->row_stride;
+ offset -= loc->y * mip->row_stride;
+ loc->y *= desc->block_size.height;
+ loc->x = offset / desc->bytes_per_block;
+ loc->x *= desc->block_size.width;
+}
+
+/**
+ * svga3dsurface_inc_loc - Clamp increment a surface location with one block
+ * size
+ * in each dimension.
+ * @loc: Pointer to a struct svga3dsurface_loc to be incremented.
+ *
+ * When computing the size of a range as size = end - start, the range does not
+ * include the end element. However a location representing the last byte
+ * of a touched region in the backing store *is* included in the range.
+ * This function modifies such a location to match the end definition
+ * given as start + size which is the one used in a SVGA3dBox.
+ */
+static inline void
+svga3dsurface_inc_loc(const struct svga3dsurface_cache *cache,
+ struct svga3dsurface_loc *loc)
+{
+ const struct svga3d_surface_desc *desc = cache->desc;
+ u32 mip = loc->sub_resource % cache->num_mip_levels;
+ const struct drm_vmw_size *size = &cache->mip[mip].size;
+
+ loc->sub_resource++;
+ loc->x += desc->block_size.width;
+ if (loc->x > size->width)
+ loc->x = size->width;
+ loc->y += desc->block_size.height;
+ if (loc->y > size->height)
+ loc->y = size->height;
+ loc->z += desc->block_size.depth;
+ if (loc->z > size->depth)
+ loc->z = size->depth;
+}
+
+/**
+ * svga3dsurface_min_loc - The start location in a subresource
+ * @cache: Surface layout data.
+ * @sub_resource: The subresource.
+ * @loc: Pointer to a struct svga3dsurface_loc to be filled in.
+ */
+static inline void
+svga3dsurface_min_loc(const struct svga3dsurface_cache *cache,
+ u32 sub_resource,
+ struct svga3dsurface_loc *loc)
+{
+ loc->sub_resource = sub_resource;
+ loc->x = loc->y = loc->z = 0;
+}
+
+/**
+ * svga3dsurface_min_loc - The end location in a subresource
+ * @cache: Surface layout data.
+ * @sub_resource: The subresource.
+ * @loc: Pointer to a struct svga3dsurface_loc to be filled in.
+ *
+ * Following the end definition given in svga3dsurface_inc_loc(),
+ * Compute the end location of a surface subresource.
+ */
+static inline void
+svga3dsurface_max_loc(const struct svga3dsurface_cache *cache,
+ u32 sub_resource,
+ struct svga3dsurface_loc *loc)
+{
+ const struct drm_vmw_size *size;
+ u32 mip;
+
+ loc->sub_resource = sub_resource + 1;
+ mip = sub_resource % cache->num_mip_levels;
+ size = &cache->mip[mip].size;
+ loc->x = size->width;
+ loc->y = size->height;
+ loc->z = size->depth;
+}
+
#endif /* _SVGA3D_SURFACEDEFS_H_ */
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
index 74016a08d118..8b71bf6b58ef 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
@@ -462,6 +462,8 @@ void vmw_bo_bo_free(struct ttm_buffer_object *bo)
{
struct vmw_buffer_object *vmw_bo = vmw_buffer_object(bo);
+ WARN_ON(vmw_bo->dirty);
+ WARN_ON(!RB_EMPTY_ROOT(&vmw_bo->res_tree));
vmw_bo_unmap(vmw_bo);
kfree(vmw_bo);
}
@@ -475,8 +477,11 @@ void vmw_bo_bo_free(struct ttm_buffer_object *bo)
static void vmw_user_bo_destroy(struct ttm_buffer_object *bo)
{
struct vmw_user_buffer_object *vmw_user_bo = vmw_user_buffer_object(bo);
+ struct vmw_buffer_object *vbo = &vmw_user_bo->vbo;
- vmw_bo_unmap(&vmw_user_bo->vbo);
+ WARN_ON(vbo->dirty);
+ WARN_ON(!RB_EMPTY_ROOT(&vbo->res_tree));
+ vmw_bo_unmap(vbo);
ttm_prime_object_kfree(vmw_user_bo, prime);
}
@@ -511,8 +516,7 @@ int vmw_bo_init(struct vmw_private *dev_priv,
memset(vmw_bo, 0, sizeof(*vmw_bo));
BUILD_BUG_ON(TTM_MAX_BO_PRIORITY <= 3);
vmw_bo->base.priority = 3;
-
- INIT_LIST_HEAD(&vmw_bo->res_list);
+ vmw_bo->res_tree = RB_ROOT;
ret = ttm_bo_init(bdev, &vmw_bo->base, size,
ttm_bo_type_device, placement,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index b18842f73081..a31e726d6d71 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -56,9 +56,9 @@
#define VMWGFX_DRIVER_NAME "vmwgfx"
-#define VMWGFX_DRIVER_DATE "20180704"
+#define VMWGFX_DRIVER_DATE "20190328"
#define VMWGFX_DRIVER_MAJOR 2
-#define VMWGFX_DRIVER_MINOR 15
+#define VMWGFX_DRIVER_MINOR 16
#define VMWGFX_DRIVER_PATCHLEVEL 0
#define VMWGFX_FIFO_STATIC_SIZE (1024*1024)
#define VMWGFX_MAX_RELOCATIONS 2048
@@ -100,17 +100,18 @@ struct vmw_fpriv {
/**
* struct vmw_buffer_object - TTM buffer object with vmwgfx additions
* @base: The TTM buffer object
- * @res_list: List of resources using this buffer object as a backing MOB
+ * @res_tree: RB tree of resources using this buffer object as a backing MOB
* @pin_count: pin depth
* @cpu_writers: Number of synccpu write grabs. Protected by reservation when
* increased. May be decreased without reservation.
* @dx_query_ctx: DX context if this buffer object is used as a DX query MOB
* @map: Kmap object for semi-persistent mappings
* @res_prios: Eviction priority counts for attached resources
+ * @dirty: structure for user-space dirty-tracking
*/
struct vmw_buffer_object {
struct ttm_buffer_object base;
- struct list_head res_list;
+ struct rb_root res_tree;
s32 pin_count;
atomic_t cpu_writers;
/* Not ref-counted. Protected by binding_mutex */
@@ -118,6 +119,7 @@ struct vmw_buffer_object {
/* Protected by reservation */
struct ttm_bo_kmap_obj map;
u32 res_prios[TTM_MAX_BO_PRIORITY];
+ struct vmw_bo_dirty *dirty;
};
/**
@@ -148,7 +150,8 @@ struct vmw_res_func;
* @res_dirty: Resource contains data not yet in the backup buffer. Protected
* by resource reserved.
* @backup_dirty: Backup buffer contains data not yet in the HW resource.
- * Protecte by resource reserved.
+ * Protected by resource reserved.
+ * @coherent: Emulate coherency by tracking vm accesses.
* @backup: The backup buffer if any. Protected by resource reserved.
* @backup_offset: Offset into the backup buffer if any. Protected by resource
* reserved. Note that only a few resource types can have a @backup_offset
@@ -157,29 +160,32 @@ struct vmw_res_func;
* pin-count greater than zero. It is not on the resource LRU lists and its
* backup buffer is pinned. Hence it can't be evicted.
* @func: Method vtable for this resource. Immutable.
+ * @mob_node; Node for the MOB backup rbtree. Protected by @backup reserved.
* @lru_head: List head for the LRU list. Protected by @dev_priv::resource_lock.
- * @mob_head: List head for the MOB backup list. Protected by @backup reserved.
* @binding_head: List head for the context binding list. Protected by
* the @dev_priv::binding_mutex
* @res_free: The resource destructor.
* @hw_destroy: Callback to destroy the resource on the device, as part of
* resource destruction.
*/
+struct vmw_resource_dirty;
struct vmw_resource {
struct kref kref;
struct vmw_private *dev_priv;
int id;
u32 used_prio;
unsigned long backup_size;
- bool res_dirty;
- bool backup_dirty;
+ u32 res_dirty : 1;
+ u32 backup_dirty : 1;
+ u32 coherent : 1;
struct vmw_buffer_object *backup;
unsigned long backup_offset;
unsigned long pin_count;
const struct vmw_res_func *func;
+ struct rb_node mob_node;
struct list_head lru_head;
- struct list_head mob_head;
struct list_head binding_head;
+ struct vmw_resource_dirty *dirty;
void (*res_free) (struct vmw_resource *res);
void (*hw_destroy) (struct vmw_resource *res);
};
@@ -678,7 +684,8 @@ extern void vmw_resource_unreference(struct vmw_resource **p_res);
extern struct vmw_resource *vmw_resource_reference(struct vmw_resource *res);
extern struct vmw_resource *
vmw_resource_reference_unless_doomed(struct vmw_resource *res);
-extern int vmw_resource_validate(struct vmw_resource *res, bool intr);
+extern int vmw_resource_validate(struct vmw_resource *res, bool intr,
+ bool dirtying);
extern int vmw_resource_reserve(struct vmw_resource *res, bool interruptible,
bool no_backup);
extern bool vmw_resource_needs_backup(const struct vmw_resource *res);
@@ -720,6 +727,10 @@ extern void vmw_resource_evict_all(struct vmw_private *dev_priv);
extern void vmw_resource_unbind_list(struct vmw_buffer_object *vbo);
void vmw_resource_mob_attach(struct vmw_resource *res);
void vmw_resource_mob_detach(struct vmw_resource *res);
+void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start,
+ pgoff_t end);
+int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start,
+ pgoff_t end, pgoff_t *num_prefault);
/**
* vmw_resource_mob_attached - Whether a resource currently has a mob attached
@@ -729,7 +740,7 @@ void vmw_resource_mob_detach(struct vmw_resource *res);
*/
static inline bool vmw_resource_mob_attached(const struct vmw_resource *res)
{
- return !list_empty(&res->mob_head);
+ return !RB_EMPTY_NODE(&res->mob_node);
}
/**
@@ -1407,6 +1418,17 @@ int vmw_host_log(const char *log);
#define VMW_DEBUG_USER(fmt, ...) \
DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__)
+/* Resource dirtying - vmwgfx_page_dirty.c */
+void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo);
+int vmw_bo_dirty_add(struct vmw_buffer_object *vbo);
+void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res);
+void vmw_bo_dirty_clear_res(struct vmw_resource *res);
+void vmw_bo_dirty_release(struct vmw_buffer_object *vbo);
+void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo,
+ pgoff_t start, pgoff_t end);
+vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf);
+vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf);
+
/**
* VMW_DEBUG_KMS - Debug output for kernel mode-setting
*
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index ff86d49dc5e8..934ad7c0c342 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -2560,7 +2560,6 @@ static int vmw_cmd_dx_check_subresource(struct vmw_private *dev_priv,
offsetof(typeof(*cmd), sid));
cmd = container_of(header, typeof(*cmd), header);
-
return vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
VMW_RES_DIRTY_NONE, user_surface_converter,
&cmd->sid, NULL);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
new file mode 100644
index 000000000000..f07aa857587c
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
@@ -0,0 +1,488 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/**************************************************************************
+ *
+ * Copyright 2019 VMware, Inc., Palo Alto, CA., USA
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+#include "vmwgfx_drv.h"
+
+/*
+ * Different methods for tracking dirty:
+ * VMW_BO_DIRTY_PAGETABLE - Scan the pagetable for hardware dirty bits
+ * VMW_BO_DIRTY_MKWRITE - Write-protect page table entries and record write-
+ * accesses in the VM mkwrite() callback
+ */
+enum vmw_bo_dirty_method {
+ VMW_BO_DIRTY_PAGETABLE,
+ VMW_BO_DIRTY_MKWRITE,
+};
+
+/*
+ * No dirtied pages at scan trigger a transition to the _MKWRITE method,
+ * similarly a certain percentage of dirty pages trigger a transition to
+ * the _PAGETABLE method. How many triggers should we wait for before
+ * changing method?
+ */
+#define VMW_DIRTY_NUM_CHANGE_TRIGGERS 2
+
+/* Percentage to trigger a transition to the _PAGETABLE method */
+#define VMW_DIRTY_PERCENTAGE 10
+
+/**
+ * struct vmw_bo_dirty - Dirty information for buffer objects
+ * @start: First currently dirty bit
+ * @end: Last currently dirty bit + 1
+ * @method: The currently used dirty method
+ * @change_count: Number of consecutive method change triggers
+ * @ref_count: Reference count for this structure
+ * @bitmap_size: The size of the bitmap in bits. Typically equal to the
+ * nuber of pages in the bo.
+ * @size: The accounting size for this struct.
+ * @bitmap: A bitmap where each bit represents a page. A set bit means a
+ * dirty page.
+ */
+struct vmw_bo_dirty {
+ unsigned long start;
+ unsigned long end;
+ enum vmw_bo_dirty_method method;
+ unsigned int change_count;
+ unsigned int ref_count;
+ unsigned long bitmap_size;
+ size_t size;
+ unsigned long bitmap[0];
+};
+
+/**
+ * vmw_bo_dirty_scan_pagetable - Perform a pagetable scan for dirty bits
+ * @vbo: The buffer object to scan
+ *
+ * Scans the pagetable for dirty bits. Clear those bits and modify the
+ * dirty structure with the results. This function may change the
+ * dirty-tracking method.
+ */
+static void vmw_bo_dirty_scan_pagetable(struct vmw_buffer_object *vbo)
+{
+ struct vmw_bo_dirty *dirty = vbo->dirty;
+ pgoff_t offset = drm_vma_node_start(&vbo->base.base.vma_node);
+ struct address_space *mapping = vbo->base.bdev->dev_mapping;
+ pgoff_t num_marked;
+
+ num_marked = clean_record_shared_mapping_range
+ (mapping,
+ offset, dirty->bitmap_size,
+ offset, &dirty->bitmap[0],
+ &dirty->start, &dirty->end);
+ if (num_marked == 0)
+ dirty->change_count++;
+ else
+ dirty->change_count = 0;
+
+ if (dirty->change_count > VMW_DIRTY_NUM_CHANGE_TRIGGERS) {
+ dirty->change_count = 0;
+ dirty->method = VMW_BO_DIRTY_MKWRITE;
+ wp_shared_mapping_range(mapping,
+ offset, dirty->bitmap_size);
+ clean_record_shared_mapping_range(mapping,
+ offset, dirty->bitmap_size,
+ offset, &dirty->bitmap[0],
+ &dirty->start, &dirty->end);
+ }
+}
+
+/**
+ * vmw_bo_dirty_scan_mkwrite - Reset the mkwrite dirty-tracking method
+ * @vbo: The buffer object to scan
+ *
+ * Write-protect pages written to so that consecutive write accesses will
+ * trigger a call to mkwrite.
+ *
+ * This function may change the dirty-tracking method.
+ */
+static void vmw_bo_dirty_scan_mkwrite(struct vmw_buffer_object *vbo)
+{
+ struct vmw_bo_dirty *dirty = vbo->dirty;
+ unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node);
+ struct address_space *mapping = vbo->base.bdev->dev_mapping;
+ pgoff_t num_marked;
+
+ if (dirty->end <= dirty->start)
+ return;
+
+ num_marked = wp_shared_mapping_range(vbo->base.bdev->dev_mapping,
+ dirty->start + offset,
+ dirty->end - dirty->start);
+
+ if (100UL * num_marked / dirty->bitmap_size >
+ VMW_DIRTY_PERCENTAGE) {
+ dirty->change_count++;
+ } else {
+ dirty->change_count = 0;
+ }
+
+ if (dirty->change_count > VMW_DIRTY_NUM_CHANGE_TRIGGERS) {
+ pgoff_t start = 0;
+ pgoff_t end = dirty->bitmap_size;
+
+ dirty->method = VMW_BO_DIRTY_PAGETABLE;
+ clean_record_shared_mapping_range(mapping, offset, end, offset,
+ &dirty->bitmap[0],
+ &start, &end);
+ bitmap_clear(&dirty->bitmap[0], 0, dirty->bitmap_size);
+ if (dirty->start < dirty->end)
+ bitmap_set(&dirty->bitmap[0], dirty->start,
+ dirty->end - dirty->start);
+ dirty->change_count = 0;
+ }
+}
+
+/**
+ * vmw_bo_dirty_scan - Scan for dirty pages and add them to the dirty
+ * tracking structure
+ * @vbo: The buffer object to scan
+ *
+ * This function may change the dirty tracking method.
+ */
+void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo)
+{
+ struct vmw_bo_dirty *dirty = vbo->dirty;
+
+ if (dirty->method == VMW_BO_DIRTY_PAGETABLE)
+ vmw_bo_dirty_scan_pagetable(vbo);
+ else
+ vmw_bo_dirty_scan_mkwrite(vbo);
+}
+
+/**
+ * vmw_bo_dirty_pre_unmap - write-protect and pick up dirty pages before
+ * an unmap_mapping_range operation.
+ * @vbo: The buffer object,
+ * @start: First page of the range within the buffer object.
+ * @end: Last page of the range within the buffer object + 1.
+ *
+ * If we're using the _PAGETABLE scan method, we may leak dirty pages
+ * when calling unmap_mapping_range(). This function makes sure we pick
+ * up all dirty pages.
+ */
+static void vmw_bo_dirty_pre_unmap(struct vmw_buffer_object *vbo,
+ pgoff_t start, pgoff_t end)
+{
+ struct vmw_bo_dirty *dirty = vbo->dirty;
+ unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node);
+ struct address_space *mapping = vbo->base.bdev->dev_mapping;
+
+ if (dirty->method != VMW_BO_DIRTY_PAGETABLE || start >= end)
+ return;
+
+ wp_shared_mapping_range(mapping, start + offset, end - start);
+ clean_record_shared_mapping_range(mapping, start + offset,
+ end - start, offset,
+ &dirty->bitmap[0], &dirty->start,
+ &dirty->end);
+}
+
+/**
+ * vmw_bo_dirty_unmap - Clear all ptes pointing to a range within a bo
+ * @vbo: The buffer object,
+ * @start: First page of the range within the buffer object.
+ * @end: Last page of the range within the buffer object + 1.
+ *
+ * This is similar to ttm_bo_unmap_virtual_locked() except it takes a subrange.
+ */
+void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo,
+ pgoff_t start, pgoff_t end)
+{
+ unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node);
+ struct address_space *mapping = vbo->base.bdev->dev_mapping;
+
+ vmw_bo_dirty_pre_unmap(vbo, start, end);
+ unmap_shared_mapping_range(mapping, (offset + start) << PAGE_SHIFT,
+ (loff_t) (end - start) << PAGE_SHIFT);
+}
+
+/**
+ * vmw_bo_dirty_add - Add a dirty-tracking user to a buffer object
+ * @vbo: The buffer object
+ *
+ * This function registers a dirty-tracking user to a buffer object.
+ * A user can be for example a resource or a vma in a special user-space
+ * mapping.
+ *
+ * Return: Zero on success, -ENOMEM on memory allocation failure.
+ */
+int vmw_bo_dirty_add(struct vmw_buffer_object *vbo)
+{
+ struct vmw_bo_dirty *dirty = vbo->dirty;
+ pgoff_t num_pages = vbo->base.num_pages;
+ size_t size, acc_size;
+ int ret;
+ static struct ttm_operation_ctx ctx = {
+ .interruptible = false,
+ .no_wait_gpu = false
+ };
+
+ if (dirty) {
+ dirty->ref_count++;
+ return 0;
+ }
+
+ size = sizeof(*dirty) + BITS_TO_LONGS(num_pages) * sizeof(long);
+ acc_size = ttm_round_pot(size);
+ ret = ttm_mem_global_alloc(&ttm_mem_glob, acc_size, &ctx);
+ if (ret) {
+ VMW_DEBUG_USER("Out of graphics memory for buffer object "
+ "dirty tracker.\n");
+ return ret;
+ }
+ dirty = kvzalloc(size, GFP_KERNEL);
+ if (!dirty) {
+ ret = -ENOMEM;
+ goto out_no_dirty;
+ }
+
+ dirty->size = acc_size;
+ dirty->bitmap_size = num_pages;
+ dirty->start = dirty->bitmap_size;
+ dirty->end = 0;
+ dirty->ref_count = 1;
+ if (num_pages < PAGE_SIZE / sizeof(pte_t)) {
+ dirty->method = VMW_BO_DIRTY_PAGETABLE;
+ } else {
+ struct address_space *mapping = vbo->base.bdev->dev_mapping;
+ pgoff_t offset = drm_vma_node_start(&vbo->base.base.vma_node);
+
+ dirty->method = VMW_BO_DIRTY_MKWRITE;
+
+ /* Write-protect and then pick up already dirty bits */
+ wp_shared_mapping_range(mapping, offset, num_pages);
+ clean_record_shared_mapping_range(mapping, offset, num_pages,
+ offset,
+ &dirty->bitmap[0],
+ &dirty->start, &dirty->end);
+ }
+
+ vbo->dirty = dirty;
+
+ return 0;
+
+out_no_dirty:
+ ttm_mem_global_free(&ttm_mem_glob, acc_size);
+ return ret;
+}
+
+/**
+ * vmw_bo_dirty_release - Release a dirty-tracking user from a buffer object
+ * @vbo: The buffer object
+ *
+ * This function releases a dirty-tracking user from a buffer object.
+ * If the reference count reaches zero, then the dirty-tracking object is
+ * freed and the pointer to it cleared.
+ *
+ * Return: Zero on success, -ENOMEM on memory allocation failure.
+ */
+void vmw_bo_dirty_release(struct vmw_buffer_object *vbo)
+{
+ struct vmw_bo_dirty *dirty = vbo->dirty;
+
+ if (dirty && --dirty->ref_count == 0) {
+ size_t acc_size = dirty->size;
+
+ kvfree(dirty);
+ ttm_mem_global_free(&ttm_mem_glob, acc_size);
+ vbo->dirty = NULL;
+ }
+}
+
+/**
+ * vmw_bo_dirty_transfer_to_res - Pick up a resource's dirty region from
+ * its backing mob.
+ * @res: The resource
+ *
+ * This function will pick up all dirty ranges affecting the resource from
+ * it's backup mob, and call vmw_resource_dirty_update() once for each
+ * range. The transferred ranges will be cleared from the backing mob's
+ * dirty tracking.
+ */
+void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res)
+{
+ struct vmw_buffer_object *vbo = res->backup;
+ struct vmw_bo_dirty *dirty = vbo->dirty;
+ pgoff_t start, cur, end;
+ unsigned long res_start = res->backup_offset;
+ unsigned long res_end = res->backup_offset + res->backup_size;
+
+ WARN_ON_ONCE(res_start & ~PAGE_MASK);
+ res_start >>= PAGE_SHIFT;
+ res_end = DIV_ROUND_UP(res_end, PAGE_SIZE);
+
+ if (res_start >= dirty->end || res_end <= dirty->start)
+ return;
+
+ cur = max(res_start, dirty->start);
+ res_end = max(res_end, dirty->end);
+ while (cur < res_end) {
+ unsigned long num;
+
+ start = find_next_bit(&dirty->bitmap[0], res_end, cur);
+ if (start >= res_end)
+ break;
+
+ end = find_next_zero_bit(&dirty->bitmap[0], res_end, start + 1);
+ cur = end + 1;
+ num = end - start;
+ bitmap_clear(&dirty->bitmap[0], start, num);
+ vmw_resource_dirty_update(res, start, end);
+ }
+
+ if (res_start <= dirty->start && res_end > dirty->start)
+ dirty->start = res_end;
+ if (res_start < dirty->end && res_end >= dirty->end)
+ dirty->end = res_start;
+}
+
+/**
+ * vmw_bo_dirty_clear_res - Clear a resource's dirty region from
+ * its backing mob.
+ * @res: The resource
+ *
+ * This function will clear all dirty ranges affecting the resource from
+ * it's backup mob's dirty tracking.
+ */
+void vmw_bo_dirty_clear_res(struct vmw_resource *res)
+{
+ unsigned long res_start = res->backup_offset;
+ unsigned long res_end = res->backup_offset + res->backup_size;
+ struct vmw_buffer_object *vbo = res->backup;
+ struct vmw_bo_dirty *dirty = vbo->dirty;
+
+ res_start >>= PAGE_SHIFT;
+ res_end = DIV_ROUND_UP(res_end, PAGE_SIZE);
+
+ if (res_start >= dirty->end || res_end <= dirty->start)
+ return;
+
+ res_start = max(res_start, dirty->start);
+ res_end = min(res_end, dirty->end);
+ bitmap_clear(&dirty->bitmap[0], res_start, res_end - res_start);
+
+ if (res_start <= dirty->start && res_end > dirty->start)
+ dirty->start = res_end;
+ if (res_start < dirty->end && res_end >= dirty->end)
+ dirty->end = res_start;
+}
+
+vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
+ vma->vm_private_data;
+ vm_fault_t ret;
+ unsigned long page_offset;
+ unsigned int save_flags;
+ struct vmw_buffer_object *vbo =
+ container_of(bo, typeof(*vbo), base);
+
+ /*
+ * mkwrite() doesn't handle the VM_FAULT_RETRY return value correctly.
+ * So make sure the TTM helpers are aware.
+ */
+ save_flags = vmf->flags;
+ vmf->flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ ret = ttm_bo_vm_reserve(bo, vmf);
+ vmf->flags = save_flags;
+ if (ret)
+ return ret;
+
+ page_offset = vmf->pgoff - drm_vma_node_start(&bo->base.vma_node);
+ if (unlikely(page_offset >= bo->num_pages)) {
+ ret = VM_FAULT_SIGBUS;
+ goto out_unlock;
+ }
+
+ if (vbo->dirty && vbo->dirty->method == VMW_BO_DIRTY_MKWRITE &&
+ !test_bit(page_offset, &vbo->dirty->bitmap[0])) {
+ struct vmw_bo_dirty *dirty = vbo->dirty;
+
+ __set_bit(page_offset, &dirty->bitmap[0]);
+ dirty->start = min(dirty->start, page_offset);
+ dirty->end = max(dirty->end, page_offset + 1);
+ }
+
+out_unlock:
+ dma_resv_unlock(bo->base.resv);
+ return ret;
+}
+
+vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
+ vma->vm_private_data;
+ struct vmw_buffer_object *vbo =
+ container_of(bo, struct vmw_buffer_object, base);
+ pgoff_t num_prefault;
+ pgprot_t prot;
+ vm_fault_t ret;
+
+ ret = ttm_bo_vm_reserve(bo, vmf);
+ if (ret)
+ return ret;
+
+ num_prefault = (vma->vm_flags & VM_RAND_READ) ? 1 :
+ TTM_BO_VM_NUM_PREFAULT;
+
+ if (vbo->dirty) {
+ pgoff_t allowed_prefault;
+ unsigned long page_offset;
+
+ page_offset = vmf->pgoff -
+ drm_vma_node_start(&bo->base.vma_node);
+ if (page_offset >= bo->num_pages ||
+ vmw_resources_clean(vbo, page_offset,
+ page_offset + PAGE_SIZE,
+ &allowed_prefault)) {
+ ret = VM_FAULT_SIGBUS;
+ goto out_unlock;
+ }
+
+ num_prefault = min(num_prefault, allowed_prefault);
+ }
+
+ /*
+ * If we don't track dirty using the MKWRITE method, make sure
+ * sure the page protection is write-enabled so we don't get
+ * a lot of unnecessary write faults.
+ */
+ if (vbo->dirty && vbo->dirty->method == VMW_BO_DIRTY_MKWRITE)
+ prot = vma->vm_page_prot;
+ else
+ prot = vm_get_page_prot(vma->vm_flags);
+
+ ret = ttm_bo_vm_fault_reserved(vmf, prot, num_prefault);
+ if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+ return ret;
+
+out_unlock:
+ dma_resv_unlock(bo->base.resv);
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index 6dfe36fb817c..c8441030637a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -40,11 +40,24 @@
void vmw_resource_mob_attach(struct vmw_resource *res)
{
struct vmw_buffer_object *backup = res->backup;
+ struct rb_node **new = &backup->res_tree.rb_node, *parent = NULL;
dma_resv_assert_held(res->backup->base.base.resv);
res->used_prio = (res->res_dirty) ? res->func->dirty_prio :
res->func->prio;
- list_add_tail(&res->mob_head, &backup->res_list);
+
+ while (*new) {
+ struct vmw_resource *this =
+ container_of(*new, struct vmw_resource, mob_node);
+
+ parent = *new;
+ new = (res->backup_offset < this->backup_offset) ?
+ &((*new)->rb_left) : &((*new)->rb_right);
+ }
+
+ rb_link_node(&res->mob_node, parent, new);
+ rb_insert_color(&res->mob_node, &backup->res_tree);
+
vmw_bo_prio_add(backup, res->used_prio);
}
@@ -58,7 +71,8 @@ void vmw_resource_mob_detach(struct vmw_resource *res)
dma_resv_assert_held(backup->base.base.resv);
if (vmw_resource_mob_attached(res)) {
- list_del_init(&res->mob_head);
+ rb_erase(&res->mob_node, &backup->res_tree);
+ RB_CLEAR_NODE(&res->mob_node);
vmw_bo_prio_del(backup, res->used_prio);
}
}
@@ -119,6 +133,10 @@ static void vmw_resource_release(struct kref *kref)
}
res->backup_dirty = false;
vmw_resource_mob_detach(res);
+ if (res->dirty)
+ res->func->dirty_free(res);
+ if (res->coherent)
+ vmw_bo_dirty_release(res->backup);
ttm_bo_unreserve(bo);
vmw_bo_unreference(&res->backup);
}
@@ -200,15 +218,17 @@ int vmw_resource_init(struct vmw_private *dev_priv, struct vmw_resource *res,
res->res_free = res_free;
res->dev_priv = dev_priv;
res->func = func;
+ RB_CLEAR_NODE(&res->mob_node);
INIT_LIST_HEAD(&res->lru_head);
- INIT_LIST_HEAD(&res->mob_head);
INIT_LIST_HEAD(&res->binding_head);
res->id = -1;
res->backup = NULL;
res->backup_offset = 0;
res->backup_dirty = false;
res->res_dirty = false;
+ res->coherent = false;
res->used_prio = 3;
+ res->dirty = NULL;
if (delay_id)
return 0;
else
@@ -373,7 +393,8 @@ out_no_bo:
* should be retried once resources have been freed up.
*/
static int vmw_resource_do_validate(struct vmw_resource *res,
- struct ttm_validate_buffer *val_buf)
+ struct ttm_validate_buffer *val_buf,
+ bool dirtying)
{
int ret = 0;
const struct vmw_res_func *func = res->func;
@@ -395,6 +416,39 @@ static int vmw_resource_do_validate(struct vmw_resource *res,
vmw_resource_mob_attach(res);
}
+ /*
+ * Handle the case where the backup mob is marked coherent but
+ * the resource isn't.
+ */
+ if (func->dirty_alloc && vmw_resource_mob_attached(res) &&
+ !res->coherent) {
+ if (res->backup->dirty && !res->dirty) {
+ ret = func->dirty_alloc(res);
+ if (ret)
+ return ret;
+ } else if (!res->backup->dirty && res->dirty) {
+ func->dirty_free(res);
+ }
+ }
+
+ /*
+ * Transfer the dirty regions to the resource and update
+ * the resource.
+ */
+ if (res->dirty) {
+ if (dirtying && !res->res_dirty) {
+ pgoff_t start = res->backup_offset >> PAGE_SHIFT;
+ pgoff_t end = __KERNEL_DIV_ROUND_UP
+ (res->backup_offset + res->backup_size,
+ PAGE_SIZE);
+
+ vmw_bo_dirty_unmap(res->backup, start, end);
+ }
+
+ vmw_bo_dirty_transfer_to_res(res);
+ return func->dirty_sync(res);
+ }
+
return 0;
out_bind_failed:
@@ -433,16 +487,28 @@ void vmw_resource_unreserve(struct vmw_resource *res,
if (switch_backup && new_backup != res->backup) {
if (res->backup) {
vmw_resource_mob_detach(res);
+ if (res->coherent)
+ vmw_bo_dirty_release(res->backup);
vmw_bo_unreference(&res->backup);
}
if (new_backup) {
res->backup = vmw_bo_reference(new_backup);
+
+ /*
+ * The validation code should already have added a
+ * dirty tracker here.
+ */
+ WARN_ON(res->coherent && !new_backup->dirty);
+
vmw_resource_mob_attach(res);
} else {
res->backup = NULL;
}
+ } else if (switch_backup && res->coherent) {
+ vmw_bo_dirty_release(res->backup);
}
+
if (switch_backup)
res->backup_offset = new_backup_offset;
@@ -622,6 +688,7 @@ out_no_unbind:
* to the device.
* @res: The resource to make visible to the device.
* @intr: Perform waits interruptible if possible.
+ * @dirtying: Pending GPU operation will dirty the resource
*
* On succesful return, any backup DMA buffer pointed to by @res->backup will
* be reserved and validated.
@@ -631,7 +698,8 @@ out_no_unbind:
* Return: Zero on success, -ERESTARTSYS if interrupted, negative error code
* on failure.
*/
-int vmw_resource_validate(struct vmw_resource *res, bool intr)
+int vmw_resource_validate(struct vmw_resource *res, bool intr,
+ bool dirtying)
{
int ret;
struct vmw_resource *evict_res;
@@ -648,7 +716,7 @@ int vmw_resource_validate(struct vmw_resource *res, bool intr)
if (res->backup)
val_buf.bo = &res->backup->base;
do {
- ret = vmw_resource_do_validate(res, &val_buf);
+ ret = vmw_resource_do_validate(res, &val_buf, dirtying);
if (likely(ret != -EBUSY))
break;
@@ -711,19 +779,20 @@ out_no_validate:
*/
void vmw_resource_unbind_list(struct vmw_buffer_object *vbo)
{
-
- struct vmw_resource *res, *next;
struct ttm_validate_buffer val_buf = {
.bo = &vbo->base,
.num_shared = 0
};
dma_resv_assert_held(vbo->base.base.resv);
- list_for_each_entry_safe(res, next, &vbo->res_list, mob_head) {
- if (!res->func->unbind)
- continue;
+ while (!RB_EMPTY_ROOT(&vbo->res_tree)) {
+ struct rb_node *node = vbo->res_tree.rb_node;
+ struct vmw_resource *res =
+ container_of(node, struct vmw_resource, mob_node);
+
+ if (!WARN_ON_ONCE(!res->func->unbind))
+ (void) res->func->unbind(res, res->res_dirty, &val_buf);
- (void) res->func->unbind(res, res->res_dirty, &val_buf);
res->backup_dirty = true;
res->res_dirty = false;
vmw_resource_mob_detach(res);
@@ -947,7 +1016,7 @@ int vmw_resource_pin(struct vmw_resource *res, bool interruptible)
/* Do we really need to pin the MOB as well? */
vmw_bo_pin_reserved(vbo, true);
}
- ret = vmw_resource_validate(res, interruptible);
+ ret = vmw_resource_validate(res, interruptible, true);
if (vbo)
ttm_bo_unreserve(&vbo->base);
if (ret)
@@ -1007,3 +1076,101 @@ enum vmw_res_type vmw_res_type(const struct vmw_resource *res)
{
return res->func->res_type;
}
+
+/**
+ * vmw_resource_update_dirty - Update a resource's dirty tracker with a
+ * sequential range of touched backing store memory.
+ * @res: The resource.
+ * @start: The first page touched.
+ * @end: The last page touched + 1.
+ */
+void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start,
+ pgoff_t end)
+{
+ if (res->dirty)
+ res->func->dirty_range_add(res, start << PAGE_SHIFT,
+ end << PAGE_SHIFT);
+}
+
+/**
+ * vmw_resources_clean - Clean resources intersecting a mob range
+ * @vbo: The mob buffer object
+ * @start: The mob page offset starting the range
+ * @end: The mob page offset ending the range
+ * @num_prefault: Returns how many pages including the first have been
+ * cleaned and are ok to prefault
+ */
+int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start,
+ pgoff_t end, pgoff_t *num_prefault)
+{
+ struct rb_node *cur = vbo->res_tree.rb_node;
+ struct vmw_resource *found = NULL;
+ unsigned long res_start = start << PAGE_SHIFT;
+ unsigned long res_end = end << PAGE_SHIFT;
+ unsigned long last_cleaned = 0;
+
+ /*
+ * Find the resource with lowest backup_offset that intersects the
+ * range.
+ */
+ while (cur) {
+ struct vmw_resource *cur_res =
+ container_of(cur, struct vmw_resource, mob_node);
+
+ if (cur_res->backup_offset >= res_end) {
+ cur = cur->rb_left;
+ } else if (cur_res->backup_offset + cur_res->backup_size <=
+ res_start) {
+ cur = cur->rb_right;
+ } else {
+ found = cur_res;
+ cur = cur->rb_left;
+ /* Continue to look for resources with lower offsets */
+ }
+ }
+
+ /*
+ * In order of increasing backup_offset, clean dirty resorces
+ * intersecting the range.
+ */
+ while (found) {
+ if (found->res_dirty) {
+ int ret;
+
+ if (!found->func->clean)
+ return -EINVAL;
+
+ ret = found->func->clean(found);
+ if (ret)
+ return ret;
+
+ found->res_dirty = false;
+ }
+ last_cleaned = found->backup_offset + found->backup_size;
+ cur = rb_next(&found->mob_node);
+ if (!cur)
+ break;
+
+ found = container_of(cur, struct vmw_resource, mob_node);
+ if (found->backup_offset >= res_end)
+ break;
+ }
+
+ /*
+ * Set number of pages allowed prefaulting and fence the buffer object
+ */
+ *num_prefault = 1;
+ if (last_cleaned > res_start) {
+ struct ttm_buffer_object *bo = &vbo->base;
+
+ *num_prefault = __KERNEL_DIV_ROUND_UP(last_cleaned - res_start,
+ PAGE_SIZE);
+ vmw_bo_fence_single(bo, NULL);
+ if (bo->moving)
+ dma_fence_put(bo->moving);
+ bo->moving = dma_fence_get
+ (dma_resv_get_excl(bo->base.resv));
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h
index 984e588c62ca..3b7438b2d289 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h
@@ -71,6 +71,13 @@ struct vmw_user_resource_conv {
* @commit_notify: If the resource is a command buffer managed resource,
* callback to notify that a define or remove command
* has been committed to the device.
+ * @dirty_alloc: Allocate a dirty tracker. NULL if dirty-tracking is not
+ * supported.
+ * @dirty_free: Free the dirty tracker.
+ * @dirty_sync: Upload the dirty mob contents to the resource.
+ * @dirty_add_range: Add a sequential dirty range to the resource
+ * dirty tracker.
+ * @clean: Clean the resource.
*/
struct vmw_res_func {
enum vmw_res_type res_type;
@@ -90,6 +97,12 @@ struct vmw_res_func {
struct ttm_validate_buffer *val_buf);
void (*commit_notify)(struct vmw_resource *res,
enum vmw_cmdbuf_res_state state);
+ int (*dirty_alloc)(struct vmw_resource *res);
+ void (*dirty_free)(struct vmw_resource *res);
+ int (*dirty_sync)(struct vmw_resource *res);
+ void (*dirty_range_add)(struct vmw_resource *res, size_t start,
+ size_t end);
+ int (*clean)(struct vmw_resource *res);
};
/**
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
index de0530b4dc1b..32b9131b2bae 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
@@ -68,6 +68,20 @@ struct vmw_surface_offset {
uint32_t bo_offset;
};
+/**
+ * vmw_surface_dirty - Surface dirty-tracker
+ * @cache: Cached layout information of the surface.
+ * @size: Accounting size for the struct vmw_surface_dirty.
+ * @num_subres: Number of subresources.
+ * @boxes: Array of SVGA3dBoxes indicating dirty regions. One per subresource.
+ */
+struct vmw_surface_dirty {
+ struct svga3dsurface_cache cache;
+ size_t size;
+ u32 num_subres;
+ SVGA3dBox boxes[0];
+};
+
static void vmw_user_surface_free(struct vmw_resource *res);
static struct vmw_resource *
vmw_user_surface_base_to_res(struct ttm_base_object *base);
@@ -96,6 +110,13 @@ vmw_gb_surface_reference_internal(struct drm_device *dev,
struct drm_vmw_gb_surface_ref_ext_rep *rep,
struct drm_file *file_priv);
+static void vmw_surface_dirty_free(struct vmw_resource *res);
+static int vmw_surface_dirty_alloc(struct vmw_resource *res);
+static int vmw_surface_dirty_sync(struct vmw_resource *res);
+static void vmw_surface_dirty_range_add(struct vmw_resource *res, size_t start,
+ size_t end);
+static int vmw_surface_clean(struct vmw_resource *res);
+
static const struct vmw_user_resource_conv user_surface_conv = {
.object_type = VMW_RES_SURFACE,
.base_obj_to_res = vmw_user_surface_base_to_res,
@@ -133,7 +154,12 @@ static const struct vmw_res_func vmw_gb_surface_func = {
.create = vmw_gb_surface_create,
.destroy = vmw_gb_surface_destroy,
.bind = vmw_gb_surface_bind,
- .unbind = vmw_gb_surface_unbind
+ .unbind = vmw_gb_surface_unbind,
+ .dirty_alloc = vmw_surface_dirty_alloc,
+ .dirty_free = vmw_surface_dirty_free,
+ .dirty_sync = vmw_surface_dirty_sync,
+ .dirty_range_add = vmw_surface_dirty_range_add,
+ .clean = vmw_surface_clean,
};
/**
@@ -639,6 +665,7 @@ static void vmw_user_surface_free(struct vmw_resource *res)
struct vmw_private *dev_priv = srf->res.dev_priv;
uint32_t size = user_srf->size;
+ WARN_ON_ONCE(res->dirty);
if (user_srf->master)
drm_master_put(&user_srf->master);
kfree(srf->offsets);
@@ -1166,10 +1193,16 @@ static int vmw_gb_surface_bind(struct vmw_resource *res,
cmd2->header.id = SVGA_3D_CMD_UPDATE_GB_SURFACE;
cmd2->header.size = sizeof(cmd2->body);
cmd2->body.sid = res->id;
- res->backup_dirty = false;
}
vmw_fifo_commit(dev_priv, submit_size);
+ if (res->backup->dirty && res->backup_dirty) {
+ /* We've just made a full upload. Cear dirty regions. */
+ vmw_bo_dirty_clear_res(res);
+ }
+
+ res->backup_dirty = false;
+
return 0;
}
@@ -1634,7 +1667,8 @@ vmw_gb_surface_define_internal(struct drm_device *dev,
}
}
} else if (req->base.drm_surface_flags &
- drm_vmw_surface_flag_create_buffer)
+ (drm_vmw_surface_flag_create_buffer |
+ drm_vmw_surface_flag_coherent))
ret = vmw_user_bo_alloc(dev_priv, tfile,
res->backup_size,
req->base.drm_surface_flags &
@@ -1648,6 +1682,26 @@ vmw_gb_surface_define_internal(struct drm_device *dev,
goto out_unlock;
}
+ if (req->base.drm_surface_flags & drm_vmw_surface_flag_coherent) {
+ struct vmw_buffer_object *backup = res->backup;
+
+ ttm_bo_reserve(&backup->base, false, false, NULL);
+ if (!res->func->dirty_alloc)
+ ret = -EINVAL;
+ if (!ret)
+ ret = vmw_bo_dirty_add(backup);
+ if (!ret) {
+ res->coherent = true;
+ ret = res->func->dirty_alloc(res);
+ }
+ ttm_bo_unreserve(&backup->base);
+ if (ret) {
+ vmw_resource_unreference(&res);
+ goto out_unlock;
+ }
+
+ }
+
tmp = vmw_resource_reference(res);
ret = ttm_prime_object_init(tfile, res->backup_size, &user_srf->prime,
req->base.drm_surface_flags &
@@ -1756,3 +1810,338 @@ out_bad_resource:
return ret;
}
+
+/**
+ * vmw_subres_dirty_add - Add a dirty region to a subresource
+ * @dirty: The surfaces's dirty tracker.
+ * @loc_start: The location corresponding to the start of the region.
+ * @loc_end: The location corresponding to the end of the region.
+ *
+ * As we are assuming that @loc_start and @loc_end represent a sequential
+ * range of backing store memory, if the region spans multiple lines then
+ * regardless of the x coordinate, the full lines are dirtied.
+ * Correspondingly if the region spans multiple z slices, then full rather
+ * than partial z slices are dirtied.
+ */
+static void vmw_subres_dirty_add(struct vmw_surface_dirty *dirty,
+ const struct svga3dsurface_loc *loc_start,
+ const struct svga3dsurface_loc *loc_end)
+{
+ const struct svga3dsurface_cache *cache = &dirty->cache;
+ SVGA3dBox *box = &dirty->boxes[loc_start->sub_resource];
+ u32 mip = loc_start->sub_resource % cache->num_mip_levels;
+ const struct drm_vmw_size *size = &cache->mip[mip].size;
+ u32 box_c2 = box->z + box->d;
+
+ if (WARN_ON(loc_start->sub_resource >= dirty->num_subres))
+ return;
+
+ if (box->d == 0 || box->z > loc_start->z)
+ box->z = loc_start->z;
+ if (box_c2 < loc_end->z)
+ box->d = loc_end->z - box->z;
+
+ if (loc_start->z + 1 == loc_end->z) {
+ box_c2 = box->y + box->h;
+ if (box->h == 0 || box->y > loc_start->y)
+ box->y = loc_start->y;
+ if (box_c2 < loc_end->y)
+ box->h = loc_end->y - box->y;
+
+ if (loc_start->y + 1 == loc_end->y) {
+ box_c2 = box->x + box->w;
+ if (box->w == 0 || box->x > loc_start->x)
+ box->x = loc_start->x;
+ if (box_c2 < loc_end->x)
+ box->w = loc_end->x - box->x;
+ } else {
+ box->x = 0;
+ box->w = size->width;
+ }
+ } else {
+ box->y = 0;
+ box->h = size->height;
+ box->x = 0;
+ box->w = size->width;
+ }
+}
+
+/**
+ * vmw_subres_dirty_full - Mark a full subresource as dirty
+ * @dirty: The surface's dirty tracker.
+ * @subres: The subresource
+ */
+static void vmw_subres_dirty_full(struct vmw_surface_dirty *dirty, u32 subres)
+{
+ const struct svga3dsurface_cache *cache = &dirty->cache;
+ u32 mip = subres % cache->num_mip_levels;
+ const struct drm_vmw_size *size = &cache->mip[mip].size;
+ SVGA3dBox *box = &dirty->boxes[subres];
+
+ box->x = 0;
+ box->y = 0;
+ box->z = 0;
+ box->w = size->width;
+ box->h = size->height;
+ box->d = size->depth;
+}
+
+/*
+ * vmw_surface_tex_dirty_add_range - The dirty_add_range callback for texture
+ * surfaces.
+ */
+static void vmw_surface_tex_dirty_range_add(struct vmw_resource *res,
+ size_t start, size_t end)
+{
+ struct vmw_surface_dirty *dirty =
+ (struct vmw_surface_dirty *) res->dirty;
+ size_t backup_end = res->backup_offset + res->backup_size;
+ struct svga3dsurface_loc loc1, loc2;
+ const struct svga3dsurface_cache *cache;
+
+ start = max_t(size_t, start, res->backup_offset) - res->backup_offset;
+ end = min(end, backup_end) - res->backup_offset;
+ cache = &dirty->cache;
+ svga3dsurface_get_loc(cache, &loc1, start);
+ svga3dsurface_get_loc(cache, &loc2, end - 1);
+ svga3dsurface_inc_loc(cache, &loc2);
+
+ if (loc1.sub_resource + 1 == loc2.sub_resource) {
+ /* Dirty range covers a single sub-resource */
+ vmw_subres_dirty_add(dirty, &loc1, &loc2);
+ } else {
+ /* Dirty range covers multiple sub-resources */
+ struct svga3dsurface_loc loc_min, loc_max;
+ u32 sub_res;
+
+ svga3dsurface_max_loc(cache, loc1.sub_resource, &loc_max);
+ vmw_subres_dirty_add(dirty, &loc1, &loc_max);
+ svga3dsurface_min_loc(cache, loc2.sub_resource - 1, &loc_min);
+ vmw_subres_dirty_add(dirty, &loc_min, &loc2);
+ for (sub_res = loc1.sub_resource + 1;
+ sub_res < loc2.sub_resource - 1; ++sub_res)
+ vmw_subres_dirty_full(dirty, sub_res);
+ }
+}
+
+/*
+ * vmw_surface_tex_dirty_add_range - The dirty_add_range callback for buffer
+ * surfaces.
+ */
+static void vmw_surface_buf_dirty_range_add(struct vmw_resource *res,
+ size_t start, size_t end)
+{
+ struct vmw_surface_dirty *dirty =
+ (struct vmw_surface_dirty *) res->dirty;
+ const struct svga3dsurface_cache *cache = &dirty->cache;
+ size_t backup_end = res->backup_offset + cache->mip_chain_bytes;
+ SVGA3dBox *box = &dirty->boxes[0];
+ u32 box_c2;
+
+ box->h = box->d = 1;
+ start = max_t(size_t, start, res->backup_offset) - res->backup_offset;
+ end = min(end, backup_end) - res->backup_offset;
+ box_c2 = box->x + box->w;
+ if (box->w == 0 || box->x > start)
+ box->x = start;
+ if (box_c2 < end)
+ box->w = end - box->x;
+}
+
+/*
+ * vmw_surface_tex_dirty_add_range - The dirty_add_range callback for surfaces
+ */
+static void vmw_surface_dirty_range_add(struct vmw_resource *res, size_t start,
+ size_t end)
+{
+ struct vmw_surface *srf = vmw_res_to_srf(res);
+
+ if (WARN_ON(end <= res->backup_offset ||
+ start >= res->backup_offset + res->backup_size))
+ return;
+
+ if (srf->format == SVGA3D_BUFFER)
+ vmw_surface_buf_dirty_range_add(res, start, end);
+ else
+ vmw_surface_tex_dirty_range_add(res, start, end);
+}
+
+/*
+ * vmw_surface_dirty_sync - The surface's dirty_sync callback.
+ */
+static int vmw_surface_dirty_sync(struct vmw_resource *res)
+{
+ struct vmw_private *dev_priv = res->dev_priv;
+ bool has_dx = 0;
+ u32 i, num_dirty;
+ struct vmw_surface_dirty *dirty =
+ (struct vmw_surface_dirty *) res->dirty;
+ size_t alloc_size;
+ const struct svga3dsurface_cache *cache = &dirty->cache;
+ struct {
+ SVGA3dCmdHeader header;
+ SVGA3dCmdDXUpdateSubResource body;
+ } *cmd1;
+ struct {
+ SVGA3dCmdHeader header;
+ SVGA3dCmdUpdateGBImage body;
+ } *cmd2;
+ void *cmd;
+
+ num_dirty = 0;
+ for (i = 0; i < dirty->num_subres; ++i) {
+ const SVGA3dBox *box = &dirty->boxes[i];
+
+ if (box->d)
+ num_dirty++;
+ }
+
+ if (!num_dirty)
+ goto out;
+
+ alloc_size = num_dirty * ((has_dx) ? sizeof(*cmd1) : sizeof(*cmd2));
+ cmd = VMW_FIFO_RESERVE(dev_priv, alloc_size);
+ if (!cmd)
+ return -ENOMEM;
+
+ cmd1 = cmd;
+ cmd2 = cmd;
+
+ for (i = 0; i < dirty->num_subres; ++i) {
+ const SVGA3dBox *box = &dirty->boxes[i];
+
+ if (!box->d)
+ continue;
+
+ /*
+ * DX_UPDATE_SUBRESOURCE is aware of array surfaces.
+ * UPDATE_GB_IMAGE is not.
+ */
+ if (has_dx) {
+ cmd1->header.id = SVGA_3D_CMD_DX_UPDATE_SUBRESOURCE;
+ cmd1->header.size = sizeof(cmd1->body);
+ cmd1->body.sid = res->id;
+ cmd1->body.subResource = i;
+ cmd1->body.box = *box;
+ cmd1++;
+ } else {
+ cmd2->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE;
+ cmd2->header.size = sizeof(cmd2->body);
+ cmd2->body.image.sid = res->id;
+ cmd2->body.image.face = i / cache->num_mip_levels;
+ cmd2->body.image.mipmap = i -
+ (cache->num_mip_levels * cmd2->body.image.face);
+ cmd2->body.box = *box;
+ cmd2++;
+ }
+
+ }
+ vmw_fifo_commit(dev_priv, alloc_size);
+ out:
+ memset(&dirty->boxes[0], 0, sizeof(dirty->boxes[0]) *
+ dirty->num_subres);
+
+ return 0;
+}
+
+/*
+ * vmw_surface_dirty_alloc - The surface's dirty_alloc callback.
+ */
+static int vmw_surface_dirty_alloc(struct vmw_resource *res)
+{
+ struct vmw_surface *srf = vmw_res_to_srf(res);
+ struct vmw_surface_dirty *dirty;
+ u32 num_layers = 1;
+ u32 num_mip;
+ u32 num_subres;
+ u32 num_samples;
+ size_t dirty_size, acc_size;
+ static struct ttm_operation_ctx ctx = {
+ .interruptible = false,
+ .no_wait_gpu = false
+ };
+ int ret;
+
+ if (srf->array_size)
+ num_layers = srf->array_size;
+ else if (srf->flags & SVGA3D_SURFACE_CUBEMAP)
+ num_layers *= SVGA3D_MAX_SURFACE_FACES;
+
+ num_mip = srf->mip_levels[0];
+ if (!num_mip)
+ num_mip = 1;
+
+ num_subres = num_layers * num_mip;
+ dirty_size = sizeof(*dirty) + num_subres * sizeof(dirty->boxes[0]);
+ acc_size = ttm_round_pot(dirty_size);
+ ret = ttm_mem_global_alloc(vmw_mem_glob(res->dev_priv),
+ acc_size, &ctx);
+ if (ret) {
+ VMW_DEBUG_USER("Out of graphics memory for surface "
+ "dirty tracker.\n");
+ return ret;
+ }
+
+ dirty = kvzalloc(dirty_size, GFP_KERNEL);
+ if (!dirty) {
+ ret = -ENOMEM;
+ goto out_no_dirty;
+ }
+
+ num_samples = max_t(u32, 1, srf->multisample_count);
+ ret = svga3dsurface_setup_cache(&srf->base_size, srf->format, num_mip,
+ num_layers, num_samples, &dirty->cache);
+ if (ret)
+ goto out_no_cache;
+
+ dirty->num_subres = num_subres;
+ dirty->size = acc_size;
+ res->dirty = (struct vmw_resource_dirty *) dirty;
+
+ return 0;
+
+out_no_cache:
+ kvfree(dirty);
+out_no_dirty:
+ ttm_mem_global_free(vmw_mem_glob(res->dev_priv), acc_size);
+ return ret;
+}
+
+/*
+ * vmw_surface_dirty_free - The surface's dirty_free callback
+ */
+static void vmw_surface_dirty_free(struct vmw_resource *res)
+{
+ struct vmw_surface_dirty *dirty =
+ (struct vmw_surface_dirty *) res->dirty;
+ size_t acc_size = dirty->size;
+
+ kvfree(dirty);
+ ttm_mem_global_free(vmw_mem_glob(res->dev_priv), acc_size);
+ res->dirty = NULL;
+}
+
+/*
+ * vmw_surface_clean - The surface's clean callback
+ */
+static int vmw_surface_clean(struct vmw_resource *res)
+{
+ struct vmw_private *dev_priv = res->dev_priv;
+ size_t alloc_size;
+ struct {
+ SVGA3dCmdHeader header;
+ SVGA3dCmdReadbackGBSurface body;
+ } *cmd;
+
+ alloc_size = sizeof(*cmd);
+ cmd = VMW_FIFO_RESERVE(dev_priv, alloc_size);
+ if (!cmd)
+ return -ENOMEM;
+
+ cmd->header.id = SVGA_3D_CMD_READBACK_GB_SURFACE;
+ cmd->header.size = sizeof(cmd->body);
+ cmd->body.sid = res->id;
+ vmw_fifo_commit(dev_priv, alloc_size);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
index 5a7b8bb420de..ce288756531b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
@@ -29,10 +29,23 @@
int vmw_mmap(struct file *filp, struct vm_area_struct *vma)
{
+ static const struct vm_operations_struct vmw_vm_ops = {
+ .pfn_mkwrite = vmw_bo_vm_mkwrite,
+ .page_mkwrite = vmw_bo_vm_mkwrite,
+ .fault = vmw_bo_vm_fault,
+ .open = ttm_bo_vm_open,
+ .close = ttm_bo_vm_close
+ };
struct drm_file *file_priv = filp->private_data;
struct vmw_private *dev_priv = vmw_priv(file_priv->minor->dev);
+ int ret = ttm_bo_mmap(filp, vma, &dev_priv->bdev);
- return ttm_bo_mmap(filp, vma, &dev_priv->bdev);
+ if (ret)
+ return ret;
+
+ vma->vm_ops = &vmw_vm_ops;
+
+ return 0;
}
/* struct vmw_validation_mem callback */
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c
index 7bff3628fc54..e69bc373ae2e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c
@@ -33,6 +33,8 @@
* struct vmw_validation_bo_node - Buffer object validation metadata.
* @base: Metadata used for TTM reservation- and validation.
* @hash: A hash entry used for the duplicate detection hash table.
+ * @coherent_count: If switching backup buffers, number of new coherent
+ * resources that will have this buffer as a backup buffer.
* @as_mob: Validate as mob.
* @cpu_blit: Validate for cpu blit access.
*
@@ -42,6 +44,7 @@
struct vmw_validation_bo_node {
struct ttm_validate_buffer base;
struct drm_hash_item hash;
+ unsigned int coherent_count;
u32 as_mob : 1;
u32 cpu_blit : 1;
};
@@ -459,6 +462,19 @@ int vmw_validation_res_reserve(struct vmw_validation_context *ctx,
if (ret)
goto out_unreserve;
}
+
+ if (val->switching_backup && val->new_backup &&
+ res->coherent) {
+ struct vmw_validation_bo_node *bo_node =
+ vmw_validation_find_bo_dup(ctx,
+ val->new_backup);
+
+ if (WARN_ON(!bo_node)) {
+ ret = -EINVAL;
+ goto out_unreserve;
+ }
+ bo_node->coherent_count++;
+ }
}
return 0;
@@ -565,6 +581,9 @@ int vmw_validation_bo_validate(struct vmw_validation_context *ctx, bool intr)
int ret;
list_for_each_entry(entry, &ctx->bo_list, base.head) {
+ struct vmw_buffer_object *vbo =
+ container_of(entry->base.bo, typeof(*vbo), base);
+
if (entry->cpu_blit) {
struct ttm_operation_ctx ctx = {
.interruptible = intr,
@@ -579,6 +598,27 @@ int vmw_validation_bo_validate(struct vmw_validation_context *ctx, bool intr)
}
if (ret)
return ret;
+
+ /*
+ * Rather than having the resource code allocating the bo
+ * dirty tracker in resource_unreserve() where we can't fail,
+ * Do it here when validating the buffer object.
+ */
+ if (entry->coherent_count) {
+ unsigned int coherent_count = entry->coherent_count;
+
+ while (coherent_count) {
+ ret = vmw_bo_dirty_add(vbo);
+ if (ret)
+ return ret;
+
+ coherent_count--;
+ }
+ entry->coherent_count -= coherent_count;
+ }
+
+ if (vbo->dirty)
+ vmw_bo_dirty_scan(vbo);
}
return 0;
}
@@ -604,7 +644,8 @@ int vmw_validation_res_validate(struct vmw_validation_context *ctx, bool intr)
struct vmw_resource *res = val->res;
struct vmw_buffer_object *backup = res->backup;
- ret = vmw_resource_validate(res, intr);
+ ret = vmw_resource_validate(res, intr, val->dirty_set &&
+ val->dirty);
if (ret) {
if (ret != -ERESTARTSYS)
DRM_ERROR("Failed to validate resource.\n");
@@ -831,3 +872,34 @@ int vmw_validation_preload_res(struct vmw_validation_context *ctx,
ctx->mem_size_left += size;
return 0;
}
+
+/**
+ * vmw_validation_bo_backoff - Unreserve buffer objects registered with a
+ * validation context
+ * @ctx: The validation context
+ *
+ * This function unreserves the buffer objects previously reserved using
+ * vmw_validation_bo_reserve. It's typically used as part of an error path
+ */
+void vmw_validation_bo_backoff(struct vmw_validation_context *ctx)
+{
+ struct vmw_validation_bo_node *entry;
+
+ /*
+ * Switching coherent resource backup buffers failed.
+ * Release corresponding buffer object dirty trackers.
+ */
+ list_for_each_entry(entry, &ctx->bo_list, base.head) {
+ if (entry->coherent_count) {
+ unsigned int coherent_count = entry->coherent_count;
+ struct vmw_buffer_object *vbo =
+ container_of(entry->base.bo, typeof(*vbo),
+ base);
+
+ while (coherent_count--)
+ vmw_bo_dirty_release(vbo);
+ }
+ }
+
+ ttm_eu_backoff_reservation(&ctx->ticket, &ctx->bo_list);
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
index 71ce4b318850..739906d1b3eb 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
@@ -174,20 +174,6 @@ vmw_validation_bo_reserve(struct vmw_validation_context *ctx,
}
/**
- * vmw_validation_bo_backoff - Unreserve buffer objects registered with a
- * validation context
- * @ctx: The validation context
- *
- * This function unreserves the buffer objects previously reserved using
- * vmw_validation_bo_reserve. It's typically used as part of an error path
- */
-static inline void
-vmw_validation_bo_backoff(struct vmw_validation_context *ctx)
-{
- ttm_eu_backoff_reservation(&ctx->ticket, &ctx->bo_list);
-}
-
-/**
* vmw_validation_bo_fence - Unreserve and fence buffer objects registered
* with a validation context
* @ctx: The validation context
@@ -269,4 +255,6 @@ int vmw_validation_preload_res(struct vmw_validation_context *ctx,
unsigned int size);
void vmw_validation_res_set_dirty(struct vmw_validation_context *ctx,
void *val_private, u32 dirty);
+void vmw_validation_bo_backoff(struct vmw_validation_context *ctx);
+
#endif