From ce0e22f5d886d1b56c7ab4347c45b9ac5fcc058d Mon Sep 17 00:00:00 2001 From: Louis Li Date: Sat, 25 May 2019 06:39:47 +0800 Subject: drm/amdgpu: fix ring test failure issue during s3 in vce 3.0 (V2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [What] vce ring test fails consistently during resume in s3 cycle, due to mismatch read & write pointers. On debug/analysis its found that rptr to be compared is not being correctly updated/read, which leads to this failure. Below is the failure signature: [drm:amdgpu_vce_ring_test_ring] *ERROR* amdgpu: ring 12 test failed [drm:amdgpu_device_ip_resume_phase2] *ERROR* resume of IP block failed -110 [drm:amdgpu_device_resume] *ERROR* amdgpu_device_ip_resume failed (-110). [How] fetch rptr appropriately, meaning move its read location further down in the code flow. With this patch applied the s3 failure is no more seen for >5k s3 cycles, which otherwise is pretty consistent. V2: remove reduntant fetch of rptr Signed-off-by: Louis Li Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index c021b114c8a4..f7189e22f6b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -1072,7 +1072,7 @@ void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t rptr = amdgpu_ring_get_rptr(ring); + uint32_t rptr; unsigned i; int r, timeout = adev->usec_timeout; @@ -1084,6 +1084,8 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) if (r) return r; + rptr = amdgpu_ring_get_rptr(ring); + amdgpu_ring_write(ring, VCE_CMD_END); amdgpu_ring_commit(ring); -- cgit From 1019fe047a483ecfdc82661802e56871f649c46e Mon Sep 17 00:00:00 2001 From: Chengming Gui Date: Mon, 13 May 2019 17:41:19 +0800 Subject: drm/amd/powerplay: add set_power_profile_mode for raven1_refresh add gfxoff_state_changed_by_workload to control gfxoff when set power_profile_mode Signed-off-by: Chengming Gui Reviewed-by: Huang Rui Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c | 1 + drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c | 31 ++++++++++++++++++++--- drivers/gpu/drm/amd/powerplay/inc/hwmgr.h | 1 + 3 files changed, 30 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c index 6cd6497c6fc2..f1d326caf69e 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c @@ -92,6 +92,7 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr) hwmgr_set_user_specify_caps(hwmgr); hwmgr->fan_ctrl_is_in_default_mode = true; hwmgr_init_workload_prority(hwmgr); + hwmgr->gfxoff_state_changed_by_workload = false; switch (hwmgr->chip_family) { case AMDGPU_FAMILY_CI: diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c index 9a595f7525e6..e32ae9d3373c 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c @@ -1258,21 +1258,46 @@ static int smu10_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) return size; } +static bool smu10_is_raven1_refresh(struct pp_hwmgr *hwmgr) +{ + struct amdgpu_device *adev = hwmgr->adev; + if ((adev->asic_type == CHIP_RAVEN) && + (adev->rev_id != 0x15d8) && + (hwmgr->smu_version >= 0x41e2b)) + return true; + else + return false; +} + static int smu10_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, uint32_t size) { int workload_type = 0; + int result = 0; if (input[size] > PP_SMC_POWER_PROFILE_COMPUTE) { pr_err("Invalid power profile mode %ld\n", input[size]); return -EINVAL; } - hwmgr->power_profile_mode = input[size]; + if (hwmgr->power_profile_mode == input[size]) + return 0; /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ workload_type = - conv_power_profile_to_pplib_workload(hwmgr->power_profile_mode); - smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_ActiveProcessNotify, + conv_power_profile_to_pplib_workload(input[size]); + if (workload_type && + smu10_is_raven1_refresh(hwmgr) && + !hwmgr->gfxoff_state_changed_by_workload) { + smu10_gfx_off_control(hwmgr, false); + hwmgr->gfxoff_state_changed_by_workload = true; + } + result = smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_ActiveProcessNotify, 1 << workload_type); + if (!result) + hwmgr->power_profile_mode = input[size]; + if (workload_type && hwmgr->gfxoff_state_changed_by_workload) { + smu10_gfx_off_control(hwmgr, true); + hwmgr->gfxoff_state_changed_by_workload = false; + } return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index bac3d85e3b82..c92999aac07c 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h @@ -782,6 +782,7 @@ struct pp_hwmgr { uint32_t workload_mask; uint32_t workload_prority[Workload_Policy_Max]; uint32_t workload_setting[Workload_Policy_Max]; + bool gfxoff_state_changed_by_workload; }; int hwmgr_early_init(struct pp_hwmgr *hwmgr); -- cgit From 1929059893022a3bbed43934c7313e66aad7346b Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Mon, 27 May 2019 16:05:50 +0800 Subject: drm/amd/amdgpu: add RLC firmware to support raven1 refresh Use SMU firmware version to indentify the raven1 refresh device and then load homologous RLC FW. Signed-off-by: Prike Liang Suggested-by: Huang Rui Reviewed-by: Huang Rui Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 15 +++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 12 +++++++++++- 4 files changed, 30 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index cc8ad3831982..f4ac632a87b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1589,6 +1589,7 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev) { int r = 0; int i; + uint32_t smu_version; if (adev->asic_type >= CHIP_VEGA10) { for (i = 0; i < adev->num_ip_blocks; i++) { @@ -1614,16 +1615,9 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev) } } } + r = amdgpu_pm_load_smu_firmware(adev, &smu_version); - if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->load_firmware) { - r = adev->powerplay.pp_funcs->load_firmware(adev->powerplay.pp_handle); - if (r) { - pr_err("firmware loading failed\n"); - return r; - } - } - - return 0; + return r; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 34471dbaa872..039cfa2ec89d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -2490,6 +2490,21 @@ void amdgpu_pm_print_power_states(struct amdgpu_device *adev) } +int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_version) +{ + int r = -EINVAL; + + if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->load_firmware) { + r = adev->powerplay.pp_funcs->load_firmware(adev->powerplay.pp_handle); + if (r) { + pr_err("smu firmware loading failed\n"); + return r; + } + *smu_version = adev->pm.fw_version; + } + return r; +} + int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) { struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h index f21a7716b90e..7ff0e7621fff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h @@ -34,6 +34,7 @@ void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev); int amdgpu_pm_sysfs_init(struct amdgpu_device *adev); void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev); void amdgpu_pm_print_power_states(struct amdgpu_device *adev); +int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_version); void amdgpu_pm_compute_clocks(struct amdgpu_device *adev); void amdgpu_dpm_thermal_work_handler(struct work_struct *work); void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index ba67d1023264..b610e3b30d95 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -28,6 +28,7 @@ #include "soc15.h" #include "soc15d.h" #include "amdgpu_atomfirmware.h" +#include "amdgpu_pm.h" #include "gc/gc_9_0_offset.h" #include "gc/gc_9_0_sh_mask.h" @@ -96,6 +97,7 @@ MODULE_FIRMWARE("amdgpu/raven2_me.bin"); MODULE_FIRMWARE("amdgpu/raven2_mec.bin"); MODULE_FIRMWARE("amdgpu/raven2_mec2.bin"); MODULE_FIRMWARE("amdgpu/raven2_rlc.bin"); +MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin"); static const struct soc15_reg_golden golden_settings_gc_9_0[] = { @@ -588,7 +590,8 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) case CHIP_RAVEN: if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) break; - if ((adev->gfx.rlc_fw_version < 531) || + if ((adev->gfx.rlc_fw_version != 106 && + adev->gfx.rlc_fw_version < 531) || (adev->gfx.rlc_fw_version == 53815) || (adev->gfx.rlc_feature_version < 1) || !adev->gfx.rlc.is_rlc_v2_1) @@ -612,6 +615,7 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) unsigned int i = 0; uint16_t version_major; uint16_t version_minor; + uint32_t smu_version; DRM_DEBUG("\n"); @@ -682,6 +686,12 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) || ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF)))) snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name); + else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) && + (smu_version >= 0x41e2b)) + /** + *SMC is loaded by SBIOS on APU and it's able to get the SMU version directly. + */ + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name); else snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); -- cgit