diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 139 |
1 files changed, 122 insertions, 17 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 654f4844b7ad..86255c13fbb7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -71,6 +71,7 @@ #include "amdgpu_xgmi.h" #include "amdgpu_ras.h" +#include "amdgpu_ras_mgr.h" #include "amdgpu_pmu.h" #include "amdgpu_fru_eeprom.h" #include "amdgpu_reset.h" @@ -179,6 +180,10 @@ struct amdgpu_init_level amdgpu_init_minimal_xgmi = { BIT(AMD_IP_BLOCK_TYPE_PSP) }; +static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev); +static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev); +static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev); + static void amdgpu_device_load_switch_state(struct amdgpu_device *adev); static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev, @@ -2494,6 +2499,7 @@ static const char *ip_block_names[] = { [AMD_IP_BLOCK_TYPE_VPE] = "vpe", [AMD_IP_BLOCK_TYPE_UMSCH_MM] = "umsch_mm", [AMD_IP_BLOCK_TYPE_ISP] = "isp", + [AMD_IP_BLOCK_TYPE_RAS] = "ras", }; static const char *ip_block_name(struct amdgpu_device *adev, enum amd_ip_block_type type) @@ -3784,7 +3790,7 @@ static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work) */ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) { - int i, r; + int i, r, rec; amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); @@ -3807,10 +3813,23 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]); if (r) - return r; + goto unwind; } return 0; +unwind: + rec = amdgpu_device_ip_resume_phase3(adev); + if (rec) + dev_err(adev->dev, + "amdgpu_device_ip_resume_phase3 failed during unwind: %d\n", + rec); + + amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW); + + amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE); + amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE); + + return r; } /** @@ -3826,7 +3845,7 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) */ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) { - int i, r; + int i, r, rec; if (adev->in_s0ix) amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry); @@ -3889,7 +3908,7 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]); if (r) - return r; + goto unwind; /* handle putting the SMC in the appropriate state */ if (!amdgpu_sriov_vf(adev)) { @@ -3899,13 +3918,40 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) dev_err(adev->dev, "SMC failed to set mp1 state %d, %d\n", adev->mp1_state, r); - return r; + goto unwind; } } } } return 0; +unwind: + /* suspend phase 2 = resume phase 1 + resume phase 2 */ + rec = amdgpu_device_ip_resume_phase1(adev); + if (rec) { + dev_err(adev->dev, + "amdgpu_device_ip_resume_phase1 failed during unwind: %d\n", + rec); + return r; + } + + rec = amdgpu_device_fw_loading(adev); + if (rec) { + dev_err(adev->dev, + "amdgpu_device_fw_loading failed during unwind: %d\n", + rec); + return r; + } + + rec = amdgpu_device_ip_resume_phase2(adev); + if (rec) { + dev_err(adev->dev, + "amdgpu_device_ip_resume_phase2 failed during unwind: %d\n", + rec); + return r; + } + + return r; } /** @@ -4607,6 +4653,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, } INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); + INIT_WORK(&adev->userq_reset_work, amdgpu_userq_reset_work); adev->gfx.gfx_off_req_count = 1; adev->gfx.gfx_off_residency = 0; @@ -5229,7 +5276,7 @@ void amdgpu_device_complete(struct drm_device *dev) int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) { struct amdgpu_device *adev = drm_to_adev(dev); - int r = 0; + int r, rec; if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; @@ -5245,8 +5292,9 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) return r; } - if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D3)) - dev_warn(adev->dev, "smart shift update failed\n"); + r = amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D3); + if (r) + goto unwind_sriov; if (notify_clients) drm_client_dev_suspend(adev_to_drm(adev)); @@ -5257,16 +5305,16 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) r = amdgpu_device_ip_suspend_phase1(adev); if (r) - return r; + goto unwind_smartshift; amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm); r = amdgpu_userq_suspend(adev); if (r) - return r; + goto unwind_ip_phase1; r = amdgpu_device_evict_resources(adev); if (r) - return r; + goto unwind_userq; amdgpu_ttm_set_buffer_funcs_status(adev, false); @@ -5274,16 +5322,62 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) r = amdgpu_device_ip_suspend_phase2(adev); if (r) - return r; + goto unwind_evict; if (amdgpu_sriov_vf(adev)) amdgpu_virt_release_full_gpu(adev, false); - r = amdgpu_dpm_notify_rlc_state(adev, false); - if (r) + return 0; + +unwind_evict: + if (adev->mman.buffer_funcs_ring->sched.ready) + amdgpu_ttm_set_buffer_funcs_status(adev, true); + amdgpu_fence_driver_hw_init(adev); + +unwind_userq: + rec = amdgpu_userq_resume(adev); + if (rec) { + dev_warn(adev->dev, "failed to re-initialize user queues: %d\n", rec); + return r; + } + rec = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm); + if (rec) { + dev_warn(adev->dev, "failed to re-initialize kfd: %d\n", rec); return r; + } - return 0; +unwind_ip_phase1: + /* suspend phase 1 = resume phase 3 */ + rec = amdgpu_device_ip_resume_phase3(adev); + if (rec) { + dev_warn(adev->dev, "failed to re-initialize IPs phase1: %d\n", rec); + return r; + } + +unwind_smartshift: + rec = amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0); + if (rec) { + dev_warn(adev->dev, "failed to re-update smart shift: %d\n", rec); + return r; + } + + if (notify_clients) + drm_client_dev_resume(adev_to_drm(adev)); + + amdgpu_ras_resume(adev); + +unwind_sriov: + if (amdgpu_sriov_vf(adev)) { + rec = amdgpu_virt_request_full_gpu(adev, true); + if (rec) { + dev_warn(adev->dev, "failed to reinitialize sriov: %d\n", rec); + return r; + } + } + + adev->in_suspend = adev->in_s0ix = adev->in_s3 = false; + + return r; } static inline int amdgpu_virt_resume(struct amdgpu_device *adev) @@ -5989,6 +6083,10 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context) if (r) goto out; + r = amdgpu_userq_post_reset(tmp_adev, vram_lost); + if (r) + goto out; + drm_client_dev_resume(adev_to_drm(tmp_adev)); /* @@ -6211,6 +6309,7 @@ static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev) if (!amdgpu_sriov_vf(adev)) cancel_work(&adev->reset_work); #endif + cancel_work(&adev->userq_reset_work); if (adev->kfd.dev) cancel_work(&adev->kfd.reset_work); @@ -6331,6 +6430,8 @@ static void amdgpu_device_halt_activities(struct amdgpu_device *adev, amdgpu_device_ip_need_full_reset(tmp_adev)) amdgpu_ras_suspend(tmp_adev); + amdgpu_userq_pre_reset(tmp_adev); + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = tmp_adev->rings[i]; @@ -6560,6 +6661,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, goto end_reset; } + /* Cannot be called after locking reset domain */ + amdgpu_ras_pre_reset(adev, &device_list); + /* We need to lock reset domain only once both for XGMI and single device */ amdgpu_device_recovery_get_reset_lock(adev, &device_list); @@ -6590,6 +6694,7 @@ skip_sched_resume: amdgpu_device_gpu_resume(adev, &device_list, need_emergency_restart); reset_unlock: amdgpu_device_recovery_put_reset_lock(adev, &device_list); + amdgpu_ras_post_reset(adev, &device_list); end_reset: if (hive) { mutex_unlock(&hive->hive_lock); @@ -7327,7 +7432,7 @@ void amdgpu_device_flush_hdp(struct amdgpu_device *adev, return; } - amdgpu_asic_flush_hdp(adev, ring); + amdgpu_hdp_flush(adev, ring); } void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev, @@ -7340,7 +7445,7 @@ void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev, if (adev->gmc.xgmi.connected_to_cpu) return; - amdgpu_asic_invalidate_hdp(adev, ring); + amdgpu_hdp_invalidate(adev, ring); } int amdgpu_in_reset(struct amdgpu_device *adev) |
