summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
diff options
context:
space:
mode:
authorTao Zhou <tao.zhou1@amd.com>2019-08-15 16:15:08 +0800
committerAlex Deucher <alexander.deucher@amd.com>2019-09-13 17:50:40 -0500
commit87d2b92f1e9df64a74f7fda0691d4041ba2727f9 (patch)
tree76897717624eede2a434ee2ec376326211b2e816 /drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
parent78ad00c9030cf872621ef4c14c20fbe7d34d2c06 (diff)
drm/amdgpu: save umc error records
save umc error records to ras bad page array v2: add bad pages before gpu reset v3: add NULL check for adev->umc.funcs Signed-off-by: Tao Zhou <tao.zhou1@amd.com> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> Reviewed-by: Guchun Chen <guchun.chen@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c40
1 files changed, 31 insertions, 9 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 2b4eb0b9d9ce..9b06d775d137 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -247,21 +247,43 @@ static int gmc_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
struct ras_err_data *err_data,
struct amdgpu_iv_entry *entry)
{
- if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
- kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
- if (adev->umc.funcs->query_ras_error_count)
- adev->umc.funcs->query_ras_error_count(adev, err_data);
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ return AMDGPU_RAS_SUCCESS;
+
+ kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+ if (adev->umc.funcs &&
+ adev->umc.funcs->query_ras_error_count)
+ adev->umc.funcs->query_ras_error_count(adev, err_data);
+
+ if (adev->umc.funcs &&
+ adev->umc.funcs->query_ras_error_address &&
+ adev->umc.max_ras_err_cnt_per_query) {
+ err_data->err_addr =
+ kcalloc(adev->umc.max_ras_err_cnt_per_query,
+ sizeof(struct eeprom_table_record), GFP_KERNEL);
+ /* still call query_ras_error_address to clear error status
+ * even NOMEM error is encountered
+ */
+ if(!err_data->err_addr)
+ DRM_WARN("Failed to alloc memory for umc error address record!\n");
+
/* umc query_ras_error_address is also responsible for clearing
* error status
*/
- if (adev->umc.funcs->query_ras_error_address)
- adev->umc.funcs->query_ras_error_address(adev, err_data);
+ adev->umc.funcs->query_ras_error_address(adev, err_data);
+ }
+
+ /* only uncorrectable error needs gpu reset */
+ if (err_data->ue_count) {
+ if (err_data->err_addr_cnt &&
+ amdgpu_ras_add_bad_pages(adev, err_data->err_addr,
+ err_data->err_addr_cnt))
+ DRM_WARN("Failed to add ras bad page!\n");
- /* only uncorrectable error needs gpu reset */
- if (err_data->ue_count)
- amdgpu_ras_reset_gpu(adev, 0);
+ amdgpu_ras_reset_gpu(adev, 0);
}
+ kfree(err_data->err_addr);
return AMDGPU_RAS_SUCCESS;
}