summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTao Zhou <tao.zhou1@amd.com>2024-10-24 15:34:27 +0800
committerAlex Deucher <alexander.deucher@amd.com>2024-12-10 10:26:46 -0500
commit07dd49e1fc42fcb67a1786c2b2f71e0dd0079935 (patch)
tree71f10dd70d8dba934ad56f475e83987500975cce
parentb02ef407729d9eb26fbc74ca229e28dbe971e1c1 (diff)
drm/amdgpu: support to find RAS bad pages via old TA
Old version of RAS TA doesn't support to convert MCA address stored on eeprom to physical address (PA), support to find all bad pages in one memory row by PA with old RAS TA. This approach is only suitable for nps1 mode. Signed-off-by: Tao Zhou <tao.zhou1@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c28
1 files changed, 25 insertions, 3 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 882a33e134d8..d41a0664aa27 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2765,9 +2765,10 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
struct ras_err_handler_data *data;
struct ras_err_data err_data;
struct eeprom_table_record *err_rec;
+ enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE;
int ret = 0;
uint32_t i, j, loop_cnt = 1;
- bool is_mca_add = true;
+ bool is_mca_add = true, find_pages_per_pa = false;
if (!con || !con->eh_data || !bps || pages <= 0)
return 0;
@@ -2797,12 +2798,33 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
}
loop_cnt = adev->umc.retire_unit;
+ if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+ nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
}
for (i = 0; i < pages; i++) {
if (is_mca_add) {
- if (amdgpu_ras_mca2pa(adev, &bps[i], &err_data))
- goto free;
+ if (!find_pages_per_pa) {
+ if (amdgpu_ras_mca2pa(adev, &bps[i], &err_data)) {
+ if (!i && nps == AMDGPU_NPS1_PARTITION_MODE) {
+ /* may use old RAS TA, use PA to find pages in
+ * one row
+ */
+ if (amdgpu_umc_pages_in_a_row(adev, &err_data,
+ bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT))
+ goto free;
+ else
+ find_pages_per_pa = true;
+ } else {
+ /* unsupported cases */
+ goto free;
+ }
+ }
+ } else {
+ if (amdgpu_umc_pages_in_a_row(adev, &err_data,
+ bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT))
+ goto free;
+ }
err_rec = err_data.err_addr;
} else {