summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
diff options
context:
space:
mode:
authoryipechai <YiPeng.Chai@amd.com>2022-01-04 13:52:46 +0800
committerAlex Deucher <alexander.deucher@amd.com>2022-01-14 17:51:59 -0500
commit8b0fb0e967c1700bd729ae54b6f229501b8587ec (patch)
tree471aee3a380c35f9a6fb6e595c69691c6e7ed872 /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
parent7cab2124058d2f5f048f435a4631e176dcd1430d (diff)
drm/amdgpu: Modify gfx block to fit for the unified ras block data and ops
1.Modify gfx block to fit for the unified ras block data and ops. 2.Change amdgpu_gfx_ras_funcs to amdgpu_gfx_ras, and the corresponding variable name remove _funcs suffix. 3.Remove the const flag of gfx ras variable so that gfx ras block can be able to be inserted into amdgpu device ras block link list. 4.Invoke amdgpu_ras_register_ras_block function to register gfx ras block into amdgpu device ras block link list. 5.Remove the redundant code about gfx in amdgpu_ras.c after using the unified ras block. 6.Fill unified ras block .name .block .ras_late_init and .ras_fini for all of gfx versions. If .ras_late_init and .ras_fini had been defined by the selected gfx version, the defined functions will take effect; if not defined, default fill with amdgpu_gfx_ras_late_init and amdgpu_gfx_ras_fini. Signed-off-by: yipechai <YiPeng.Chai@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Reviewed-by: John Clements <john.clements@amd.com> Reviewed-by: Tao Zhou <tao.zhou1@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c63
1 files changed, 42 insertions, 21 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 5b3f4beb2149..a5812c21177e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -89,6 +89,8 @@ const char *get_ras_block_str(struct ras_common_if *ras_block)
return ras_block_string[ras_block->block];
}
+#define ras_block_str(_BLOCK_) (((_BLOCK_) < (sizeof(*ras_block_string)/sizeof(const char*))) ? ras_block_string[_BLOCK_] : "Out Of Range")
+
#define ras_err_str(i) (ras_error_string[ffs(i)])
#define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
@@ -962,6 +964,7 @@ static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_d
int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
struct ras_query_if *info)
{
+ struct amdgpu_ras_block_object* block_obj = NULL;
struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
struct ras_err_data err_data = {0, 0, 0, NULL};
int i;
@@ -969,6 +972,8 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
if (!obj)
return -EINVAL;
+ block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, 0);
+
switch (info->head.block) {
case AMDGPU_RAS_BLOCK__UMC:
amdgpu_ras_get_ecc_info(adev, &err_data);
@@ -981,13 +986,16 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
}
break;
case AMDGPU_RAS_BLOCK__GFX:
- if (adev->gfx.ras_funcs &&
- adev->gfx.ras_funcs->query_ras_error_count)
- adev->gfx.ras_funcs->query_ras_error_count(adev, &err_data);
+ if (!block_obj || !block_obj->hw_ops) {
+ dev_info(adev->dev, "%s doesn't config ras function \n",
+ get_ras_block_str(&info->head));
+ return -EINVAL;
+ }
+ if (block_obj->hw_ops->query_ras_error_count)
+ block_obj->hw_ops->query_ras_error_count(adev, &err_data);
- if (adev->gfx.ras_funcs &&
- adev->gfx.ras_funcs->query_ras_error_status)
- adev->gfx.ras_funcs->query_ras_error_status(adev);
+ if (block_obj->hw_ops->query_ras_error_status)
+ block_obj->hw_ops->query_ras_error_status(adev);
break;
case AMDGPU_RAS_BLOCK__MMHUB:
if (adev->mmhub.ras_funcs &&
@@ -1074,18 +1082,23 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
enum amdgpu_ras_block block)
{
+ struct amdgpu_ras_block_object* block_obj = amdgpu_ras_get_ras_block(adev, block, 0);
+
if (!amdgpu_ras_is_supported(adev, block))
return -EINVAL;
switch (block) {
case AMDGPU_RAS_BLOCK__GFX:
- if (adev->gfx.ras_funcs &&
- adev->gfx.ras_funcs->reset_ras_error_count)
- adev->gfx.ras_funcs->reset_ras_error_count(adev);
+ if (!block_obj || !block_obj->hw_ops) {
+ dev_info(adev->dev, "%s doesn't config ras function \n", ras_block_str(block));
+ return -EINVAL;
+ }
+
+ if (block_obj->hw_ops->reset_ras_error_count)
+ block_obj->hw_ops->reset_ras_error_count(adev);
- if (adev->gfx.ras_funcs &&
- adev->gfx.ras_funcs->reset_ras_error_status)
- adev->gfx.ras_funcs->reset_ras_error_status(adev);
+ if (block_obj->hw_ops->reset_ras_error_status)
+ block_obj->hw_ops->reset_ras_error_status(adev);
break;
case AMDGPU_RAS_BLOCK__MMHUB:
if (adev->mmhub.ras_funcs &&
@@ -1150,7 +1163,8 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
.address = info->address,
.value = info->value,
};
- int ret = 0;
+ int ret = -EINVAL;
+ struct amdgpu_ras_block_object* block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, info->head.sub_block_index);
if (!obj)
return -EINVAL;
@@ -1164,11 +1178,13 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
switch (info->head.block) {
case AMDGPU_RAS_BLOCK__GFX:
- if (adev->gfx.ras_funcs &&
- adev->gfx.ras_funcs->ras_error_inject)
- ret = adev->gfx.ras_funcs->ras_error_inject(adev, info);
- else
- ret = -EINVAL;
+ if (!block_obj || !block_obj->hw_ops) {
+ dev_info(adev->dev, "%s doesn't config ras function \n", get_ras_block_str(&info->head));
+ return -EINVAL;
+ }
+
+ if (block_obj->hw_ops->ras_error_inject)
+ ret = block_obj->hw_ops->ras_error_inject(adev, info);
break;
case AMDGPU_RAS_BLOCK__UMC:
case AMDGPU_RAS_BLOCK__SDMA:
@@ -1800,15 +1816,20 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev)
static void amdgpu_ras_error_status_query(struct amdgpu_device *adev,
struct ras_query_if *info)
{
+ struct amdgpu_ras_block_object* block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, info->head.sub_block_index);
/*
* Only two block need to query read/write
* RspStatus at current state
*/
switch (info->head.block) {
case AMDGPU_RAS_BLOCK__GFX:
- if (adev->gfx.ras_funcs &&
- adev->gfx.ras_funcs->query_ras_error_status)
- adev->gfx.ras_funcs->query_ras_error_status(adev);
+ if (!block_obj || !block_obj->hw_ops) {
+ dev_info(adev->dev, "%s doesn't config ras function \n", get_ras_block_str(&info->head));
+ return ;
+ }
+
+ if (block_obj->hw_ops->query_ras_error_status)
+ block_obj->hw_ops->query_ras_error_status(adev);
break;
case AMDGPU_RAS_BLOCK__MMHUB:
if (adev->mmhub.ras_funcs &&