summary | refs | log | tree | commit | diff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 53
1 files changed, 32 insertions, 21 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
index cbc40cad581b..9b3180449150 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
@@ -76,6 +76,7 @@ static void aca_banks_release(struct aca_banks *banks)
list_for_each_entry_safe(node, tmp, &banks->list, node) {
list_del(&node->node);
kvfree(node);
+ banks->nr_banks--;
}
}
@@ -130,6 +131,27 @@ static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, st
RAS_EVENT_LOG(adev, event_id, HW_ERR "hardware error logged by the scrubber\n");
}
+static bool aca_bank_hwip_is_matched(struct aca_bank *bank, enum aca_hwip_type type)
+{
+
+ struct aca_hwip *hwip;
+ int hwid, mcatype;
+ u64 ipid;
+
+ if (!bank || type == ACA_HWIP_TYPE_UNKNOW)
+ return false;
+
+ hwip = &aca_hwid_mcatypes[type];
+ if (!hwip->hwid)
+ return false;
+
+ ipid = bank->regs[ACA_REG_IDX_IPID];
+ hwid = ACA_REG__IPID__HARDWAREID(ipid);
+ mcatype = ACA_REG__IPID__MCATYPE(ipid);
+
+ return hwip->hwid == hwid && hwip->mcatype == mcatype;
+}
+
static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_type type,
int start, int count,
struct aca_banks *banks, struct ras_query_context *qctx)
@@ -168,6 +190,15 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_
bank.smu_err_type = type;
+ /*
+ * Injecting a UE while background workloads are running can cause poison to be
+ * consumed; such unexpected poison banks from non-UMC blocks are skipped here.
+ */
+ if (type == ACA_SMU_TYPE_UE &&
+ ACA_REG__STATUS__POISON(bank.regs[ACA_REG_IDX_STATUS]) &&
+ !aca_bank_hwip_is_matched(&bank, ACA_HWIP_TYPE_UMC))
+ continue;
+
aca_smu_bank_dump(adev, i, count, &bank, qctx);
ret = aca_banks_add_bank(banks, &bank);
@@ -178,27 +209,6 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_
return 0;
}
-static bool aca_bank_hwip_is_matched(struct aca_bank *bank, enum aca_hwip_type type)
-{
-
- struct aca_hwip *hwip;
- int hwid, mcatype;
- u64 ipid;
-
- if (!bank || type == ACA_HWIP_TYPE_UNKNOW)
- return false;
-
- hwip = &aca_hwid_mcatypes[type];
- if (!hwip->hwid)
- return false;
-
- ipid = bank->regs[ACA_REG_IDX_IPID];
- hwid = ACA_REG__IPID__HARDWAREID(ipid);
- mcatype = ACA_REG__IPID__MCATYPE(ipid);
-
- return hwip->hwid == hwid && hwip->mcatype == mcatype;
-}
-
static bool aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type)
{
const struct aca_bank_ops *bank_ops = handle->bank_ops;
@@ -229,6 +239,7 @@ static struct aca_bank_error *new_bank_error(struct aca_error *aerr, struct aca_
mutex_lock(&aerr->lock);
list_add_tail(&bank_error->node, &aerr->list);
+ aerr->nr_errors++;
mutex_unlock(&aerr->lock);
return bank_error;