summary | refs | log | tree | commit | diff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h 56
1 files changed, 43 insertions, 13 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
index 2da50e095883..38c88897e1ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
@@ -26,6 +26,9 @@
#include <linux/list.h>
+struct ras_err_data;
+struct ras_query_context;
+
#define ACA_MAX_REGS_COUNT (16)
#define ACA_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >> l)
@@ -68,12 +71,21 @@
#define ACA_ERROR_CE_MASK BIT_MASK(ACA_ERROR_TYPE_CE)
#define ACA_ERROR_DEFERRED_MASK BIT_MASK(ACA_ERROR_TYPE_DEFERRED)
+#define mmSMNAID_AID0_MCA_SMU 0x03b30400 /* SMN AID AID0 */
+#define mmSMNAID_XCD0_MCA_SMU 0x36430400 /* SMN AID XCD0 */
+#define mmSMNAID_XCD1_MCA_SMU 0x38430400 /* SMN AID XCD1 */
+#define mmSMNXCD_XCD0_MCA_SMU 0x40430400 /* SMN XCD XCD0 */
+
+#define ACA_BANK_ERR_IS_DEFFERED(bank) \
+ (ACA_REG__STATUS__POISON((bank)->regs[ACA_REG_IDX_STATUS]) || \
+ ACA_REG__STATUS__DEFERRED((bank)->regs[ACA_REG_IDX_STATUS])) /* 1 when ACA_REG__STATUS__POISON or ACA_REG__STATUS__DEFERRED of (bank)->regs[ACA_REG_IDX_STATUS] is nonzero, else 0; `bank` is expanded twice, so pass an expression without side effects. NOTE(review): the name is spelled "DEFFERED"; renaming needs every user updated. */
+
enum aca_reg_idx {
ACA_REG_IDX_CTL = 0,
ACA_REG_IDX_STATUS = 1,
ACA_REG_IDX_ADDR = 2,
ACA_REG_IDX_MISC0 = 3,
- ACA_REG_IDX_CONFG = 4,
+ ACA_REG_IDX_CONFIG = 4,
ACA_REG_IDX_IPID = 5,
ACA_REG_IDX_SYND = 6,
ACA_REG_IDX_DESTAT = 8,
@@ -99,7 +111,21 @@ enum aca_error_type {
ACA_ERROR_TYPE_COUNT
};
+enum aca_smu_type { /* type argument taken by the get_valid_aca_count/get_valid_aca_bank and aca_bank_parser/aca_bank_is_valid callbacks; also stored in struct aca_bank */
+ ACA_SMU_TYPE_INVALID = -1, /* negative, outside the 0..COUNT-1 range; NOTE(review): presumably an "unset" marker - confirm with users */
+ ACA_SMU_TYPE_UE = 0, /* NOTE(review): presumably "uncorrectable error" - confirm */
+ ACA_SMU_TYPE_CE, /* implicitly 1; NOTE(review): presumably "correctable error" - confirm */
+ ACA_SMU_TYPE_COUNT, /* implicitly 2, i.e. the number of non-negative types above */
+};
+
+struct aca_hwip { /* pairs a hwid with an mcatype; NOTE(review): users are not visible in this header diff - confirm intended use */
+ int hwid; /* NOTE(review): presumably a hardware IP identifier - confirm */
+ int mcatype; /* same field name as aca_bank_info.mcatype; NOTE(review): relationship between the two not shown here - confirm */
+};
+
struct aca_bank {
+ enum aca_error_type aca_err_type;
+ enum aca_smu_type smu_err_type;
u64 regs[ACA_MAX_REGS_COUNT];
};
@@ -108,6 +134,11 @@ struct aca_bank_node {
struct list_head node;
};
+struct aca_banks { /* list head plus a bank counter */
+ int nr_banks; /* NOTE(review): presumably the number of entries currently linked on `list` - confirm against the code that maintains it */
+ struct list_head list; /* NOTE(review): presumably links struct aca_bank_node entries through their `node` member - confirm */
+};
+
struct aca_bank_info {
int die_id;
int socket_id;
@@ -115,15 +146,10 @@ struct aca_bank_info {
int mcatype;
};
-struct aca_bank_report {
- struct aca_bank_info info;
- u64 count[ACA_ERROR_TYPE_COUNT];
-};
-
struct aca_bank_error {
struct list_head node;
struct aca_bank_info info;
- u64 count[ACA_ERROR_TYPE_COUNT];
+ u64 count;
};
struct aca_error {
@@ -157,9 +183,8 @@ struct aca_handle {
};
struct aca_bank_ops {
- int (*aca_bank_generate_report)(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type,
- struct aca_bank_report *report, void *data);
- bool (*aca_bank_is_valid)(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type,
+ int (*aca_bank_parser)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data);
+ bool (*aca_bank_is_valid)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type,
void *data);
};
@@ -167,13 +192,15 @@ struct aca_smu_funcs {
int max_ue_bank_count;
int max_ce_bank_count;
int (*set_debug_mode)(struct amdgpu_device *adev, bool enable);
- int (*get_valid_aca_count)(struct amdgpu_device *adev, enum aca_error_type type, u32 *count);
- int (*get_valid_aca_bank)(struct amdgpu_device *adev, enum aca_error_type type, int idx, struct aca_bank *bank);
+ int (*get_valid_aca_count)(struct amdgpu_device *adev, enum aca_smu_type type, u32 *count);
+ int (*get_valid_aca_bank)(struct amdgpu_device *adev, enum aca_smu_type type, int idx, struct aca_bank *bank);
+ int (*parse_error_code)(struct amdgpu_device *adev, struct aca_bank *bank);
};
struct amdgpu_aca {
struct aca_handle_manager mgr;
const struct aca_smu_funcs *smu_funcs;
+ atomic_t ue_update_flag;
bool is_enabled;
};
@@ -196,7 +223,10 @@ int amdgpu_aca_add_handle(struct amdgpu_device *adev, struct aca_handle *handle,
const char *name, const struct aca_info *aca_info, void *data);
void amdgpu_aca_remove_handle(struct aca_handle *handle);
int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle,
- enum aca_error_type type, void *data);
+ enum aca_error_type type, struct ras_err_data *err_data,
+ struct ras_query_context *qctx);
int amdgpu_aca_smu_set_debug_mode(struct amdgpu_device *adev, bool en);
void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root);
+int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_info *info,
+ enum aca_error_type type, u64 count);
#endif