summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h71
1 files changed, 68 insertions, 3 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
index 54d9bfe0881d..2e5d63957e71 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
@@ -26,8 +26,17 @@
#include <linux/i2c.h>
+#define RAS_TABLE_VER_V1 0x00010000
+#define RAS_TABLE_VER_V2_1 0x00021000
+#define RAS_TABLE_VER_V3 0x00030000
+
struct amdgpu_device;
+enum amdgpu_ras_gpu_health_status {
+ GPU_HEALTH_USABLE = 0,
+ GPU_RETIRED__ECC_REACH_THRESHOLD = 2,
+};
+
enum amdgpu_ras_eeprom_err_type {
AMDGPU_RAS_EEPROM_ERR_NA,
AMDGPU_RAS_EEPROM_ERR_RECOVERABLE,
@@ -43,9 +52,18 @@ struct amdgpu_ras_eeprom_table_header {
uint32_t checksum;
} __packed;
+struct amdgpu_ras_eeprom_table_ras_info {
+ u8 rma_status;
+ u8 health_percent;
+ u16 ecc_page_threshold;
+ u32 padding[64 - 1];
+} __packed;
+
struct amdgpu_ras_eeprom_control {
struct amdgpu_ras_eeprom_table_header tbl_hdr;
+ struct amdgpu_ras_eeprom_table_ras_info tbl_rai;
+
/* Base I2C EEPPROM 19-bit memory address,
* where the table is located. For more information,
* see top of amdgpu_eeprom.c.
@@ -58,11 +76,24 @@ struct amdgpu_ras_eeprom_control {
* right after the header.
*/
u32 ras_header_offset;
+ u32 ras_info_offset;
u32 ras_record_offset;
/* Number of records in the table.
*/
u32 ras_num_recs;
+ u32 ras_num_recs_old;
+
+ /* the bad page number is ras_num_recs or
+ * ras_num_recs * umc.retire_unit
+ */
+ u32 ras_num_bad_pages;
+
+ /* Number of records store mca address */
+ u32 ras_num_mca_recs;
+
+ /* Number of records store physical address */
+ u32 ras_num_pa_recs;
/* First record index to read, 0-based.
* Range is [0, num_recs-1]. This is
@@ -84,6 +115,8 @@ struct amdgpu_ras_eeprom_control {
/* Record channel info which occurred bad pages
*/
u32 bad_channel_bitmap;
+
+ bool is_eeprom_valid;
};
/*
@@ -111,8 +144,7 @@ struct eeprom_table_record {
unsigned char mcumc_id;
} __packed;
-int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
- bool *exceed_err_limit);
+int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control);
int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control);
@@ -124,10 +156,43 @@ int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control,
struct eeprom_table_record *records, const u32 num);
-uint32_t amdgpu_ras_eeprom_max_record_count(void);
+uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *control);
void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control);
+int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control);
+
+void amdgpu_ras_eeprom_check_and_recover(struct amdgpu_device *adev);
+
+bool amdgpu_ras_smu_eeprom_supported(struct amdgpu_device *adev);
+
+int amdgpu_ras_smu_get_table_version(struct amdgpu_device *adev,
+ uint32_t *table_version);
+
+int amdgpu_ras_smu_get_badpage_count(struct amdgpu_device *adev,
+ uint32_t *count, uint32_t timeout);
+
+int amdgpu_ras_smu_get_badpage_mca_addr(struct amdgpu_device *adev,
+ uint16_t index, uint64_t *mca_addr);
+
+int amdgpu_ras_smu_set_timestamp(struct amdgpu_device *adev,
+ uint64_t timestamp);
+
+int amdgpu_ras_smu_get_timestamp(struct amdgpu_device *adev,
+ uint16_t index, uint64_t *timestamp);
+
+int amdgpu_ras_smu_get_badpage_ipid(struct amdgpu_device *adev,
+ uint16_t index, uint64_t *ipid);
+
+int amdgpu_ras_smu_erase_ras_table(struct amdgpu_device *adev,
+ uint32_t *result);
+
+int amdgpu_ras_eeprom_read_idx(struct amdgpu_ras_eeprom_control *control,
+ struct eeprom_table_record *record, u32 rec_idx,
+ const u32 num);
+
+int amdgpu_ras_eeprom_update_record_num(struct amdgpu_ras_eeprom_control *control);
+
extern const struct file_operations amdgpu_ras_debugfs_eeprom_size_ops;
extern const struct file_operations amdgpu_ras_debugfs_eeprom_table_ops;