summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm
diff options
context:
space:
mode:
authorTao Zhou <tao.zhou1@amd.com>2023-05-17 14:20:46 +0800
committerAlex Deucher <alexander.deucher@amd.com>2023-09-11 17:10:35 -0400
commit40a08fe890640f9c61d4443c218b61f745b11b2d (patch)
tree00da7bdb0bd94c18bcaa1390ea95df4e45198255 /drivers/gpu/drm
parentca7aa3bf31dc7539af950e7c99c330aa3e9b2a0b (diff)
drm/amdgpu: add address conversion for UMC v12
Convert MCA error address to physical address and find out all pages in one physical row. Signed-off-by: Tao Zhou <tao.zhou1@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v12_0.c91
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v12_0.h64
3 files changed, 159 insertions, 1 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index 43321f57f557..417a6726c71b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -32,6 +32,11 @@
* is the index of 8KB block
*/
#define ADDR_OF_8KB_BLOCK(addr) (((addr) & ~0xffULL) << 5)
+/*
+ * (addr / 256) * 32768, the higher 26 bits in ErrorAddr
+ * is the index of 8KB block
+ */
+#define ADDR_OF_32KB_BLOCK(addr) (((addr) & ~0xffULL) << 7)
/* channel index is the index of 256B block */
#define ADDR_OF_256B_BLOCK(channel_index) ((channel_index) << 8)
/* offset in 256B block */
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index ac52889601e7..aeb01cf22256 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -27,6 +27,14 @@
#include "umc/umc_12_0_0_offset.h"
#include "umc/umc_12_0_0_sh_mask.h"
+/* mapping of MCA error address to normalized address */
+static const uint32_t umc_v12_0_ma2na_mapping[] = {
+ 0, 5, 6, 8, 9, 14, 12, 13,
+ 10, 11, 15, 16, 17, 18, 19, 20,
+ 21, 22, 23, 24, 25, 26, 27, 28,
+ 24, 7, 29, 30,
+};
+
static inline uint64_t get_umc_v12_0_reg_offset(struct amdgpu_device *adev,
uint32_t node_inst,
uint32_t umc_inst,
@@ -137,12 +145,93 @@ static void umc_v12_0_query_ras_error_count(struct amdgpu_device *adev,
umc_v12_0_reset_error_count(adev);
}
+static bool umc_v12_0_bit_wise_xor(uint32_t val)
+{
+ bool result = 0;
+ int i;
+
+ for (i = 0; i < 32; i++)
+ result = result ^ ((val >> i) & 0x1);
+
+ return result;
+}
+
static void umc_v12_0_convert_error_address(struct amdgpu_device *adev,
struct ras_err_data *err_data, uint64_t err_addr,
uint32_t ch_inst, uint32_t umc_inst,
uint32_t node_inst)
{
-
+ uint32_t channel_index, i;
+ uint64_t soc_pa, na, retired_page, column;
+ uint32_t bank_hash0, bank_hash1, bank_hash2, bank_hash3, col, row;
+ uint32_t bank0, bank1, bank2, bank3, bank;
+
+ bank_hash0 = (err_addr >> UMC_V12_0_MCA_B0_BIT) & 0x1ULL;
+ bank_hash1 = (err_addr >> UMC_V12_0_MCA_B1_BIT) & 0x1ULL;
+ bank_hash2 = (err_addr >> UMC_V12_0_MCA_B2_BIT) & 0x1ULL;
+ bank_hash3 = (err_addr >> UMC_V12_0_MCA_B3_BIT) & 0x1ULL;
+ col = (err_addr >> 1) & 0x1fULL;
+ row = (err_addr >> 10) & 0x3fffULL;
+
+ /* apply bank hash algorithm */
+ bank0 =
+ bank_hash0 ^ (UMC_V12_0_XOR_EN0 &
+ (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR0) ^
+ (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR0))));
+ bank1 =
+ bank_hash1 ^ (UMC_V12_0_XOR_EN1 &
+ (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR1) ^
+ (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR1))));
+ bank2 =
+ bank_hash2 ^ (UMC_V12_0_XOR_EN2 &
+ (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR2) ^
+ (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR2))));
+ bank3 =
+ bank_hash3 ^ (UMC_V12_0_XOR_EN3 &
+ (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR3) ^
+ (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR3))));
+
+ bank = bank0 | (bank1 << 1) | (bank2 << 2) | (bank3 << 3);
+ err_addr &= ~0x3c0ULL;
+ err_addr |= (bank << UMC_V12_0_MCA_B0_BIT);
+
+ na = 0x0;
+ /* convert mca error address to normalized address */
+ for (i = 1; i < ARRAY_SIZE(umc_v12_0_ma2na_mapping); i++)
+ na |= ((err_addr >> i) & 0x1ULL) << umc_v12_0_ma2na_mapping[i];
+
+ channel_index =
+ adev->umc.channel_idx_tbl[node_inst * adev->umc.umc_inst_num *
+ adev->umc.channel_inst_num +
+ umc_inst * adev->umc.channel_inst_num +
+ ch_inst];
+ /* translate umc channel address to soc pa, 3 parts are included */
+ soc_pa = ADDR_OF_32KB_BLOCK(na) |
+ ADDR_OF_256B_BLOCK(channel_index) |
+ OFFSET_IN_256B_BLOCK(na);
+
+ /* the umc channel bits are not original values, they are hashed */
+ UMC_V12_0_SET_CHANNEL_HASH(channel_index, soc_pa);
+
+ /* clear [C3 C2] in soc physical address */
+ soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT);
+ /* clear [C4] in soc physical address */
+ soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT);
+
+ /* loop for all possibilities of [C4 C3 C2] */
+ for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) {
+ retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT);
+ retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT);
+ dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page);
+ amdgpu_umc_fill_error_record(err_data, err_addr,
+ retired_page, channel_index, umc_inst);
+
+ /* shift R13 bit */
+ retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT);
+ dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page);
+ amdgpu_umc_fill_error_record(err_data, err_addr,
+ retired_page, channel_index, umc_inst);
+ }
}
static int umc_v12_0_query_error_address(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
index 2e63cc30766b..be65f274b7a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
@@ -51,6 +51,70 @@
#define UMC_V12_0_TOTAL_CHANNEL_NUM(adev) \
(UMC_V12_0_CHANNEL_INSTANCE_NUM * (adev)->gmc.num_umc)
+/* one piece of normalized address is mapped to 8 pieces of physical address */
+#define UMC_V12_0_NA_MAP_PA_NUM 8
+/* bank bits in MCA error address */
+#define UMC_V12_0_MCA_B0_BIT 6
+#define UMC_V12_0_MCA_B1_BIT 7
+#define UMC_V12_0_MCA_B2_BIT 8
+#define UMC_V12_0_MCA_B3_BIT 9
+/* column bits in SOC physical address */
+#define UMC_V12_0_PA_C2_BIT 15
+#define UMC_V12_0_PA_C4_BIT 21
+/* row bits in SOC physical address */
+#define UMC_V12_0_PA_R13_BIT 35
+/* channel index bits in SOC physical address */
+#define UMC_V12_0_PA_CH4_BIT 12
+#define UMC_V12_0_PA_CH5_BIT 13
+#define UMC_V12_0_PA_CH6_BIT 14
+
+/* bank hash settings */
+#define UMC_V12_0_XOR_EN0 1
+#define UMC_V12_0_XOR_EN1 1
+#define UMC_V12_0_XOR_EN2 1
+#define UMC_V12_0_XOR_EN3 1
+#define UMC_V12_0_COL_XOR0 0x0
+#define UMC_V12_0_COL_XOR1 0x0
+#define UMC_V12_0_COL_XOR2 0x800
+#define UMC_V12_0_COL_XOR3 0x1000
+#define UMC_V12_0_ROW_XOR0 0x11111
+#define UMC_V12_0_ROW_XOR1 0x22222
+#define UMC_V12_0_ROW_XOR2 0x4444
+#define UMC_V12_0_ROW_XOR3 0x8888
+
+/* channel hash settings */
+#define UMC_V12_0_HASH_4K 0
+#define UMC_V12_0_HASH_64K 1
+#define UMC_V12_0_HASH_2M 1
+#define UMC_V12_0_HASH_1G 1
+#define UMC_V12_0_HASH_1T 1
+
+/* XOR some bits of PA into CH4~CH6 bits (bits 12~14 of PA),
+ * hash bit is only effective when related setting is enabled
+ */
+#define UMC_V12_0_CHANNEL_HASH_CH4(channel_idx, pa) ((((channel_idx) >> 5) & 0x1) ^ \
+ (((pa) >> 20) & 0x1ULL & UMC_V12_0_HASH_64K) ^ \
+ (((pa) >> 27) & 0x1ULL & UMC_V12_0_HASH_2M) ^ \
+ (((pa) >> 34) & 0x1ULL & UMC_V12_0_HASH_1G) ^ \
+ (((pa) >> 41) & 0x1ULL & UMC_V12_0_HASH_1T))
+#define UMC_V12_0_CHANNEL_HASH_CH5(channel_idx, pa) ((((channel_idx) >> 6) & 0x1) ^ \
+ (((pa) >> 21) & 0x1ULL & UMC_V12_0_HASH_64K) ^ \
+ (((pa) >> 28) & 0x1ULL & UMC_V12_0_HASH_2M) ^ \
+ (((pa) >> 35) & 0x1ULL & UMC_V12_0_HASH_1G) ^ \
+ (((pa) >> 42) & 0x1ULL & UMC_V12_0_HASH_1T))
+#define UMC_V12_0_CHANNEL_HASH_CH6(channel_idx, pa) ((((channel_idx) >> 4) & 0x1) ^ \
+ (((pa) >> 19) & 0x1ULL & UMC_V12_0_HASH_64K) ^ \
+ (((pa) >> 26) & 0x1ULL & UMC_V12_0_HASH_2M) ^ \
+ (((pa) >> 33) & 0x1ULL & UMC_V12_0_HASH_1G) ^ \
+ (((pa) >> 40) & 0x1ULL & UMC_V12_0_HASH_1T) ^ \
+ (((pa) >> 47) & 0x1ULL & UMC_V12_0_HASH_4K))
+#define UMC_V12_0_SET_CHANNEL_HASH(channel_idx, pa) do { \
+ (pa) &= ~(0x7ULL << UMC_V12_0_PA_CH4_BIT); \
+ (pa) |= (UMC_V12_0_CHANNEL_HASH_CH4(channel_idx, pa) << UMC_V12_0_PA_CH4_BIT); \
+ (pa) |= (UMC_V12_0_CHANNEL_HASH_CH5(channel_idx, pa) << UMC_V12_0_PA_CH5_BIT); \
+ (pa) |= (UMC_V12_0_CHANNEL_HASH_CH6(channel_idx, pa) << UMC_V12_0_PA_CH6_BIT); \
+ } while (0)
+
extern struct amdgpu_umc_ras umc_v12_0_ras;
#endif