summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYazen Ghannam <yazen.ghannam@amd.com>2025-04-01 20:49:00 +0000
committerBorislav Petkov (AMD) <bp@alien8.de>2025-04-07 15:06:06 +0200
commit6c44e5354d4d16d9d891a419ca3f57abfe18ce7a (patch)
tree62cea8b2db584d99edf880a054a1e8ca83daf67d
parent0af2f6be1b4281385b618cb86ad946eded089ac8 (diff)
RAS/AMD/ATL: Include row[13] bit in row retirement
Based on feedback from hardware folks, row[13] is part of the variable bits within a physical row (along with all column bits). Only half the physical addresses affected by a row are calculated if this bit is not included. Add the row[13] bit to the row retirement flow. Fixes: 3b566b30b414 ("RAS/AMD/ATL: Add MI300 row retirement support") Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com> Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de> Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250401-fix-fmpm-extra-records-v1-1-840bcf7a8ac5@amd.com
-rw-r--r--drivers/ras/amd/atl/umc.c19
1 files changed, 18 insertions, 1 deletions
diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c
index dc8aa12f63c8..cb8ace3d4e42 100644
--- a/drivers/ras/amd/atl/umc.c
+++ b/drivers/ras/amd/atl/umc.c
@@ -320,7 +320,7 @@ static unsigned long convert_dram_to_norm_addr_mi300(unsigned long addr)
* See amd_atl::convert_dram_to_norm_addr_mi300() for MI300 address formats.
*/
#define MI300_NUM_COL BIT(HWEIGHT(MI300_UMC_MCA_COL))
-static void retire_row_mi300(struct atl_err *a_err)
+static void _retire_row_mi300(struct atl_err *a_err)
{
unsigned long addr;
struct page *p;
@@ -351,6 +351,23 @@ static void retire_row_mi300(struct atl_err *a_err)
}
}
+/*
+ * In addition to the column bits, the row[13] bit should also be included when
+ * calculating addresses affected by a physical row.
+ *
+ * Instead of running through another loop over a single bit, just run through
+ * the column bits twice and flip the row[13] bit in-between.
+ *
+ * See MI300_UMC_MCA_ROW for the row bits in MCA_ADDR_UMC value.
+ */
+#define MI300_UMC_MCA_ROW13 BIT(23)
+static void retire_row_mi300(struct atl_err *a_err)
+{
+ _retire_row_mi300(a_err);
+ a_err->addr ^= MI300_UMC_MCA_ROW13;
+ _retire_row_mi300(a_err);
+}
+
void amd_retire_dram_row(struct atl_err *a_err)
{
if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous)