Diffstat (limited to 'drivers/accel/habanalabs/gaudi2/gaudi2.c'):
 drivers/accel/habanalabs/gaudi2/gaudi2.c | 3179 +++++++++++++++++++++---------
 1 file changed, 2226 insertions(+), 953 deletions(-)
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index f1f2a58ee68c..b8c0689dba64 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -23,7 +23,8 @@
#define GAUDI2_DMA_POOL_BLK_SIZE SZ_256 /* 256 bytes */
#define GAUDI2_RESET_TIMEOUT_MSEC 2000 /* 2000ms */
-#define GAUDI2_RESET_POLL_TIMEOUT_USEC 50000 /* 50ms */
+
+#define GAUDI2_RESET_POLL_TIMEOUT_USEC 500000 /* 500ms */
#define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC 25000 /* 25s */
#define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC 25000 /* 25s */
#define GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC 3000000 /* 3s */
@@ -56,16 +57,15 @@
#define GAUDI2_NA_EVENT_CAUSE 0xFF
#define GAUDI2_NUM_OF_QM_ERR_CAUSE 18
-#define GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE 25
+#define GAUDI2_NUM_OF_LOWER_QM_ERR_CAUSE 25
#define GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE 3
#define GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE 14
#define GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE 3
#define GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE 2
#define GAUDI2_NUM_OF_ROT_ERR_CAUSE 22
-#define GAUDI2_NUM_OF_TPC_INTR_CAUSE 30
+#define GAUDI2_NUM_OF_TPC_INTR_CAUSE 31
#define GAUDI2_NUM_OF_DEC_ERR_CAUSE 25
#define GAUDI2_NUM_OF_MME_ERR_CAUSE 16
-#define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE 5
#define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE 7
#define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE 8
#define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE 19
@@ -86,10 +86,11 @@
#define KDMA_TIMEOUT_USEC USEC_PER_SEC
-#define IS_DMA_IDLE(dma_core_idle_ind_mask) \
- (!((dma_core_idle_ind_mask) & \
- ((DCORE0_EDMA0_CORE_IDLE_IND_MASK_DESC_CNT_STS_MASK) | \
- (DCORE0_EDMA0_CORE_IDLE_IND_MASK_COMP_MASK))))
+#define IS_DMA_IDLE(dma_core_sts0) \
+ (!((dma_core_sts0) & (DCORE0_EDMA0_CORE_STS0_BUSY_MASK)))
+
+#define IS_DMA_HALTED(dma_core_sts1) \
+ ((dma_core_sts1) & (DCORE0_EDMA0_CORE_STS1_IS_HALT_MASK))
#define IS_MME_IDLE(mme_arch_sts) (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
@@ -132,6 +133,287 @@
#define ENGINE_ID_DCORE_OFFSET (GAUDI2_DCORE1_ENGINE_ID_EDMA_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)
+/* RAZWI initiator coordinates */
+#define RAZWI_GET_AXUSER_XY(x) \
+ ((x & 0xF8001FF0) >> 4)
+
+#define RAZWI_GET_AXUSER_LOW_XY(x) \
+ ((x & 0x00001FF0) >> 4)
+
+#define RAZWI_INITIATOR_AXUER_L_X_SHIFT 0
+#define RAZWI_INITIATOR_AXUER_L_X_MASK 0x1F
+#define RAZWI_INITIATOR_AXUER_L_Y_SHIFT 5
+#define RAZWI_INITIATOR_AXUER_L_Y_MASK 0xF
+
+#define RAZWI_INITIATOR_AXUER_H_X_SHIFT 23
+#define RAZWI_INITIATOR_AXUER_H_X_MASK 0x1F
+
+#define RAZWI_INITIATOR_ID_X_Y_LOW(x, y) \
+ ((((y) & RAZWI_INITIATOR_AXUER_L_Y_MASK) << RAZWI_INITIATOR_AXUER_L_Y_SHIFT) | \
+ (((x) & RAZWI_INITIATOR_AXUER_L_X_MASK) << RAZWI_INITIATOR_AXUER_L_X_SHIFT))
+
+#define RAZWI_INITIATOR_ID_X_HIGH(x) \
+ (((x) & RAZWI_INITIATOR_AXUER_H_X_MASK) << RAZWI_INITIATOR_AXUER_H_X_SHIFT)
+
+#define RAZWI_INITIATOR_ID_X_Y(xl, yl, xh) \
+ (RAZWI_INITIATOR_ID_X_Y_LOW(xl, yl) | RAZWI_INITIATOR_ID_X_HIGH(xh))
+
+#define PSOC_RAZWI_ENG_STR_SIZE 128
+#define PSOC_RAZWI_MAX_ENG_PER_RTR 5
+
+/* HW scrambles only bits 0-25 */
+#define HW_UNSCRAMBLED_BITS_MASK GENMASK_ULL(63, 26)
+
+#define GAUDI2_GLBL_ERR_MAX_CAUSE_NUM 17
+
+struct gaudi2_razwi_info {
+ u32 axuser_xy;
+ u32 rtr_ctrl;
+ u16 eng_id;
+ char *eng_name;
+};
+
+static struct gaudi2_razwi_info common_razwi_info[] = {
+ {RAZWI_INITIATOR_ID_X_Y(2, 4, 0), mmDCORE0_RTR0_CTRL_BASE,
+ GAUDI2_DCORE0_ENGINE_ID_DEC_0, "DEC0"},
+ {RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE,
+ GAUDI2_DCORE0_ENGINE_ID_DEC_1, "DEC1"},
+ {RAZWI_INITIATOR_ID_X_Y(17, 4, 18), mmDCORE1_RTR7_CTRL_BASE,
+ GAUDI2_DCORE1_ENGINE_ID_DEC_0, "DEC2"},
+ {RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE,
+ GAUDI2_DCORE1_ENGINE_ID_DEC_1, "DEC3"},
+ {RAZWI_INITIATOR_ID_X_Y(2, 11, 0), mmDCORE2_RTR0_CTRL_BASE,
+ GAUDI2_DCORE2_ENGINE_ID_DEC_0, "DEC4"},
+ {RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE,
+ GAUDI2_DCORE2_ENGINE_ID_DEC_1, "DEC5"},
+ {RAZWI_INITIATOR_ID_X_Y(17, 11, 18), mmDCORE3_RTR7_CTRL_BASE,
+ GAUDI2_DCORE3_ENGINE_ID_DEC_0, "DEC6"},
+ {RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE,
+ GAUDI2_DCORE3_ENGINE_ID_DEC_1, "DEC7"},
+ {RAZWI_INITIATOR_ID_X_Y(2, 4, 6), mmDCORE0_RTR0_CTRL_BASE,
+ GAUDI2_PCIE_ENGINE_ID_DEC_0, "DEC8"},
+ {RAZWI_INITIATOR_ID_X_Y(2, 4, 7), mmDCORE0_RTR0_CTRL_BASE,
+ GAUDI2_PCIE_ENGINE_ID_DEC_0, "DEC9"},
+ {RAZWI_INITIATOR_ID_X_Y(3, 4, 2), mmDCORE0_RTR1_CTRL_BASE,
+ GAUDI2_DCORE0_ENGINE_ID_TPC_0, "TPC0"},
+ {RAZWI_INITIATOR_ID_X_Y(3, 4, 4), mmDCORE0_RTR1_CTRL_BASE,
+ GAUDI2_DCORE0_ENGINE_ID_TPC_1, "TPC1"},
+ {RAZWI_INITIATOR_ID_X_Y(4, 4, 2), mmDCORE0_RTR2_CTRL_BASE,
+ GAUDI2_DCORE0_ENGINE_ID_TPC_2, "TPC2"},
+ {RAZWI_INITIATOR_ID_X_Y(4, 4, 4), mmDCORE0_RTR2_CTRL_BASE,
+ GAUDI2_DCORE0_ENGINE_ID_TPC_3, "TPC3"},
+ {RAZWI_INITIATOR_ID_X_Y(5, 4, 2), mmDCORE0_RTR3_CTRL_BASE,
+ GAUDI2_DCORE0_ENGINE_ID_TPC_4, "TPC4"},
+ {RAZWI_INITIATOR_ID_X_Y(5, 4, 4), mmDCORE0_RTR3_CTRL_BASE,
+ GAUDI2_DCORE0_ENGINE_ID_TPC_5, "TPC5"},
+ {RAZWI_INITIATOR_ID_X_Y(16, 4, 14), mmDCORE1_RTR6_CTRL_BASE,
+ GAUDI2_DCORE1_ENGINE_ID_TPC_0, "TPC6"},
+ {RAZWI_INITIATOR_ID_X_Y(16, 4, 16), mmDCORE1_RTR6_CTRL_BASE,
+ GAUDI2_DCORE1_ENGINE_ID_TPC_1, "TPC7"},
+ {RAZWI_INITIATOR_ID_X_Y(15, 4, 14), mmDCORE1_RTR5_CTRL_BASE,
+ GAUDI2_DCORE1_ENGINE_ID_TPC_2, "TPC8"},
+ {RAZWI_INITIATOR_ID_X_Y(15, 4, 16), mmDCORE1_RTR5_CTRL_BASE,
+ GAUDI2_DCORE1_ENGINE_ID_TPC_3, "TPC9"},
+ {RAZWI_INITIATOR_ID_X_Y(14, 4, 14), mmDCORE1_RTR4_CTRL_BASE,
+ GAUDI2_DCORE1_ENGINE_ID_TPC_4, "TPC10"},
+ {RAZWI_INITIATOR_ID_X_Y(14, 4, 16), mmDCORE1_RTR4_CTRL_BASE,
+ GAUDI2_DCORE1_ENGINE_ID_TPC_5, "TPC11"},
+ {RAZWI_INITIATOR_ID_X_Y(5, 11, 2), mmDCORE2_RTR3_CTRL_BASE,
+ GAUDI2_DCORE2_ENGINE_ID_TPC_0, "TPC12"},
+ {RAZWI_INITIATOR_ID_X_Y(5, 11, 4), mmDCORE2_RTR3_CTRL_BASE,
+ GAUDI2_DCORE2_ENGINE_ID_TPC_1, "TPC13"},
+ {RAZWI_INITIATOR_ID_X_Y(4, 11, 2), mmDCORE2_RTR2_CTRL_BASE,
+ GAUDI2_DCORE2_ENGINE_ID_TPC_2, "TPC14"},
+ {RAZWI_INITIATOR_ID_X_Y(4, 11, 4), mmDCORE2_RTR2_CTRL_BASE,
+ GAUDI2_DCORE2_ENGINE_ID_TPC_3, "TPC15"},
+ {RAZWI_INITIATOR_ID_X_Y(3, 11, 2), mmDCORE2_RTR1_CTRL_BASE,
+ GAUDI2_DCORE2_ENGINE_ID_TPC_4, "TPC16"},
+ {RAZWI_INITIATOR_ID_X_Y(3, 11, 4), mmDCORE2_RTR1_CTRL_BASE,
+ GAUDI2_DCORE2_ENGINE_ID_TPC_5, "TPC17"},
+ {RAZWI_INITIATOR_ID_X_Y(14, 11, 14), mmDCORE3_RTR4_CTRL_BASE,
+ GAUDI2_DCORE3_ENGINE_ID_TPC_0, "TPC18"},
+ {RAZWI_INITIATOR_ID_X_Y(14, 11, 16), mmDCORE3_RTR4_CTRL_BASE,
+ GAUDI2_DCORE3_ENGINE_ID_TPC_1, "TPC19"},
+ {RAZWI_INITIATOR_ID_X_Y(15, 11, 14), mmDCORE3_RTR5_CTRL_BASE,
+ GAUDI2_DCORE3_ENGINE_ID_TPC_2, "TPC20"},
+ {RAZWI_INITIATOR_ID_X_Y(15, 11, 16), mmDCORE3_RTR5_CTRL_BASE,
+ GAUDI2_DCORE3_ENGINE_ID_TPC_3, "TPC21"},
+ {RAZWI_INITIATOR_ID_X_Y(16, 11, 14), mmDCORE3_RTR6_CTRL_BASE,
+ GAUDI2_DCORE3_ENGINE_ID_TPC_4, "TPC22"},
+ {RAZWI_INITIATOR_ID_X_Y(16, 11, 16), mmDCORE3_RTR6_CTRL_BASE,
+ GAUDI2_DCORE3_ENGINE_ID_TPC_5, "TPC23"},
+ {RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE,
+ GAUDI2_DCORE3_ENGINE_ID_TPC_5, "TPC24"},
+ {RAZWI_INITIATOR_ID_X_Y(17, 4, 8), mmDCORE1_RTR7_CTRL_BASE,
+ GAUDI2_ENGINE_ID_NIC0_0, "NIC0"},
+ {RAZWI_INITIATOR_ID_X_Y(17, 4, 10), mmDCORE1_RTR7_CTRL_BASE,
+ GAUDI2_ENGINE_ID_NIC0_1, "NIC1"},
+ {RAZWI_INITIATOR_ID_X_Y(17, 4, 12), mmDCORE1_RTR7_CTRL_BASE,
+ GAUDI2_ENGINE_ID_NIC1_0, "NIC2"},
+ {RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE,
+ GAUDI2_ENGINE_ID_NIC1_1, "NIC3"},
+ {RAZWI_INITIATOR_ID_X_Y(17, 4, 15), mmDCORE1_RTR7_CTRL_BASE,
+ GAUDI2_ENGINE_ID_NIC2_0, "NIC4"},
+ {RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
+ GAUDI2_ENGINE_ID_NIC2_1, "NIC5"},
+ {RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE,
+ GAUDI2_ENGINE_ID_NIC3_0, "NIC6"},
+ {RAZWI_INITIATOR_ID_X_Y(2, 11, 6), mmDCORE2_RTR0_CTRL_BASE,
+ GAUDI2_ENGINE_ID_NIC3_1, "NIC7"},
+ {RAZWI_INITIATOR_ID_X_Y(2, 11, 8), mmDCORE2_RTR0_CTRL_BASE,
+ GAUDI2_ENGINE_ID_NIC4_0, "NIC8"},
+ {RAZWI_INITIATOR_ID_X_Y(17, 11, 12), mmDCORE3_RTR7_CTRL_BASE,
+ GAUDI2_ENGINE_ID_NIC4_1, "NIC9"},
+ {RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE,
+ GAUDI2_ENGINE_ID_NIC5_0, "NIC10"},
+ {RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE,
+ GAUDI2_ENGINE_ID_NIC5_1, "NIC11"},
+ {RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE,
+ GAUDI2_ENGINE_ID_PDMA_0, "PDMA0"},
+ {RAZWI_INITIATOR_ID_X_Y(2, 4, 3), mmDCORE0_RTR0_CTRL_BASE,
+ GAUDI2_ENGINE_ID_PDMA_1, "PDMA1"},
+ {RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE,
+ GAUDI2_ENGINE_ID_SIZE, "PMMU"},
+ {RAZWI_INITIATOR_ID_X_Y(2, 4, 5), mmDCORE0_RTR0_CTRL_BASE,
+ GAUDI2_ENGINE_ID_SIZE, "PCIE"},
+ {RAZWI_INITIATOR_ID_X_Y(17, 4, 16), mmDCORE1_RTR7_CTRL_BASE,
+ GAUDI2_ENGINE_ID_ARC_FARM, "ARC_FARM"},
+ {RAZWI_INITIATOR_ID_X_Y(17, 4, 17), mmDCORE1_RTR7_CTRL_BASE,
+ GAUDI2_ENGINE_ID_KDMA, "KDMA"},
+ {RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF1_RTR_CTRL_BASE,
+ GAUDI2_DCORE0_ENGINE_ID_EDMA_0, "EDMA0"},
+ {RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE,
+ GAUDI2_DCORE0_ENGINE_ID_EDMA_1, "EDMA1"},
+ {RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF1_RTR_CTRL_BASE,
+ GAUDI2_DCORE1_ENGINE_ID_EDMA_0, "EDMA2"},
+ {RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF0_RTR_CTRL_BASE,
+ GAUDI2_DCORE1_ENGINE_ID_EDMA_1, "EDMA3"},
+ {RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF0_RTR_CTRL_BASE,
+ GAUDI2_DCORE2_ENGINE_ID_EDMA_0, "EDMA4"},
+ {RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF1_RTR_CTRL_BASE,
+ GAUDI2_DCORE2_ENGINE_ID_EDMA_1, "EDMA5"},
+ {RAZWI_INITIATOR_ID_X_Y(18, 10, 18), mmSFT2_HBW_RTR_IF0_RTR_CTRL_BASE,
+ GAUDI2_DCORE3_ENGINE_ID_EDMA_0, "EDMA6"},
+ {RAZWI_INITIATOR_ID_X_Y(18, 10, 18), mmSFT2_HBW_RTR_IF1_RTR_CTRL_BASE,
+ GAUDI2_DCORE3_ENGINE_ID_EDMA_1, "EDMA7"},
+ {RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
+ GAUDI2_ENGINE_ID_SIZE, "HMMU0"},
+ {RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
+ GAUDI2_ENGINE_ID_SIZE, "HMMU1"},
+ {RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
+ GAUDI2_ENGINE_ID_SIZE, "HMMU2"},
+ {RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
+ GAUDI2_ENGINE_ID_SIZE, "HMMU3"},
+ {RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
+ GAUDI2_ENGINE_ID_SIZE, "HMMU4"},
+ {RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
+ GAUDI2_ENGINE_ID_SIZE, "HMMU5"},
+ {RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
+ GAUDI2_ENGINE_ID_SIZE, "HMMU6"},
+ {RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
+ GAUDI2_ENGINE_ID_SIZE, "HMMU7"},
+ {RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
+ GAUDI2_ENGINE_ID_SIZE, "HMMU8"},
+ {RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
+ GAUDI2_ENGINE_ID_SIZE, "HMMU9"},
+ {RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
+ GAUDI2_ENGINE_ID_SIZE, "HMMU10"},
+ {RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
+ GAUDI2_ENGINE_ID_SIZE, "HMMU11"},
+ {RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
+ GAUDI2_ENGINE_ID_SIZE, "HMMU12"},
+ {RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
+ GAUDI2_ENGINE_ID_SIZE, "HMMU13"},
+ {RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
+ GAUDI2_ENGINE_ID_SIZE, "HMMU14"},
+ {RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
+ GAUDI2_ENGINE_ID_SIZE, "HMMU15"},
+ {RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
+ GAUDI2_ENGINE_ID_ROT_0, "ROT0"},
+ {RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE,
+ GAUDI2_ENGINE_ID_ROT_1, "ROT1"},
+ {RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
+ GAUDI2_ENGINE_ID_PSOC, "CPU"},
+ {RAZWI_INITIATOR_ID_X_Y(17, 11, 11), mmDCORE3_RTR7_CTRL_BASE,
+ GAUDI2_ENGINE_ID_PSOC, "PSOC"}
+};
+
+static struct gaudi2_razwi_info mme_razwi_info[] = {
+ /* MME X high coordinate is N/A, hence using only low coordinates */
+ {RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE,
+ GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP0"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
+ GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP1"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE,
+ GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_WR"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
+ GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_RD"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE,
+ GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE0"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE,
+ GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE1"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE,
+ GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE2"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE,
+ GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE3"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
+ GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE4"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE,
+ GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP0"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
+ GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP1"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE,
+ GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_WR"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
+ GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_RD"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE,
+ GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE0"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE,
+ GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE1"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE,
+ GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE2"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE,
+ GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE3"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
+ GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE4"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE,
+ GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP0"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
+ GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP1"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE,
+ GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_WR"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
+ GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_RD"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE,
+ GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE0"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE,
+ GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE1"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE,
+ GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE2"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE,
+ GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE3"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
+ GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE4"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE,
+ GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP0"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
+ GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP1"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE,
+ GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_WR"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
+ GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_RD"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE,
+ GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE0"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE,
+ GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE1"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE,
+ GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE2"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE,
+ GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE3"},
+ {RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
+ GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE4"}
+};
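
/*
 * Editor's illustrative sketch (not part of the patch): the tables above are
 * keyed by the packed initiator coordinates. For example, DEC0 at
 * (x_low = 2, y = 4, x_high = 0) packs to
 *   RAZWI_INITIATOR_ID_X_Y(2, 4, 0) == (4 << 5) | 2 == 0x82,
 * and a raw AXUSER register value is reduced with RAZWI_GET_AXUSER_XY()
 * before being matched. A hypothetical lookup could look roughly like this
 * (razwi_lookup_sketch is an invented name, not a driver function):
 */
static int razwi_lookup_sketch(u32 axuser_raw)
{
	u32 xy = RAZWI_GET_AXUSER_XY(axuser_raw);
	int i;

	for (i = 0 ; i < ARRAY_SIZE(common_razwi_info) ; i++)
		if (common_razwi_info[i].axuser_xy == xy)
			return common_razwi_info[i].eng_id;

	return -1;	/* no matching initiator */
}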
+
enum hl_pmmu_fatal_cause {
LATENCY_RD_OUT_FIFO_OVERRUN,
LATENCY_WR_OUT_FIFO_OVERRUN,
@@ -446,6 +728,354 @@ static const int gaudi2_dma_core_async_event_id[] = {
[DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE,
};
+const char *gaudi2_engine_id_str[] = {
+ __stringify(GAUDI2_DCORE0_ENGINE_ID_EDMA_0),
+ __stringify(GAUDI2_DCORE0_ENGINE_ID_EDMA_1),
+ __stringify(GAUDI2_DCORE0_ENGINE_ID_MME),
+ __stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_0),
+ __stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_1),
+ __stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_2),
+ __stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_3),
+ __stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_4),
+ __stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_5),
+ __stringify(GAUDI2_DCORE0_ENGINE_ID_DEC_0),
+ __stringify(GAUDI2_DCORE0_ENGINE_ID_DEC_1),
+ __stringify(GAUDI2_DCORE1_ENGINE_ID_EDMA_0),
+ __stringify(GAUDI2_DCORE1_ENGINE_ID_EDMA_1),
+ __stringify(GAUDI2_DCORE1_ENGINE_ID_MME),
+ __stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_0),
+ __stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_1),
+ __stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_2),
+ __stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_3),
+ __stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_4),
+ __stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_5),
+ __stringify(GAUDI2_DCORE1_ENGINE_ID_DEC_0),
+ __stringify(GAUDI2_DCORE1_ENGINE_ID_DEC_1),
+ __stringify(GAUDI2_DCORE2_ENGINE_ID_EDMA_0),
+ __stringify(GAUDI2_DCORE2_ENGINE_ID_EDMA_1),
+ __stringify(GAUDI2_DCORE2_ENGINE_ID_MME),
+ __stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_0),
+ __stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_1),
+ __stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_2),
+ __stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_3),
+ __stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_4),
+ __stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_5),
+ __stringify(GAUDI2_DCORE2_ENGINE_ID_DEC_0),
+ __stringify(GAUDI2_DCORE2_ENGINE_ID_DEC_1),
+ __stringify(GAUDI2_DCORE3_ENGINE_ID_EDMA_0),
+ __stringify(GAUDI2_DCORE3_ENGINE_ID_EDMA_1),
+ __stringify(GAUDI2_DCORE3_ENGINE_ID_MME),
+ __stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_0),
+ __stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_1),
+ __stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_2),
+ __stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_3),
+ __stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_4),
+ __stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_5),
+ __stringify(GAUDI2_DCORE3_ENGINE_ID_DEC_0),
+ __stringify(GAUDI2_DCORE3_ENGINE_ID_DEC_1),
+ __stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_6),
+ __stringify(GAUDI2_ENGINE_ID_PDMA_0),
+ __stringify(GAUDI2_ENGINE_ID_PDMA_1),
+ __stringify(GAUDI2_ENGINE_ID_ROT_0),
+ __stringify(GAUDI2_ENGINE_ID_ROT_1),
+ __stringify(GAUDI2_PCIE_ENGINE_ID_DEC_0),
+ __stringify(GAUDI2_PCIE_ENGINE_ID_DEC_1),
+ __stringify(GAUDI2_ENGINE_ID_NIC0_0),
+ __stringify(GAUDI2_ENGINE_ID_NIC0_1),
+ __stringify(GAUDI2_ENGINE_ID_NIC1_0),
+ __stringify(GAUDI2_ENGINE_ID_NIC1_1),
+ __stringify(GAUDI2_ENGINE_ID_NIC2_0),
+ __stringify(GAUDI2_ENGINE_ID_NIC2_1),
+ __stringify(GAUDI2_ENGINE_ID_NIC3_0),
+ __stringify(GAUDI2_ENGINE_ID_NIC3_1),
+ __stringify(GAUDI2_ENGINE_ID_NIC4_0),
+ __stringify(GAUDI2_ENGINE_ID_NIC4_1),
+ __stringify(GAUDI2_ENGINE_ID_NIC5_0),
+ __stringify(GAUDI2_ENGINE_ID_NIC5_1),
+ __stringify(GAUDI2_ENGINE_ID_NIC6_0),
+ __stringify(GAUDI2_ENGINE_ID_NIC6_1),
+ __stringify(GAUDI2_ENGINE_ID_NIC7_0),
+ __stringify(GAUDI2_ENGINE_ID_NIC7_1),
+ __stringify(GAUDI2_ENGINE_ID_NIC8_0),
+ __stringify(GAUDI2_ENGINE_ID_NIC8_1),
+ __stringify(GAUDI2_ENGINE_ID_NIC9_0),
+ __stringify(GAUDI2_ENGINE_ID_NIC9_1),
+ __stringify(GAUDI2_ENGINE_ID_NIC10_0),
+ __stringify(GAUDI2_ENGINE_ID_NIC10_1),
+ __stringify(GAUDI2_ENGINE_ID_NIC11_0),
+ __stringify(GAUDI2_ENGINE_ID_NIC11_1),
+ __stringify(GAUDI2_ENGINE_ID_PCIE),
+ __stringify(GAUDI2_ENGINE_ID_PSOC),
+ __stringify(GAUDI2_ENGINE_ID_ARC_FARM),
+ __stringify(GAUDI2_ENGINE_ID_KDMA),
+ __stringify(GAUDI2_ENGINE_ID_SIZE),
+};
+
+const char *gaudi2_queue_id_str[] = {
+ __stringify(GAUDI2_QUEUE_ID_PDMA_0_0),
+ __stringify(GAUDI2_QUEUE_ID_PDMA_0_1),
+ __stringify(GAUDI2_QUEUE_ID_PDMA_0_2),
+ __stringify(GAUDI2_QUEUE_ID_PDMA_0_3),
+ __stringify(GAUDI2_QUEUE_ID_PDMA_1_0),
+ __stringify(GAUDI2_QUEUE_ID_PDMA_1_1),
+ __stringify(GAUDI2_QUEUE_ID_PDMA_1_2),
+ __stringify(GAUDI2_QUEUE_ID_PDMA_1_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_MME_0_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_MME_0_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_MME_0_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_MME_0_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_0_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_0_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_0_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_0_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_1_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_1_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_1_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_1_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_2_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_2_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_2_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_2_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_3_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_3_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_3_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_3_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_4_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_4_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_4_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_4_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_5_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_5_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_5_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_5_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_6_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_6_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_6_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_6_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_MME_0_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_MME_0_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_MME_0_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_MME_0_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_0_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_0_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_0_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_0_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_1_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_1_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_1_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_1_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_2_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_2_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_2_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_2_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_3_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_3_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_3_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_3_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_4_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_4_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_4_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_4_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_5_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_5_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_5_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_5_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_MME_0_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_MME_0_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_MME_0_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_MME_0_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_0_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_0_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_0_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_0_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_1_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_1_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_1_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_1_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_2_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_2_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_2_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_2_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_3_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_3_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_3_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_3_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_4_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_4_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_4_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_4_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_5_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_5_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_5_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_5_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_MME_0_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_MME_0_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_MME_0_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_MME_0_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_0_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_0_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_0_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_0_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_1_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_1_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_1_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_1_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_2_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_2_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_2_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_2_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_3_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_3_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_3_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_3_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_4_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_4_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_4_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_4_3),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_5_0),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_5_1),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_5_2),
+ __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_5_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_0_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_0_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_0_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_0_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_1_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_1_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_1_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_1_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_2_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_2_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_2_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_2_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_3_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_3_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_3_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_3_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_4_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_4_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_4_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_4_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_5_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_5_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_5_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_5_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_6_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_6_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_6_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_6_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_7_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_7_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_7_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_7_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_8_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_8_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_8_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_8_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_9_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_9_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_9_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_9_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_10_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_10_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_10_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_10_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_11_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_11_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_11_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_11_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_12_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_12_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_12_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_12_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_13_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_13_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_13_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_13_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_14_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_14_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_14_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_14_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_15_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_15_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_15_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_15_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_16_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_16_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_16_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_16_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_17_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_17_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_17_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_17_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_18_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_18_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_18_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_18_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_19_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_19_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_19_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_19_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_20_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_20_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_20_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_20_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_21_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_21_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_21_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_21_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_22_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_22_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_22_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_22_3),
+ __stringify(GAUDI2_QUEUE_ID_NIC_23_0),
+ __stringify(GAUDI2_QUEUE_ID_NIC_23_1),
+ __stringify(GAUDI2_QUEUE_ID_NIC_23_2),
+ __stringify(GAUDI2_QUEUE_ID_NIC_23_3),
+ __stringify(GAUDI2_QUEUE_ID_ROT_0_0),
+ __stringify(GAUDI2_QUEUE_ID_ROT_0_1),
+ __stringify(GAUDI2_QUEUE_ID_ROT_0_2),
+ __stringify(GAUDI2_QUEUE_ID_ROT_0_3),
+ __stringify(GAUDI2_QUEUE_ID_ROT_1_0),
+ __stringify(GAUDI2_QUEUE_ID_ROT_1_1),
+ __stringify(GAUDI2_QUEUE_ID_ROT_1_2),
+ __stringify(GAUDI2_QUEUE_ID_ROT_1_3),
+ __stringify(GAUDI2_QUEUE_ID_CPU_PQ),
+ __stringify(GAUDI2_QUEUE_ID_SIZE),
+};
+
static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = {
"qman sei intr",
"arc sei intr"
@@ -523,7 +1153,7 @@ static const char * const gaudi2_qman_error_cause[GAUDI2_NUM_OF_QM_ERR_CAUSE] =
"PQC L2H error"
};
-static const char * const gaudi2_qman_lower_cp_error_cause[GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE] = {
+static const char * const gaudi2_lower_qman_error_cause[GAUDI2_NUM_OF_LOWER_QM_ERR_CAUSE] = {
"RSVD0",
"CQ AXI HBW error",
"CP AXI HBW error",
@@ -613,6 +1243,7 @@ static const char * const gaudi2_tpc_interrupts_cause[GAUDI2_NUM_OF_TPC_INTR_CAU
"invalid_lock_access",
"LD_L protection violation",
"ST_L protection violation",
+ "D$ L0CS mismatch",
};
static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = {
@@ -634,14 +1265,6 @@ static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] =
"sbte_prtn_intr_4",
};
-static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = {
- "i0",
- "i1",
- "i2",
- "i3",
- "i4",
-};
-
static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
"WBC ERR RESP_0",
"WBC ERR RESP_1",
@@ -711,6 +1334,111 @@ gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = {
"TLP is blocked by RR"
};
+static const int gaudi2_queue_id_to_engine_id[] = {
+ [GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_ENGINE_ID_PDMA_0,
+ [GAUDI2_QUEUE_ID_PDMA_1_0...GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_ENGINE_ID_PDMA_1,
+ [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] =
+ GAUDI2_DCORE0_ENGINE_ID_EDMA_0,
+ [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] =
+ GAUDI2_DCORE0_ENGINE_ID_EDMA_1,
+ [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] =
+ GAUDI2_DCORE1_ENGINE_ID_EDMA_0,
+ [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] =
+ GAUDI2_DCORE1_ENGINE_ID_EDMA_1,
+ [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] =
+ GAUDI2_DCORE2_ENGINE_ID_EDMA_0,
+ [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] =
+ GAUDI2_DCORE2_ENGINE_ID_EDMA_1,
+ [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] =
+ GAUDI2_DCORE3_ENGINE_ID_EDMA_0,
+ [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] =
+ GAUDI2_DCORE3_ENGINE_ID_EDMA_1,
+ [GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3] =
+ GAUDI2_DCORE0_ENGINE_ID_MME,
+ [GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3] =
+ GAUDI2_DCORE1_ENGINE_ID_MME,
+ [GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3] =
+ GAUDI2_DCORE2_ENGINE_ID_MME,
+ [GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3] =
+ GAUDI2_DCORE3_ENGINE_ID_MME,
+ [GAUDI2_QUEUE_ID_DCORE0_TPC_0_0...GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] =
+ GAUDI2_DCORE0_ENGINE_ID_TPC_0,
+ [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0...GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] =
+ GAUDI2_DCORE0_ENGINE_ID_TPC_1,
+ [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0...GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] =
+ GAUDI2_DCORE0_ENGINE_ID_TPC_2,
+ [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0...GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] =
+ GAUDI2_DCORE0_ENGINE_ID_TPC_3,
+ [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0...GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] =
+ GAUDI2_DCORE0_ENGINE_ID_TPC_4,
+ [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0...GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] =
+ GAUDI2_DCORE0_ENGINE_ID_TPC_5,
+ [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0...GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] =
+ GAUDI2_DCORE0_ENGINE_ID_TPC_6,
+ [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0...GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] =
+ GAUDI2_DCORE1_ENGINE_ID_TPC_0,
+ [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0...GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] =
+ GAUDI2_DCORE1_ENGINE_ID_TPC_1,
+ [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0...GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] =
+ GAUDI2_DCORE1_ENGINE_ID_TPC_2,
+ [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0...GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] =
+ GAUDI2_DCORE1_ENGINE_ID_TPC_3,
+ [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0...GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] =
+ GAUDI2_DCORE1_ENGINE_ID_TPC_4,
+ [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0...GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] =
+ GAUDI2_DCORE1_ENGINE_ID_TPC_5,
+ [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0...GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] =
+ GAUDI2_DCORE2_ENGINE_ID_TPC_0,
+ [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0...GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] =
+ GAUDI2_DCORE2_ENGINE_ID_TPC_1,
+ [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0...GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] =
+ GAUDI2_DCORE2_ENGINE_ID_TPC_2,
+ [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0...GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] =
+ GAUDI2_DCORE2_ENGINE_ID_TPC_3,
+ [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0...GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] =
+ GAUDI2_DCORE2_ENGINE_ID_TPC_4,
+ [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0...GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] =
+ GAUDI2_DCORE2_ENGINE_ID_TPC_5,
+ [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0...GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] =
+ GAUDI2_DCORE3_ENGINE_ID_TPC_0,
+ [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0...GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] =
+ GAUDI2_DCORE3_ENGINE_ID_TPC_1,
+ [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0...GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] =
+ GAUDI2_DCORE3_ENGINE_ID_TPC_2,
+ [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0...GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] =
+ GAUDI2_DCORE3_ENGINE_ID_TPC_3,
+ [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0...GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] =
+ GAUDI2_DCORE3_ENGINE_ID_TPC_4,
+ [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0...GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] =
+ GAUDI2_DCORE3_ENGINE_ID_TPC_5,
+ [GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_ENGINE_ID_NIC0_0,
+ [GAUDI2_QUEUE_ID_NIC_1_0...GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_ENGINE_ID_NIC0_1,
+ [GAUDI2_QUEUE_ID_NIC_2_0...GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_ENGINE_ID_NIC1_0,
+ [GAUDI2_QUEUE_ID_NIC_3_0...GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_ENGINE_ID_NIC1_1,
+ [GAUDI2_QUEUE_ID_NIC_4_0...GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_ENGINE_ID_NIC2_0,
+ [GAUDI2_QUEUE_ID_NIC_5_0...GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_ENGINE_ID_NIC2_1,
+ [GAUDI2_QUEUE_ID_NIC_6_0...GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_ENGINE_ID_NIC3_0,
+ [GAUDI2_QUEUE_ID_NIC_7_0...GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_ENGINE_ID_NIC3_1,
+ [GAUDI2_QUEUE_ID_NIC_8_0...GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_ENGINE_ID_NIC4_0,
+ [GAUDI2_QUEUE_ID_NIC_9_0...GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_ENGINE_ID_NIC4_1,
+ [GAUDI2_QUEUE_ID_NIC_10_0...GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_ENGINE_ID_NIC5_0,
+ [GAUDI2_QUEUE_ID_NIC_11_0...GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_ENGINE_ID_NIC5_1,
+ [GAUDI2_QUEUE_ID_NIC_12_0...GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_ENGINE_ID_NIC6_0,
+ [GAUDI2_QUEUE_ID_NIC_13_0...GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_ENGINE_ID_NIC6_1,
+ [GAUDI2_QUEUE_ID_NIC_14_0...GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_ENGINE_ID_NIC7_0,
+ [GAUDI2_QUEUE_ID_NIC_15_0...GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_ENGINE_ID_NIC7_1,
+ [GAUDI2_QUEUE_ID_NIC_16_0...GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_ENGINE_ID_NIC8_0,
+ [GAUDI2_QUEUE_ID_NIC_17_0...GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_ENGINE_ID_NIC8_1,
+ [GAUDI2_QUEUE_ID_NIC_18_0...GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_ENGINE_ID_NIC9_0,
+ [GAUDI2_QUEUE_ID_NIC_19_0...GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_ENGINE_ID_NIC9_1,
+ [GAUDI2_QUEUE_ID_NIC_20_0...GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_ENGINE_ID_NIC10_0,
+ [GAUDI2_QUEUE_ID_NIC_21_0...GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_ENGINE_ID_NIC10_1,
+ [GAUDI2_QUEUE_ID_NIC_22_0...GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_ENGINE_ID_NIC11_0,
+ [GAUDI2_QUEUE_ID_NIC_23_0...GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_ENGINE_ID_NIC11_1,
+ [GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_ENGINE_ID_ROT_0,
+ [GAUDI2_QUEUE_ID_ROT_1_0...GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_ENGINE_ID_ROT_1,
+};
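
/*
 * Editor's illustrative sketch (not part of the patch): the range designators
 * above map all four streams of each queue to one engine, so a report line for
 * a faulting queue can be composed together with the string tables, e.g.
 * (print_queue_owner_sketch is an invented name; it assumes qid is one of the
 * mapped PDMA/EDMA/MME/TPC/NIC/ROT queues):
 */
static void print_queue_owner_sketch(struct hl_device *hdev, u32 qid)
{
	u32 eng_id = gaudi2_queue_id_to_engine_id[qid];

	dev_info(hdev->dev, "queue %s is served by engine %s\n",
		 gaudi2_queue_id_str[qid], gaudi2_engine_id_str[eng_id]);
}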
+
const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = {
[GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE,
[GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE,
@@ -1437,6 +2165,34 @@ static const u32 gaudi2_tpc_cfg_blocks_bases[TPC_ID_SIZE] = {
[TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_CFG_BASE,
};
+static const u32 gaudi2_tpc_eml_cfg_blocks_bases[TPC_ID_SIZE] = {
+ [TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_EML_CFG_BASE,
+ [TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_EML_CFG_BASE,
+ [TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_EML_CFG_BASE,
+ [TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_EML_CFG_BASE,
+ [TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_EML_CFG_BASE,
+ [TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_EML_CFG_BASE,
+ [TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_EML_CFG_BASE,
+ [TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_EML_CFG_BASE,
+ [TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_EML_CFG_BASE,
+ [TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_EML_CFG_BASE,
+ [TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_EML_CFG_BASE,
+ [TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_EML_CFG_BASE,
+ [TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_EML_CFG_BASE,
+ [TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_EML_CFG_BASE,
+ [TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_EML_CFG_BASE,
+ [TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_EML_CFG_BASE,
+ [TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_EML_CFG_BASE,
+ [TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_EML_CFG_BASE,
+ [TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_EML_CFG_BASE,
+ [TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_EML_CFG_BASE,
+ [TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_EML_CFG_BASE,
+ [TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_EML_CFG_BASE,
+ [TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_EML_CFG_BASE,
+ [TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_EML_CFG_BASE,
+ [TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_EML_CFG_BASE,
+};
+
const u32 gaudi2_rot_blocks_bases[ROTATOR_ID_SIZE] = {
[ROTATOR_ID_0] = mmROT0_BASE,
[ROTATOR_ID_1] = mmROT1_BASE
@@ -1475,6 +2231,56 @@ static const u32 gaudi2_rot_id_to_queue_id[ROTATOR_ID_SIZE] = {
[ROTATOR_ID_1] = GAUDI2_QUEUE_ID_ROT_1_0,
};
+static const u32 gaudi2_tpc_engine_id_to_tpc_id[] = {
+ [GAUDI2_DCORE0_ENGINE_ID_TPC_0] = TPC_ID_DCORE0_TPC0,
+ [GAUDI2_DCORE0_ENGINE_ID_TPC_1] = TPC_ID_DCORE0_TPC1,
+ [GAUDI2_DCORE0_ENGINE_ID_TPC_2] = TPC_ID_DCORE0_TPC2,
+ [GAUDI2_DCORE0_ENGINE_ID_TPC_3] = TPC_ID_DCORE0_TPC3,
+ [GAUDI2_DCORE0_ENGINE_ID_TPC_4] = TPC_ID_DCORE0_TPC4,
+ [GAUDI2_DCORE0_ENGINE_ID_TPC_5] = TPC_ID_DCORE0_TPC5,
+ [GAUDI2_DCORE1_ENGINE_ID_TPC_0] = TPC_ID_DCORE1_TPC0,
+ [GAUDI2_DCORE1_ENGINE_ID_TPC_1] = TPC_ID_DCORE1_TPC1,
+ [GAUDI2_DCORE1_ENGINE_ID_TPC_2] = TPC_ID_DCORE1_TPC2,
+ [GAUDI2_DCORE1_ENGINE_ID_TPC_3] = TPC_ID_DCORE1_TPC3,
+ [GAUDI2_DCORE1_ENGINE_ID_TPC_4] = TPC_ID_DCORE1_TPC4,
+ [GAUDI2_DCORE1_ENGINE_ID_TPC_5] = TPC_ID_DCORE1_TPC5,
+ [GAUDI2_DCORE2_ENGINE_ID_TPC_0] = TPC_ID_DCORE2_TPC0,
+ [GAUDI2_DCORE2_ENGINE_ID_TPC_1] = TPC_ID_DCORE2_TPC1,
+ [GAUDI2_DCORE2_ENGINE_ID_TPC_2] = TPC_ID_DCORE2_TPC2,
+ [GAUDI2_DCORE2_ENGINE_ID_TPC_3] = TPC_ID_DCORE2_TPC3,
+ [GAUDI2_DCORE2_ENGINE_ID_TPC_4] = TPC_ID_DCORE2_TPC4,
+ [GAUDI2_DCORE2_ENGINE_ID_TPC_5] = TPC_ID_DCORE2_TPC5,
+ [GAUDI2_DCORE3_ENGINE_ID_TPC_0] = TPC_ID_DCORE3_TPC0,
+ [GAUDI2_DCORE3_ENGINE_ID_TPC_1] = TPC_ID_DCORE3_TPC1,
+ [GAUDI2_DCORE3_ENGINE_ID_TPC_2] = TPC_ID_DCORE3_TPC2,
+ [GAUDI2_DCORE3_ENGINE_ID_TPC_3] = TPC_ID_DCORE3_TPC3,
+ [GAUDI2_DCORE3_ENGINE_ID_TPC_4] = TPC_ID_DCORE3_TPC4,
+ [GAUDI2_DCORE3_ENGINE_ID_TPC_5] = TPC_ID_DCORE3_TPC5,
+ /* the PCI TPC is placed last (mapped like HW) */
+ [GAUDI2_DCORE0_ENGINE_ID_TPC_6] = TPC_ID_DCORE0_TPC6,
+};
+
+static const u32 gaudi2_mme_engine_id_to_mme_id[] = {
+ [GAUDI2_DCORE0_ENGINE_ID_MME] = MME_ID_DCORE0,
+ [GAUDI2_DCORE1_ENGINE_ID_MME] = MME_ID_DCORE1,
+ [GAUDI2_DCORE2_ENGINE_ID_MME] = MME_ID_DCORE2,
+ [GAUDI2_DCORE3_ENGINE_ID_MME] = MME_ID_DCORE3,
+};
+
+static const u32 gaudi2_edma_engine_id_to_edma_id[] = {
+ [GAUDI2_ENGINE_ID_PDMA_0] = DMA_CORE_ID_PDMA0,
+ [GAUDI2_ENGINE_ID_PDMA_1] = DMA_CORE_ID_PDMA1,
+ [GAUDI2_DCORE0_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA0,
+ [GAUDI2_DCORE0_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA1,
+ [GAUDI2_DCORE1_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA2,
+ [GAUDI2_DCORE1_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA3,
+ [GAUDI2_DCORE2_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA4,
+ [GAUDI2_DCORE2_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA5,
+ [GAUDI2_DCORE3_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA6,
+ [GAUDI2_DCORE3_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA7,
+ [GAUDI2_ENGINE_ID_KDMA] = DMA_CORE_ID_KDMA,
+};
+
const u32 edma_stream_base[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0,
@@ -1499,41 +2305,6 @@ static const char gaudi2_vdec_irq_name[GAUDI2_VDEC_MSIX_ENTRIES][GAUDI2_MAX_STRI
"gaudi2 vdec s_1", "gaudi2 vdec s_1 abnormal"
};
-static const u32 rtr_coordinates_to_rtr_id[NUM_OF_RTR_PER_DCORE * NUM_OF_DCORES] = {
- RTR_ID_X_Y(2, 4),
- RTR_ID_X_Y(3, 4),
- RTR_ID_X_Y(4, 4),
- RTR_ID_X_Y(5, 4),
- RTR_ID_X_Y(6, 4),
- RTR_ID_X_Y(7, 4),
- RTR_ID_X_Y(8, 4),
- RTR_ID_X_Y(9, 4),
- RTR_ID_X_Y(10, 4),
- RTR_ID_X_Y(11, 4),
- RTR_ID_X_Y(12, 4),
- RTR_ID_X_Y(13, 4),
- RTR_ID_X_Y(14, 4),
- RTR_ID_X_Y(15, 4),
- RTR_ID_X_Y(16, 4),
- RTR_ID_X_Y(17, 4),
- RTR_ID_X_Y(2, 11),
- RTR_ID_X_Y(3, 11),
- RTR_ID_X_Y(4, 11),
- RTR_ID_X_Y(5, 11),
- RTR_ID_X_Y(6, 11),
- RTR_ID_X_Y(7, 11),
- RTR_ID_X_Y(8, 11),
- RTR_ID_X_Y(9, 11),
- RTR_ID_X_Y(0, 0),/* 24 no id */
- RTR_ID_X_Y(0, 0),/* 25 no id */
- RTR_ID_X_Y(0, 0),/* 26 no id */
- RTR_ID_X_Y(0, 0),/* 27 no id */
- RTR_ID_X_Y(14, 11),
- RTR_ID_X_Y(15, 11),
- RTR_ID_X_Y(16, 11),
- RTR_ID_X_Y(17, 11)
-};
-
enum rtr_id {
DCORE0_RTR0,
DCORE0_RTR1,
@@ -1676,7 +2447,8 @@ enum razwi_event_sources {
RAZWI_PDMA,
RAZWI_NIC,
RAZWI_DEC,
- RAZWI_ROT
+ RAZWI_ROT,
+ RAZWI_ARC_FARM
};
struct hbm_mc_error_causes {
@@ -1784,7 +2556,14 @@ static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id);
static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val);
static int gaudi2_send_job_to_kdma(struct hl_device *hdev, u64 src_addr, u64 dst_addr, u32 size,
bool is_memset);
+static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
+ struct engines_data *e);
+static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
+ struct engines_data *e);
+static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
+ struct engines_data *e);
static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr);
+static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr);
static void gaudi2_init_scrambler_hbm(struct hl_device *hdev)
{
@@ -1879,11 +2658,26 @@ static int set_number_of_functional_hbms(struct hl_device *hdev)
return 0;
}
+static bool gaudi2_is_edma_queue_id(u32 queue_id)
+{
+
+ switch (queue_id) {
+ case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
+ case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
+ case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
+ case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
+ return true;
+ default:
+ return false;
+ }
+}
+
static int gaudi2_set_dram_properties(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
- u32 basic_hbm_page_size;
- int rc;
+ u64 hbm_drv_base_offset = 0, edma_pq_base_addr;
+ u32 basic_hbm_page_size, edma_idx = 0;
+ int rc, i;
rc = set_number_of_functional_hbms(hdev);
if (rc)
@@ -1927,9 +2721,35 @@ static int gaudi2_set_dram_properties(struct hl_device *hdev)
prop->dmmu.start_addr = prop->dram_base_address +
(prop->dram_page_size *
DIV_ROUND_UP_SECTOR_T(prop->dram_size, prop->dram_page_size));
-
prop->dmmu.end_addr = prop->dmmu.start_addr + prop->dram_page_size *
div_u64((VA_HBM_SPACE_END - prop->dmmu.start_addr), prop->dmmu.page_size);
+ /*
+ * The driver can't share a (48MB) HBM page with the F/W, to prevent the F/W from
+ * blocking the driver's part via a range register, so it must start at the next (48MB) page
+ */
+ hbm_drv_base_offset = roundup(CPU_FW_IMAGE_SIZE, prop->num_functional_hbms * SZ_8M);
+
+ /*
+ * The NIC driver section size and the HMMU page tables section in the HBM need
+ * to be the remaining size in the first dram page after taking into
+ * account the F/W image size
+ */
+
+ /* Reserve region in HBM for HMMU page tables */
+ prop->mmu_pgt_addr = DRAM_PHYS_BASE + hbm_drv_base_offset +
+ ((prop->dram_page_size - hbm_drv_base_offset) -
+ (HMMU_PAGE_TABLES_SIZE + EDMA_PQS_SIZE + EDMA_SCRATCHPAD_SIZE));
+
+ /* Set EDMA PQs HBM addresses */
+ edma_pq_base_addr = prop->mmu_pgt_addr + HMMU_PAGE_TABLES_SIZE;
+
+ for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
+ if (gaudi2_is_edma_queue_id(i)) {
+ prop->hw_queues_props[i].q_dram_bd_address = edma_pq_base_addr +
+ (edma_idx * HL_QUEUE_SIZE_IN_BYTES);
+ edma_idx++;
+ }
+ }
return 0;
}
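
/*
 * Editor's note (illustrative, not part of the patch): with the arithmetic
 * above, the device-resident structures are packed at the tail of the first
 * DRAM page, roughly:
 *
 *   DRAM_PHYS_BASE
 *     |-- F/W image, rounded up to a num_functional_hbms * 8MB boundary
 *     |-- remaining driver/NIC region
 *     |-- mmu_pgt_addr       : HMMU_PAGE_TABLES_SIZE bytes of HMMU page tables
 *     |-- edma_pq_base_addr  : EDMA_PQS_SIZE bytes, one HL_QUEUE_SIZE_IN_BYTES
 *     |                        slot per EDMA queue
 *     |-- scratchpad         : EDMA_SCRATCHPAD_SIZE bytes (see gaudi2_sw_init)
 *     `-- end of the first dram_page_size page
 */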
@@ -1939,7 +2759,7 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hw_queue_properties *q_props;
u32 num_sync_stream_queues = 0;
- int i;
+ int i, rc;
prop->max_queues = GAUDI2_QUEUE_ID_SIZE;
prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties),
@@ -1962,6 +2782,9 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
}
q_props[i].cb_alloc_flags = CB_ALLOC_USER;
+
+ if (gaudi2_is_edma_queue_id(i))
+ q_props[i].dram_bd = 1;
}
q_props[GAUDI2_QUEUE_ID_CPU_PQ].type = QUEUE_TYPE_CPU;
@@ -1988,48 +2811,45 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
prop->hints_range_reservation = true;
- if (hdev->pldm)
- prop->mmu_pgt_size = 0x800000; /* 8MB */
- else
- prop->mmu_pgt_size = MMU_PAGE_TABLES_INITIAL_SIZE;
+ prop->rotator_enabled_mask = BIT(NUM_OF_ROT) - 1;
+
+ prop->max_asid = 2;
+ prop->dmmu.pgt_size = HMMU_PAGE_TABLES_SIZE;
prop->mmu_pte_size = HL_PTE_SIZE;
- prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
- prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
prop->dmmu.hop_shifts[MMU_HOP0] = DHOP0_SHIFT;
prop->dmmu.hop_shifts[MMU_HOP1] = DHOP1_SHIFT;
prop->dmmu.hop_shifts[MMU_HOP2] = DHOP2_SHIFT;
prop->dmmu.hop_shifts[MMU_HOP3] = DHOP3_SHIFT;
- prop->dmmu.hop_shifts[MMU_HOP4] = DHOP4_SHIFT;
prop->dmmu.hop_masks[MMU_HOP0] = DHOP0_MASK;
prop->dmmu.hop_masks[MMU_HOP1] = DHOP1_MASK;
prop->dmmu.hop_masks[MMU_HOP2] = DHOP2_MASK;
prop->dmmu.hop_masks[MMU_HOP3] = DHOP3_MASK;
- prop->dmmu.hop_masks[MMU_HOP4] = DHOP4_MASK;
prop->dmmu.page_size = PAGE_SIZE_1GB;
- prop->dmmu.num_hops = MMU_ARCH_6_HOPS;
+ prop->dmmu.num_hops = MMU_ARCH_4_HOPS;
prop->dmmu.last_mask = LAST_MASK;
- prop->dmmu.host_resident = 1;
- /* TODO: will be duplicated until implementing per-MMU props */
- prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
- prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
+ prop->dmmu.host_resident = 0;
+ prop->dmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
+ prop->dmmu.hop0_tables_total_size = HOP_TABLE_SIZE_512_PTE * prop->max_asid;
- /*
- * this is done in order to be able to validate FW descriptor (i.e. validating that
- * the addresses and allocated space for FW image does not cross memory bounds).
- * for this reason we set the DRAM size to the minimum possible and later it will
- * be modified according to what reported in the cpucp info packet
+ /* We need to set the pgt address in dram for HMMU init, so we cannot wait
+ * for the fw cpucp info to set the dram props, as mmu init comes before
+ * hw init
*/
- prop->dram_size = (GAUDI2_HBM_NUM - 1) * SZ_16G;
+ rc = hdev->asic_funcs->set_dram_properties(hdev);
+ if (rc)
+ goto free_qprops;
+
+ prop->mmu_pgt_size = PMMU_PAGE_TABLES_SIZE;
+ prop->pmmu.pgt_size = prop->mmu_pgt_size;
hdev->pmmu_huge_range = true;
prop->pmmu.host_resident = 1;
prop->pmmu.num_hops = MMU_ARCH_6_HOPS;
prop->pmmu.last_mask = LAST_MASK;
- /* TODO: will be duplicated until implementing per-MMU props */
- prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
- prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
+ prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
+ prop->pmmu.hop0_tables_total_size = HOP_TABLE_SIZE_512_PTE * prop->max_asid;
prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START;
prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END;
@@ -2084,11 +2904,13 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
}
+ prop->max_num_of_engines = GAUDI2_ENGINE_ID_SIZE;
prop->num_engine_cores = CPU_ID_MAX;
prop->cfg_size = CFG_SIZE;
- prop->max_asid = MAX_ASID;
prop->num_of_events = GAUDI2_EVENT_SIZE;
+ prop->supports_engine_modes = true;
+
prop->dc_power_default = DC_POWER_DEFAULT;
prop->cb_pool_cb_cnt = GAUDI2_CB_POOL_CB_CNT;
@@ -2096,7 +2918,7 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE;
prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
- strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
+ strscpy_pad(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
prop->mme_master_slave_mode = 1;
@@ -2107,6 +2929,8 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
(num_sync_stream_queues * HL_RSVD_MONS);
prop->first_available_user_interrupt = GAUDI2_IRQ_NUM_USER_FIRST;
+ prop->tpc_interrupt_id = GAUDI2_IRQ_NUM_TPC_ASSERT;
+ prop->eq_interrupt_id = GAUDI2_IRQ_NUM_EVENT_QUEUE;
prop->first_available_cq[0] = GAUDI2_RESERVED_CQ_NUMBER;
@@ -2125,7 +2949,13 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
prop->hbw_flush_reg = mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0;
+ prop->supports_advanced_cpucp_rc = true;
+
return 0;
+
+free_qprops:
+ kfree(prop->hw_queues_props);
+ return rc;
}
static int gaudi2_pci_bars_map(struct hl_device *hdev)
@@ -2547,7 +3377,8 @@ static int gaudi2_cpucp_info_get(struct hl_device *hdev)
}
if (!strlen(prop->cpucp_info.card_name))
- strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
+ strscpy_pad(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME,
+ CARD_NAME_MAX_LEN);
/* Overwrite binning masks with the actual binning values from F/W */
hdev->dram_binning = prop->cpucp_info.dram_binning_mask;
@@ -2555,6 +3386,10 @@ static int gaudi2_cpucp_info_get(struct hl_device *hdev)
hdev->tpc_binning = le64_to_cpu(prop->cpucp_info.tpc_binning_mask);
hdev->decoder_binning = lower_32_bits(le64_to_cpu(prop->cpucp_info.decoder_binning_mask));
+ dev_dbg(hdev->dev, "Read binning masks: tpc: 0x%llx, dram: 0x%llx, edma: 0x%x, dec: 0x%x\n",
+ hdev->tpc_binning, hdev->dram_binning, hdev->edma_binning,
+ hdev->decoder_binning);
+
/*
* at this point the DRAM parameters need to be updated according to data obtained
* from the FW
@@ -2594,6 +3429,25 @@ static int gaudi2_fetch_psoc_frequency(struct hl_device *hdev)
return 0;
}
+static int gaudi2_mmu_clear_pgt_range(struct hl_device *hdev)
+{
+ struct gaudi2_device *gaudi2 = hdev->asic_specific;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ int rc;
+
+ if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK))
+ return 0;
+
+ if (prop->dmmu.host_resident)
+ return 0;
+
+ rc = gaudi2_memset_device_memory(hdev, prop->mmu_pgt_addr, prop->dmmu.pgt_size, 0);
+ if (rc)
+ dev_err(hdev->dev, "Failed to clear mmu pgt");
+
+ return rc;
+}
+
static int gaudi2_early_init(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -2650,7 +3504,18 @@ static int gaudi2_early_init(struct hl_device *hdev)
if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
- hdev->asic_funcs->hw_fini(hdev, true, false);
+ rc = hdev->asic_funcs->hw_fini(hdev, true, false);
+ if (rc) {
+ dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
+ goto pci_fini;
+ }
+
+ rc = hl_fw_read_preboot_status(hdev);
+ if (rc) {
+ if (hdev->reset_on_preboot_fail)
+ hdev->asic_funcs->hw_fini(hdev, true, false);
+ goto pci_fini;
+ }
}
return 0;
@@ -2692,6 +3557,7 @@ static bool gaudi2_is_arc_tpc_owned(u64 arc_id)
static void gaudi2_init_arcs(struct hl_device *hdev)
{
+ struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
struct gaudi2_device *gaudi2 = hdev->asic_specific;
u64 arc_id;
u32 i;
@@ -2721,6 +3587,10 @@ static void gaudi2_init_arcs(struct hl_device *hdev)
gaudi2_set_arc_id_cap(hdev, arc_id);
}
+
+ /* Fetch ARC scratchpad address */
+ hdev->asic_prop.engine_core_interrupt_reg_addr =
+ CFG_BASE + le32_to_cpu(dyn_regs->eng_arc_irq_ctrl);
}
static int gaudi2_scrub_arc_dccm(struct hl_device *hdev, u32 cpu_id)
@@ -2772,16 +3642,21 @@ static int gaudi2_scrub_arc_dccm(struct hl_device *hdev, u32 cpu_id)
return 0;
}
-static void gaudi2_scrub_arcs_dccm(struct hl_device *hdev)
+static int gaudi2_scrub_arcs_dccm(struct hl_device *hdev)
{
u16 arc_id;
+ int rc;
for (arc_id = CPU_ID_SCHED_ARC0 ; arc_id < CPU_ID_MAX ; arc_id++) {
if (!gaudi2_is_arc_enabled(hdev, arc_id))
continue;
- gaudi2_scrub_arc_dccm(hdev, arc_id);
+ rc = gaudi2_scrub_arc_dccm(hdev, arc_id);
+ if (rc)
+ return rc;
}
+
+ return 0;
}
static int gaudi2_late_init(struct hl_device *hdev)
@@ -2789,14 +3664,10 @@ static int gaudi2_late_init(struct hl_device *hdev)
struct gaudi2_device *gaudi2 = hdev->asic_specific;
int rc;
- hdev->asic_prop.supports_advanced_cpucp_rc = true;
-
rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS,
gaudi2->virt_msix_db_dma_addr);
- if (rc) {
- dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
+ if (rc)
return rc;
- }
rc = gaudi2_fetch_psoc_frequency(hdev);
if (rc) {
@@ -2804,8 +3675,20 @@ static int gaudi2_late_init(struct hl_device *hdev)
goto disable_pci_access;
}
+ rc = gaudi2_mmu_clear_pgt_range(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
+ goto disable_pci_access;
+ }
+
gaudi2_init_arcs(hdev);
- gaudi2_scrub_arcs_dccm(hdev);
+
+ rc = gaudi2_scrub_arcs_dccm(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to scrub arcs DCCM\n");
+ goto disable_pci_access;
+ }
+
gaudi2_init_security(hdev);
return 0;
@@ -2989,6 +3872,13 @@ static void gaudi2_user_interrupt_setup(struct hl_device *hdev)
struct asic_fixed_properties *prop = &hdev->asic_prop;
int i, j, k;
+ /* Initialize TPC interrupt */
+ HL_USR_INTR_STRUCT_INIT(hdev->tpc_interrupt, hdev, 0, HL_USR_INTERRUPT_TPC);
+
+ /* Initialize unexpected error interrupt */
+ HL_USR_INTR_STRUCT_INIT(hdev->unexpected_error_interrupt, hdev, 0,
+ HL_USR_INTERRUPT_UNEXPECTED);
+
/* Initialize common user CQ interrupt */
HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev,
HL_COMMON_USER_CQ_INTERRUPT_ID, HL_USR_INTERRUPT_CQ);
@@ -3051,7 +3941,7 @@ static int gaudi2_special_blocks_config(struct hl_device *hdev)
int i, rc;
/* Configure Special blocks */
- prop->glbl_err_cause_num = GAUDI2_NUM_OF_GLBL_ERR_CAUSE;
+ prop->glbl_err_max_cause_num = GAUDI2_GLBL_ERR_MAX_CAUSE_NUM;
prop->num_of_special_blocks = ARRAY_SIZE(gaudi2_special_blocks);
prop->special_blocks = kmalloc_array(prop->num_of_special_blocks,
sizeof(*prop->special_blocks), GFP_KERNEL);
@@ -3115,6 +4005,48 @@ static int gaudi2_special_blocks_iterator_config(struct hl_device *hdev)
return gaudi2_special_blocks_config(hdev);
}
+static void gaudi2_test_queues_msgs_free(struct hl_device *hdev)
+{
+ struct gaudi2_device *gaudi2 = hdev->asic_specific;
+ struct gaudi2_queues_test_info *msg_info = gaudi2->queues_test_info;
+ int i;
+
+ for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i++) {
+ /* bail out once we reach the point where allocation failed */
+ if (!msg_info[i].kern_addr)
+ break;
+
+ hl_asic_dma_pool_free(hdev, msg_info[i].kern_addr, msg_info[i].dma_addr);
+ msg_info[i].kern_addr = NULL;
+ }
+}
+
+static int gaudi2_test_queues_msgs_alloc(struct hl_device *hdev)
+{
+ struct gaudi2_device *gaudi2 = hdev->asic_specific;
+ struct gaudi2_queues_test_info *msg_info = gaudi2->queues_test_info;
+ int i, rc;
+
+ /* allocate a message-short buf for each Q we intend to test */
+ for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i++) {
+ msg_info[i].kern_addr =
+ (void *)hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_short),
+ GFP_KERNEL, &msg_info[i].dma_addr);
+ if (!msg_info[i].kern_addr) {
+ dev_err(hdev->dev,
+ "Failed to allocate dma memory for H/W queue %d testing\n", i);
+ rc = -ENOMEM;
+ goto err_exit;
+ }
+ }
+
+ return 0;
+
+err_exit:
+ gaudi2_test_queues_msgs_free(hdev);
+ return rc;
+}
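For illustration only (not part of the patch): the msgs_alloc/msgs_free pair follows the usual partial-failure idiom - allocate one buffer per tested queue and, on failure, release only the entries that were actually allocated (the first NULL kern_addr marks where allocation stopped). A stand-alone sketch of the idiom using plain calloc()/free() instead of the DMA pool (sizes and names are hypothetical):

#include <stdlib.h>

#define NUM_TESTED_QS 8                         /* hypothetical number of tested queues */

struct test_msg {
	void *kern_addr;
};

static void msgs_free(struct test_msg *msgs)
{
	for (int i = 0; i < NUM_TESTED_QS; i++) {
		if (!msgs[i].kern_addr)
			break;                  /* allocation stopped here */
		free(msgs[i].kern_addr);
		msgs[i].kern_addr = NULL;
	}
}

static int msgs_alloc(struct test_msg *msgs)
{
	for (int i = 0; i < NUM_TESTED_QS; i++) {
		msgs[i].kern_addr = calloc(1, 64);
		if (!msgs[i].kern_addr) {
			msgs_free(msgs);        /* undo what was allocated so far */
			return -1;
		}
	}
	return 0;
}

int main(void)
{
	struct test_msg msgs[NUM_TESTED_QS] = { { 0 } };
	int rc = msgs_alloc(msgs);

	if (!rc)
		msgs_free(msgs);
	return rc ? 1 : 0;
}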
+
static int gaudi2_sw_init(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -3188,13 +4120,7 @@ static int gaudi2_sw_init(struct hl_device *hdev)
spin_lock_init(&gaudi2->hw_queues_lock);
- gaudi2->scratchpad_kernel_address = hl_asic_dma_alloc_coherent(hdev, PAGE_SIZE,
- &gaudi2->scratchpad_bus_address,
- GFP_KERNEL | __GFP_ZERO);
- if (!gaudi2->scratchpad_kernel_address) {
- rc = -ENOMEM;
- goto free_virt_msix_db_mem;
- }
+ gaudi2->scratchpad_bus_address = prop->mmu_pgt_addr + HMMU_PAGE_TABLES_SIZE + EDMA_PQS_SIZE;
gaudi2_user_mapped_blocks_init(hdev);
@@ -3208,17 +4134,28 @@ static int gaudi2_sw_init(struct hl_device *hdev)
prop->supports_compute_reset = true;
+ /* Event queue sanity check added in FW version 1.11 */
+ if (hl_fw_version_cmp(hdev, 1, 11, 0) < 0)
+ hdev->event_queue.check_eqe_index = false;
+ else
+ hdev->event_queue.check_eqe_index = true;
+
hdev->asic_funcs->set_pci_memory_regions(hdev);
rc = gaudi2_special_blocks_iterator_config(hdev);
if (rc)
- goto free_scratchpad_mem;
+ goto free_virt_msix_db_mem;
+
+ rc = gaudi2_test_queues_msgs_alloc(hdev);
+ if (rc)
+ goto special_blocks_free;
+
+ hdev->heartbeat_debug_info.cpu_queue_id = GAUDI2_QUEUE_ID_CPU_PQ;
return 0;
-free_scratchpad_mem:
- hl_asic_dma_pool_free(hdev, gaudi2->scratchpad_kernel_address,
- gaudi2->scratchpad_bus_address);
+special_blocks_free:
+ gaudi2_special_blocks_iterator_free(hdev);
free_virt_msix_db_mem:
hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
free_cpu_accessible_dma_pool:
@@ -3238,6 +4175,8 @@ static int gaudi2_sw_fini(struct hl_device *hdev)
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct gaudi2_device *gaudi2 = hdev->asic_specific;
+ gaudi2_test_queues_msgs_free(hdev);
+
gaudi2_special_blocks_iterator_free(hdev);
hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
@@ -3247,9 +4186,6 @@ static int gaudi2_sw_fini(struct hl_device *hdev)
hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
hdev->cpu_accessible_dma_address);
- hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address,
- gaudi2->scratchpad_bus_address);
-
dma_pool_destroy(hdev->dma_pool);
kfree(gaudi2);
@@ -3646,8 +4582,14 @@ static const char *gaudi2_irq_name(u16 irq_number)
return "gaudi2 completion";
case GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ... GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM:
return gaudi2_vdec_irq_name[irq_number - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM];
+ case GAUDI2_IRQ_NUM_TPC_ASSERT:
+ return "gaudi2 tpc assert";
+ case GAUDI2_IRQ_NUM_UNEXPECTED_ERROR:
+ return "gaudi2 unexpected error";
case GAUDI2_IRQ_NUM_USER_FIRST ... GAUDI2_IRQ_NUM_USER_LAST:
return "gaudi2 user completion";
+ case GAUDI2_IRQ_NUM_EQ_ERROR:
+ return "gaudi2 eq error";
default:
return "invalid";
}
@@ -3677,7 +4619,6 @@ static void gaudi2_dec_disable_msix(struct hl_device *hdev, u32 max_irq_num)
static int gaudi2_dec_enable_msix(struct hl_device *hdev)
{
int rc, i, irq_init_cnt, irq, relative_idx;
- irq_handler_t irq_handler;
struct hl_dec *dec;
for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, irq_init_cnt = 0;
@@ -3687,20 +4628,22 @@ static int gaudi2_dec_enable_msix(struct hl_device *hdev)
irq = pci_irq_vector(hdev->pdev, i);
relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
- irq_handler = (relative_idx % 2) ?
- hl_irq_handler_dec_abnrm :
- hl_irq_handler_user_interrupt;
-
- dec = hdev->dec + relative_idx / 2;
-
/* We pass different structures depending on the irq handler. For the abnormal
* interrupt we pass hl_dec and for the regular interrupt we pass the relevant
* user_interrupt entry
+ *
+ * TODO: change the dec abnrm to threaded irq
*/
- rc = request_irq(irq, irq_handler, 0, gaudi2_irq_name(i),
- ((relative_idx % 2) ?
- (void *) dec :
- (void *) &hdev->user_interrupt[dec->core_id]));
+
+ dec = hdev->dec + relative_idx / 2;
+ if (relative_idx % 2) {
+ rc = request_irq(irq, hl_irq_handler_dec_abnrm, 0,
+ gaudi2_irq_name(i), (void *) dec);
+ } else {
+ rc = request_irq(irq, hl_irq_user_interrupt_handler, 0, gaudi2_irq_name(i),
+ (void *) &hdev->user_interrupt[dec->core_id]);
+ }
+
if (rc) {
dev_err(hdev->dev, "Failed to request IRQ %d", irq);
goto free_dec_irqs;
@@ -3719,12 +4662,13 @@ static int gaudi2_enable_msix(struct hl_device *hdev)
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct gaudi2_device *gaudi2 = hdev->asic_specific;
int rc, irq, i, j, user_irq_init_cnt;
- irq_handler_t irq_handler;
struct hl_cq *cq;
if (gaudi2->hw_cap_initialized & HW_CAP_MSIX)
return 0;
+ hl_init_cpu_for_irq(hdev);
+
rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES,
PCI_IRQ_MSIX);
if (rc < 0) {
@@ -3755,20 +4699,47 @@ static int gaudi2_enable_msix(struct hl_device *hdev)
goto free_event_irq;
}
+ irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
+ rc = request_threaded_irq(irq, NULL, hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
+ gaudi2_irq_name(GAUDI2_IRQ_NUM_TPC_ASSERT),
+ &hdev->tpc_interrupt);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to request IRQ %d", irq);
+ goto free_dec_irq;
+ }
+
+ irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
+ rc = request_threaded_irq(irq, NULL, hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
+ gaudi2_irq_name(GAUDI2_IRQ_NUM_UNEXPECTED_ERROR),
+ &hdev->unexpected_error_interrupt);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to request IRQ %d", irq);
+ goto free_tpc_irq;
+ }
+
for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0;
user_irq_init_cnt < prop->user_interrupt_count;
i++, j++, user_irq_init_cnt++) {
irq = pci_irq_vector(hdev->pdev, i);
- irq_handler = hl_irq_handler_user_interrupt;
-
- rc = request_irq(irq, irq_handler, 0, gaudi2_irq_name(i), &hdev->user_interrupt[j]);
+ hl_set_irq_affinity(hdev, irq);
+ rc = request_irq(irq, hl_irq_user_interrupt_handler, 0, gaudi2_irq_name(i),
+ &hdev->user_interrupt[j]);
if (rc) {
dev_err(hdev->dev, "Failed to request IRQ %d", irq);
goto free_user_irq;
}
}
+ irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR);
+ rc = request_threaded_irq(irq, NULL, hl_irq_eq_error_interrupt_thread_handler,
+ IRQF_ONESHOT, gaudi2_irq_name(GAUDI2_IRQ_NUM_EQ_ERROR),
+ hdev);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to request IRQ %d", irq);
+ goto free_user_irq;
+ }
+
gaudi2->hw_cap_initialized |= HW_CAP_MSIX;
return 0;
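For illustration only (not part of the patch): the new TPC-assert, unexpected-error and EQ-error vectors are requested with request_threaded_irq() and IRQF_ONESHOT, and each new request adds a matching unwind label so that a late failure frees the earlier vectors in reverse order. A stand-alone sketch of that goto-unwind shape (setup/teardown steps are stubbed, not driver code):

#include <stdio.h>

static int setup(const char *name)      { printf("setup %s\n", name); return 0; }
static void teardown(const char *name)  { printf("teardown %s\n", name); }

static int enable_all(void)
{
	int rc;

	rc = setup("event queue irq");
	if (rc)
		return rc;

	rc = setup("decoder irqs");
	if (rc)
		goto free_event_irq;

	rc = setup("tpc assert irq");
	if (rc)
		goto free_dec_irq;

	rc = setup("unexpected error irq");
	if (rc)
		goto free_tpc_irq;

	return 0;

free_tpc_irq:
	teardown("tpc assert irq");
free_dec_irq:
	teardown("decoder irqs");
free_event_irq:
	teardown("event queue irq");
	return rc;
}

int main(void) { return enable_all(); }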
@@ -3778,11 +4749,16 @@ free_user_irq:
i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) {
irq = pci_irq_vector(hdev->pdev, i);
+ irq_set_affinity_and_hint(irq, NULL);
free_irq(irq, &hdev->user_interrupt[j]);
}
-
- gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);
-
+ irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
+ free_irq(irq, &hdev->unexpected_error_interrupt);
+free_tpc_irq:
+ irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
+ free_irq(irq, &hdev->tpc_interrupt);
+free_dec_irq:
+ gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_DEC_LAST + 1);
free_event_irq:
irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
free_irq(irq, cq);
@@ -3814,6 +4790,9 @@ static void gaudi2_sync_irqs(struct hl_device *hdev)
synchronize_irq(irq);
}
+ synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT));
+ synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR));
+
for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = 0 ; j < hdev->asic_prop.user_interrupt_count;
i++, j++) {
irq = pci_irq_vector(hdev->pdev, i);
@@ -3821,6 +4800,7 @@ static void gaudi2_sync_irqs(struct hl_device *hdev)
}
synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE));
+ synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR));
}
static void gaudi2_disable_msix(struct hl_device *hdev)
@@ -3840,10 +4820,17 @@ static void gaudi2_disable_msix(struct hl_device *hdev)
gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);
+ irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
+ free_irq(irq, &hdev->tpc_interrupt);
+
+ irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
+ free_irq(irq, &hdev->unexpected_error_interrupt);
+
for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, k = 0;
k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) {
irq = pci_irq_vector(hdev->pdev, i);
+ irq_set_affinity_and_hint(irq, NULL);
free_irq(irq, &hdev->user_interrupt[j]);
}
@@ -3851,6 +4838,9 @@ static void gaudi2_disable_msix(struct hl_device *hdev)
cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
free_irq(irq, cq);
+ irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR);
+ free_irq(irq, hdev);
+
pci_free_irq_vectors(hdev->pdev);
gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX;
@@ -4037,7 +5027,6 @@ static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids,
{
int i, rc;
-
for (i = 0 ; i < num_cores ; i++) {
if (gaudi2_is_arc_enabled(hdev, core_ids[i]))
gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command);
@@ -4059,6 +5048,139 @@ static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids,
return 0;
}
+static int gaudi2_set_tpc_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
+{
+ struct gaudi2_device *gaudi2 = hdev->asic_specific;
+ u32 reg_base, reg_addr, reg_val, tpc_id;
+
+ if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
+ return 0;
+
+ tpc_id = gaudi2_tpc_engine_id_to_tpc_id[engine_id];
+ if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + tpc_id)))
+ return 0;
+
+ reg_base = gaudi2_tpc_cfg_blocks_bases[tpc_id];
+ reg_addr = reg_base + TPC_CFG_STALL_OFFSET;
+ reg_val = FIELD_PREP(DCORE0_TPC0_CFG_TPC_STALL_V_MASK,
+ (engine_command == HL_ENGINE_STALL) ? 1 : 0);
+ WREG32(reg_addr, reg_val);
+
+ if (engine_command == HL_ENGINE_RESUME) {
+ reg_base = gaudi2_tpc_eml_cfg_blocks_bases[tpc_id];
+ reg_addr = reg_base + TPC_EML_CFG_DBG_CNT_OFFSET;
+ RMWREG32(reg_addr, 0x1, DCORE0_TPC0_EML_CFG_DBG_CNT_DBG_EXIT_MASK);
+ }
+
+ return 0;
+}
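For illustration only (not part of the patch): the stall/resume value is written with FIELD_PREP(), which shifts the value into the bit position described by the register-field mask. A stand-alone sketch of that mask/shift arithmetic (the one-bit mask is hypothetical; __builtin_ctz is a GCC/Clang builtin):

#include <stdint.h>
#include <stdio.h>

#define TPC_STALL_V_MASK 0x00000001u            /* hypothetical one-bit field */

/* minimal FIELD_PREP(): place 'val' at the lowest set bit of 'mask' */
static uint32_t field_prep(uint32_t mask, uint32_t val)
{
	return (val << __builtin_ctz(mask)) & mask;
}

int main(void)
{
	printf("stall=%#x resume=%#x\n",
	       (unsigned)field_prep(TPC_STALL_V_MASK, 1),
	       (unsigned)field_prep(TPC_STALL_V_MASK, 0));
	return 0;
}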
+
+static int gaudi2_set_mme_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
+{
+ struct gaudi2_device *gaudi2 = hdev->asic_specific;
+ u32 reg_base, reg_addr, reg_val, mme_id;
+
+ mme_id = gaudi2_mme_engine_id_to_mme_id[engine_id];
+ if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + mme_id)))
+ return 0;
+
+ reg_base = gaudi2_mme_ctrl_lo_blocks_bases[mme_id];
+ reg_addr = reg_base + MME_CTRL_LO_QM_STALL_OFFSET;
+ reg_val = FIELD_PREP(DCORE0_MME_CTRL_LO_QM_STALL_V_MASK,
+ (engine_command == HL_ENGINE_STALL) ? 1 : 0);
+ WREG32(reg_addr, reg_val);
+
+ return 0;
+}
+
+static int gaudi2_set_edma_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
+{
+ struct gaudi2_device *gaudi2 = hdev->asic_specific;
+ u32 reg_base, reg_addr, reg_val, edma_id;
+
+ if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
+ return 0;
+
+ edma_id = gaudi2_edma_engine_id_to_edma_id[engine_id];
+ if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + edma_id)))
+ return 0;
+
+ reg_base = gaudi2_dma_core_blocks_bases[edma_id];
+ reg_addr = reg_base + EDMA_CORE_CFG_STALL_OFFSET;
+ reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK,
+ (engine_command == HL_ENGINE_STALL) ? 1 : 0);
+ WREG32(reg_addr, reg_val);
+
+ if (engine_command == HL_ENGINE_STALL) {
+ reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK, 0x1) |
+ FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_FLUSH_MASK, 0x1);
+ WREG32(reg_addr, reg_val);
+ }
+
+ return 0;
+}
+
+static int gaudi2_set_engine_modes(struct hl_device *hdev,
+ u32 *engine_ids, u32 num_engines, u32 engine_command)
+{
+ int i, rc;
+
+ for (i = 0 ; i < num_engines ; ++i) {
+ switch (engine_ids[i]) {
+ case GAUDI2_DCORE0_ENGINE_ID_TPC_0 ... GAUDI2_DCORE0_ENGINE_ID_TPC_5:
+ case GAUDI2_DCORE1_ENGINE_ID_TPC_0 ... GAUDI2_DCORE1_ENGINE_ID_TPC_5:
+ case GAUDI2_DCORE2_ENGINE_ID_TPC_0 ... GAUDI2_DCORE2_ENGINE_ID_TPC_5:
+ case GAUDI2_DCORE3_ENGINE_ID_TPC_0 ... GAUDI2_DCORE3_ENGINE_ID_TPC_5:
+ rc = gaudi2_set_tpc_engine_mode(hdev, engine_ids[i], engine_command);
+ if (rc)
+ return rc;
+
+ break;
+ case GAUDI2_DCORE0_ENGINE_ID_MME:
+ case GAUDI2_DCORE1_ENGINE_ID_MME:
+ case GAUDI2_DCORE2_ENGINE_ID_MME:
+ case GAUDI2_DCORE3_ENGINE_ID_MME:
+ rc = gaudi2_set_mme_engine_mode(hdev, engine_ids[i], engine_command);
+ if (rc)
+ return rc;
+
+ break;
+ case GAUDI2_DCORE0_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE0_ENGINE_ID_EDMA_1:
+ case GAUDI2_DCORE1_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE1_ENGINE_ID_EDMA_1:
+ case GAUDI2_DCORE2_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE2_ENGINE_ID_EDMA_1:
+ case GAUDI2_DCORE3_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE3_ENGINE_ID_EDMA_1:
+ rc = gaudi2_set_edma_engine_mode(hdev, engine_ids[i], engine_command);
+ if (rc)
+ return rc;
+
+ break;
+ default:
+ dev_err(hdev->dev, "Invalid engine ID %u\n", engine_ids[i]);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int gaudi2_set_engines(struct hl_device *hdev, u32 *engine_ids,
+ u32 num_engines, u32 engine_command)
+{
+ switch (engine_command) {
+ case HL_ENGINE_CORE_HALT:
+ case HL_ENGINE_CORE_RUN:
+ return gaudi2_set_engine_cores(hdev, engine_ids, num_engines, engine_command);
+
+ case HL_ENGINE_STALL:
+ case HL_ENGINE_RESUME:
+ return gaudi2_set_engine_modes(hdev, engine_ids, num_engines, engine_command);
+
+ default:
+ dev_err(hdev->dev, "failed to execute command id %u\n", engine_command);
+ return -EINVAL;
+ }
+}
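For illustration only (not part of the patch): gaudi2_set_engines() routes the core-level commands (halt/run) to gaudi2_set_engine_cores() and the engine-level ones (stall/resume) to gaudi2_set_engine_modes(). A hypothetical caller stalling and then resuming two DCORE0 TPCs might look roughly like this fragment (engine IDs and command values are taken from the patch itself):

	u32 ids[] = { GAUDI2_DCORE0_ENGINE_ID_TPC_0, GAUDI2_DCORE0_ENGINE_ID_TPC_1 };
	int rc;

	rc = gaudi2_set_engines(hdev, ids, ARRAY_SIZE(ids), HL_ENGINE_STALL);
	if (!rc)
		rc = gaudi2_set_engines(hdev, ids, ARRAY_SIZE(ids), HL_ENGINE_RESUME);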
+
static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
u32 wait_timeout_ms;
@@ -4068,7 +5190,7 @@ static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw
else
wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC;
- if (fw_reset)
+ if (fw_reset || hdev->cpld_shutdown)
goto skip_engines;
gaudi2_stop_dma_qmans(hdev);
@@ -4121,6 +5243,8 @@ static void gaudi2_init_firmware_preload_params(struct hl_device *hdev)
pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC;
+ pre_fw_load->wait_for_preboot_extended_timeout =
+ GAUDI2_PREBOOT_EXTENDED_REQ_TIMEOUT_USEC;
}
static void gaudi2_init_firmware_loader(struct hl_device *hdev)
@@ -4251,10 +5375,17 @@ static void gaudi2_init_qman_pq(struct hl_device *hdev, u32 reg_base,
q = &hdev->kernel_queues[queue_id_base + pq_id];
pq_offset = pq_id * 4;
- WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset,
- lower_32_bits(q->bus_address));
- WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset,
- upper_32_bits(q->bus_address));
+ if (q->dram_bd) {
+ WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset,
+ lower_32_bits(q->pq_dram_address));
+ WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset,
+ upper_32_bits(q->pq_dram_address));
+ } else {
+ WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset,
+ lower_32_bits(q->bus_address));
+ WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset,
+ upper_32_bits(q->bus_address));
+ }
WREG32(reg_base + QM_PQ_SIZE_0_OFFSET + pq_offset, ilog2(HL_QUEUE_LENGTH));
WREG32(reg_base + QM_PQ_PI_0_OFFSET + pq_offset, 0);
WREG32(reg_base + QM_PQ_CI_0_OFFSET + pq_offset, 0);
@@ -5141,7 +6272,8 @@ static int gaudi2_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_har
return rc;
}
-static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base)
+static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base,
+ bool host_resident_pgt)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
u64 hop0_addr;
@@ -5153,7 +6285,11 @@ static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base)
max_asid = min((u32) 8, max_asid);
for (asid = 0 ; asid < max_asid ; asid++) {
- hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr;
+ if (host_resident_pgt)
+ hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr;
+ else
+ hop0_addr = prop->mmu_pgt_addr + (asid * prop->dmmu.hop_table_size);
+
rc = gaudi2_mmu_update_asid_hop0_addr(hdev, stlb_base, asid, hop0_addr);
if (rc) {
dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", asid);
@@ -5164,7 +6300,8 @@ static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base)
return 0;
}
-static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base)
+static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base,
+ bool host_resident_pgt)
{
u32 status, timeout_usec;
int rc;
@@ -5187,7 +6324,7 @@ static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb
if (rc)
dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU SRAM init\n");
- rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base);
+ rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base, host_resident_pgt);
if (rc)
return rc;
@@ -5211,6 +6348,7 @@ static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb
static int gaudi2_pci_mmu_init(struct hl_device *hdev)
{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
struct gaudi2_device *gaudi2 = hdev->asic_specific;
u32 mmu_base, stlb_base;
int rc;
@@ -5250,7 +6388,7 @@ static int gaudi2_pci_mmu_init(struct hl_device *hdev)
WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK);
- rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
+ rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base, prop->pmmu.host_resident);
if (rc)
return rc;
@@ -5302,7 +6440,7 @@ static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id,
WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK);
- rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
+ rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base, prop->dmmu.host_resident);
if (rc)
return rc;
@@ -5509,11 +6647,10 @@ static void gaudi2_send_hard_reset_cmd(struct hl_device *hdev)
* gaudi2_execute_hard_reset - execute hard reset by driver/FW
*
* @hdev: pointer to the habanalabs device structure
- * @reset_sleep_ms: sleep time in msec after reset
*
* This function executes a hard reset, depending on whether the driver or the FW should perform it
*/
-static void gaudi2_execute_hard_reset(struct hl_device *hdev, u32 reset_sleep_ms)
+static void gaudi2_execute_hard_reset(struct hl_device *hdev)
{
if (hdev->asic_prop.hard_reset_done_by_fw) {
gaudi2_send_hard_reset_cmd(hdev);
@@ -5535,27 +6672,16 @@ static void gaudi2_execute_hard_reset(struct hl_device *hdev, u32 reset_sleep_ms
* gaudi2_execute_soft_reset - execute soft reset by driver/FW
*
* @hdev: pointer to the habanalabs device structure
- * @reset_sleep_ms: sleep time in msec after reset
* @driver_performs_reset: true if driver should perform reset instead of f/w.
+ * @poll_timeout_us: time to wait for response from f/w.
*
* This function executes a soft reset, depending on whether the driver or the FW should perform it
*/
-static void gaudi2_execute_soft_reset(struct hl_device *hdev, u32 reset_sleep_ms,
- bool driver_performs_reset)
+static int gaudi2_execute_soft_reset(struct hl_device *hdev, bool driver_performs_reset,
+ u32 poll_timeout_us)
{
- struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
-
- if (!driver_performs_reset) {
- /* set SP to indicate reset request sent to FW */
- if (dyn_regs->cpu_rst_status)
- WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA);
- else
- WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
-
- WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq),
- gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id);
- return;
- }
+ if (!driver_performs_reset)
+ return hl_fw_send_soft_reset(hdev);
/* Block access to engines, QMANs and SM during reset, these
* RRs will be reconfigured after soft reset.
@@ -5569,17 +6695,14 @@ static void gaudi2_execute_soft_reset(struct hl_device *hdev, u32 reset_sleep_ms
mmPCIE_VDEC1_MSTR_IF_RR_SHRD_HBW_BASE + HL_BLOCK_SIZE);
WREG32(mmPSOC_RESET_CONF_SOFT_RST, 1);
+ return 0;
}
-static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 reset_sleep_ms,
- u32 poll_timeout_us)
+static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 poll_timeout_us)
{
int i, rc = 0;
u32 reg_val;
- /* without this sleep reset will not work */
- msleep(reset_sleep_ms);
-
/* We poll the BTM done indication multiple times after reset due to
* a HW erratum 'GAUDI2_0300'
*/
@@ -5596,30 +6719,12 @@ static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 reset_sleep_m
dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", reg_val);
}
-static void gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_timeout_us)
-{
- int i, rc = 0;
- u32 reg_val;
-
- for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
- rc = hl_poll_timeout(
- hdev,
- mmCPU_RST_STATUS_TO_HOST,
- reg_val,
- reg_val == CPU_RST_STATUS_SOFT_RST_DONE,
- 1000,
- poll_timeout_us);
-
- if (rc)
- dev_err(hdev->dev, "Timeout while waiting for FW to complete soft reset (0x%x)\n",
- reg_val);
-}
-
-static void gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
+static int gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
struct gaudi2_device *gaudi2 = hdev->asic_specific;
u32 poll_timeout_us, reset_sleep_ms;
bool driver_performs_reset = false;
+ int rc;
if (hdev->pldm) {
reset_sleep_ms = hard_reset ? GAUDI2_PLDM_HRESET_TIMEOUT_MSEC :
@@ -5637,7 +6742,7 @@ static void gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_rese
if (hard_reset) {
driver_performs_reset = !hdev->asic_prop.hard_reset_done_by_fw;
- gaudi2_execute_hard_reset(hdev, reset_sleep_ms);
+ gaudi2_execute_hard_reset(hdev);
} else {
/*
* As we have to support also work with preboot only (which does not supports
@@ -5647,11 +6752,13 @@ static void gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_rese
*/
driver_performs_reset = (hdev->fw_components == FW_TYPE_PREBOOT_CPU &&
!hdev->asic_prop.fw_security_enabled);
- gaudi2_execute_soft_reset(hdev, reset_sleep_ms, driver_performs_reset);
+ rc = gaudi2_execute_soft_reset(hdev, driver_performs_reset, poll_timeout_us);
+ if (rc)
+ return rc;
}
skip_reset:
- if (driver_performs_reset || hard_reset)
+ if (driver_performs_reset || hard_reset) {
/*
* Instead of waiting for BTM indication we should wait for preboot ready:
* Consider the below scenario:
@@ -5671,17 +6778,18 @@ skip_reset:
* communicate with FW while it is in reset.
* to overcome this we will always wait for the preboot ready indication
*/
- if ((hdev->fw_components & FW_TYPE_PREBOOT_CPU)) {
- msleep(reset_sleep_ms);
+
+ /* without this sleep reset will not work */
+ msleep(reset_sleep_ms);
+
+ if (hdev->fw_components & FW_TYPE_PREBOOT_CPU)
hl_fw_wait_preboot_ready(hdev);
- } else {
- gaudi2_poll_btm_indication(hdev, reset_sleep_ms, poll_timeout_us);
- }
- else
- gaudi2_get_soft_rst_done_indication(hdev, poll_timeout_us);
+ else
+ gaudi2_poll_btm_indication(hdev, poll_timeout_us);
+ }
if (!gaudi2)
- return;
+ return 0;
gaudi2->dec_hw_cap_initialized &= ~(HW_CAP_DEC_MASK);
gaudi2->tpc_hw_cap_initialized &= ~(HW_CAP_TPC_MASK);
@@ -5708,17 +6816,12 @@ skip_reset:
HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_MME_MASK |
HW_CAP_ROT_MASK);
}
+ return 0;
}
static int gaudi2_suspend(struct hl_device *hdev)
{
- int rc;
-
- rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
- if (rc)
- dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
-
- return rc;
+ return hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
}
static int gaudi2_resume(struct hl_device *hdev)
@@ -5731,10 +6834,17 @@ static int gaudi2_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
{
int rc;
- vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
- VM_DONTCOPY | VM_NORESERVE;
+ vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
+ VM_DONTCOPY | VM_NORESERVE);
#ifdef _HAS_DMA_MMAP_COHERENT
+ /*
+ * If dma_alloc_coherent() returns a vmalloc address, set VM_MIXEDMAP
+ * so vm_insert_page() can handle it safely. Without this, the kernel
+ * may BUG_ON due to VM_PFNMAP.
+ */
+ if (is_vmalloc_addr(cpu_addr))
+ vm_flags_set(vma, VM_MIXEDMAP);
rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
if (rc)
@@ -6019,31 +7129,14 @@ static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t s
hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
}
-static dma_addr_t gaudi2_dma_map_single(struct hl_device *hdev, void *addr, int len,
- enum dma_data_direction dir)
-{
- dma_addr_t dma_addr;
-
- dma_addr = dma_map_single(&hdev->pdev->dev, addr, len, dir);
- if (unlikely(dma_mapping_error(&hdev->pdev->dev, dma_addr)))
- return 0;
-
- return dma_addr;
-}
-
-static void gaudi2_dma_unmap_single(struct hl_device *hdev, dma_addr_t addr, int len,
- enum dma_data_direction dir)
-{
- dma_unmap_single(&hdev->pdev->dev, addr, len, dir);
-}
-
static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser)
{
struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
struct gaudi2_device *gaudi2 = hdev->asic_specific;
if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) {
- dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
+ dev_err(hdev->dev, "h/w queue %s is disabled\n",
+ GAUDI2_QUEUE_ID_TO_STR(parser->hw_queue_id));
return -EINVAL;
}
@@ -6259,28 +7352,29 @@ static void gaudi2_qman_set_test_mode(struct hl_device *hdev, u32 hw_queue_id, b
}
}
-static int gaudi2_test_queue(struct hl_device *hdev, u32 hw_queue_id)
+static inline u32 gaudi2_test_queue_hw_queue_id_to_sob_id(struct hl_device *hdev, u32 hw_queue_id)
+{
+ return hdev->asic_prop.first_available_user_sob[0] +
+ hw_queue_id - GAUDI2_QUEUE_ID_PDMA_0_0;
+}
+
+static void gaudi2_test_queue_clear(struct hl_device *hdev, u32 hw_queue_id)
{
- u32 sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
+ u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
- u32 timeout_usec, tmp, sob_base = 1, sob_val = 0x5a5a;
- struct packet_msg_short *msg_short_pkt;
- dma_addr_t pkt_dma_addr;
- size_t pkt_size;
- int rc;
- if (hdev->pldm)
- timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC;
- else
- timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC;
+ /* Reset the SOB value */
+ WREG32(sob_addr, 0);
+}
- pkt_size = sizeof(*msg_short_pkt);
- msg_short_pkt = hl_asic_dma_pool_zalloc(hdev, pkt_size, GFP_KERNEL, &pkt_dma_addr);
- if (!msg_short_pkt) {
- dev_err(hdev->dev, "Failed to allocate packet for H/W queue %d testing\n",
- hw_queue_id);
- return -ENOMEM;
- }
+static int gaudi2_test_queue_send_msg_short(struct hl_device *hdev, u32 hw_queue_id, u32 sob_val,
+ struct gaudi2_queues_test_info *msg_info)
+{
+ u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
+ u32 tmp, sob_base = 1;
+ struct packet_msg_short *msg_short_pkt = msg_info->kern_addr;
+ size_t pkt_size = sizeof(struct packet_msg_short);
+ int rc;
tmp = (PACKET_MSG_SHORT << GAUDI2_PKT_CTL_OPCODE_SHIFT) |
(1 << GAUDI2_PKT_CTL_EB_SHIFT) |
@@ -6291,15 +7385,26 @@ static int gaudi2_test_queue(struct hl_device *hdev, u32 hw_queue_id)
msg_short_pkt->value = cpu_to_le32(sob_val);
msg_short_pkt->ctl = cpu_to_le32(tmp);
- /* Reset the SOB value */
- WREG32(sob_addr, 0);
+ rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, msg_info->dma_addr);
+ if (rc)
+ dev_err(hdev->dev,
+ "Failed to send msg_short packet to H/W queue %s\n",
+ GAUDI2_QUEUE_ID_TO_STR(hw_queue_id));
- rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
- if (rc) {
- dev_err(hdev->dev, "Failed to send msg_short packet to H/W queue %d\n",
- hw_queue_id);
- goto free_pkt;
- }
+ return rc;
+}
+
+static int gaudi2_test_queue_wait_completion(struct hl_device *hdev, u32 hw_queue_id, u32 sob_val)
+{
+ u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
+ u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
+ u32 timeout_usec, tmp;
+ int rc;
+
+ if (hdev->pldm)
+ timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC;
+ else
+ timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC;
rc = hl_poll_timeout(
hdev,
@@ -6310,16 +7415,11 @@ static int gaudi2_test_queue(struct hl_device *hdev, u32 hw_queue_id)
timeout_usec);
if (rc == -ETIMEDOUT) {
- dev_err(hdev->dev, "H/W queue %d test failed (SOB_OBJ_0 == 0x%x)\n",
- hw_queue_id, tmp);
+ dev_err(hdev->dev, "H/W queue %s test failed (SOB_OBJ_0 == 0x%x)\n",
+ GAUDI2_QUEUE_ID_TO_STR(hw_queue_id), tmp);
rc = -EIO;
}
- /* Reset the SOB value */
- WREG32(sob_addr, 0);
-
-free_pkt:
- hl_asic_dma_pool_free(hdev, (void *) msg_short_pkt, pkt_dma_addr);
return rc;
}
@@ -6339,42 +7439,60 @@ static int gaudi2_test_cpu_queue(struct hl_device *hdev)
static int gaudi2_test_queues(struct hl_device *hdev)
{
- int i, rc, ret_val = 0;
+ struct gaudi2_device *gaudi2 = hdev->asic_specific;
+ struct gaudi2_queues_test_info *msg_info;
+ u32 sob_val = 0x5a5a;
+ int i, rc;
+ /* send test message on all enabled Qs */
for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
- if (!gaudi2_is_queue_enabled(hdev, i))
+ if (!gaudi2_is_queue_enabled(hdev, i) || gaudi2_is_edma_queue_id(i))
continue;
+ msg_info = &gaudi2->queues_test_info[i - GAUDI2_QUEUE_ID_PDMA_0_0];
gaudi2_qman_set_test_mode(hdev, i, true);
- rc = gaudi2_test_queue(hdev, i);
- gaudi2_qman_set_test_mode(hdev, i, false);
-
- if (rc) {
- ret_val = -EINVAL;
+ gaudi2_test_queue_clear(hdev, i);
+ rc = gaudi2_test_queue_send_msg_short(hdev, i, sob_val, msg_info);
+ if (rc)
goto done;
- }
}
rc = gaudi2_test_cpu_queue(hdev);
- if (rc) {
- ret_val = -EINVAL;
+ if (rc)
goto done;
+
+ /* verify that all messages were processed */
+ for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
+ if (!gaudi2_is_queue_enabled(hdev, i) || gaudi2_is_edma_queue_id(i))
+ continue;
+
+ rc = gaudi2_test_queue_wait_completion(hdev, i, sob_val);
+ if (rc)
+ /* chip is not usable, no need for cleanups, just bail out with an error */
+ goto done;
+
+ gaudi2_test_queue_clear(hdev, i);
+ gaudi2_qman_set_test_mode(hdev, i, false);
}
done:
- return ret_val;
+ return rc;
}
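For illustration only (not part of the patch): the rewritten gaudi2_test_queues() no longer sends and polls one queue at a time; it first pushes a message to every enabled queue and only then waits for all of the SOB updates, so the per-queue wait times overlap instead of adding up. The shape of that change as a stand-alone sketch (send/wait steps are stubbed):

#include <stdio.h>

#define NUM_QS 4                                /* hypothetical number of tested queues */

static int send_test_msg(int q)         { printf("send %d\n", q); return 0; }
static int wait_completion(int q)       { printf("wait %d\n", q); return 0; }

int main(void)
{
	int q, rc = 0;

	for (q = 0; q < NUM_QS && !rc; q++)     /* phase 1: send to every queue */
		rc = send_test_msg(q);

	for (q = 0; q < NUM_QS && !rc; q++)     /* phase 2: wait for every queue */
		rc = wait_completion(q);

	return rc;
}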
static int gaudi2_compute_reset_late_init(struct hl_device *hdev)
{
struct gaudi2_device *gaudi2 = hdev->asic_specific;
size_t irq_arr_size;
+ int rc;
- /* TODO: missing gaudi2_nic_resume.
- * Until implemented nic_hw_cap_initialized will remain zeroed
- */
gaudi2_init_arcs(hdev);
- gaudi2_scrub_arcs_dccm(hdev);
+
+ rc = gaudi2_scrub_arcs_dccm(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to scrub arcs DCCM\n");
+ return rc;
+ }
+
gaudi2_init_security(hdev);
/* Unmask all IRQs since some could have been received during the soft reset */
@@ -6382,74 +7500,21 @@ static int gaudi2_compute_reset_late_init(struct hl_device *hdev)
return hl_fw_unmask_irq_arr(hdev, gaudi2->hw_events, irq_arr_size);
}
-static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset,
- struct iterate_module_ctx *ctx)
+static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
+ struct engines_data *e)
{
- struct gaudi2_tpc_idle_data *idle_data = ctx->data;
- u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
- bool is_eng_idle;
- int engine_idx;
-
- if ((dcore == 0) && (inst == (NUM_DCORE0_TPC - 1)))
- engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_6;
- else
- engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_0 +
- dcore * GAUDI2_ENGINE_ID_DCORE_OFFSET + inst;
-
- tpc_cfg_sts = RREG32(mmDCORE0_TPC0_CFG_STATUS + offset);
- qm_glbl_sts0 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS0 + offset);
- qm_glbl_sts1 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS1 + offset);
- qm_cgm_sts = RREG32(mmDCORE0_TPC0_QM_CGM_STS + offset);
-
- is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
- IS_TPC_IDLE(tpc_cfg_sts);
- *(idle_data->is_idle) &= is_eng_idle;
-
- if (idle_data->mask && !is_eng_idle)
- set_bit(engine_idx, idle_data->mask);
-
- if (idle_data->e)
- hl_engine_data_sprintf(idle_data->e,
- idle_data->tpc_fmt, dcore, inst,
- is_eng_idle ? "Y" : "N",
- qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
-}
-
-static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
- struct engines_data *e)
-{
- u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_idle_ind_mask,
- mme_arch_sts, dec_swreg15, dec_enabled_bit;
+ u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1;
struct asic_fixed_properties *prop = &hdev->asic_prop;
- const char *rot_fmt = "%-6d%-5d%-9s%#-14x%#-12x%s\n";
unsigned long *mask = (unsigned long *) mask_arr;
- const char *edma_fmt = "%-6d%-6d%-9s%#-14x%#x\n";
- const char *mme_fmt = "%-5d%-6s%-9s%#-14x%#x\n";
- const char *nic_fmt = "%-5d%-9s%#-14x%#-12x\n";
- const char *pdma_fmt = "%-6d%-9s%#-14x%#x\n";
- const char *pcie_dec_fmt = "%-10d%-9s%#x\n";
- const char *dec_fmt = "%-6d%-5d%-9s%#x\n";
+ const char *edma_fmt = "%-6d%-6d%-9s%#-14x%#-15x%#x\n";
bool is_idle = true, is_eng_idle;
- u64 offset;
-
- struct gaudi2_tpc_idle_data tpc_idle_data = {
- .tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n",
- .e = e,
- .mask = mask,
- .is_idle = &is_idle,
- };
- struct iterate_module_ctx tpc_iter = {
- .fn = &gaudi2_is_tpc_engine_idle,
- .data = &tpc_idle_data,
- };
-
int engine_idx, i, j;
+ u64 offset;
- /* EDMA, Two engines per Dcore */
if (e)
hl_engine_data_sprintf(e,
- "\nCORE EDMA is_idle QM_GLBL_STS0 DMA_CORE_IDLE_IND_MASK\n"
- "---- ---- ------- ------------ ----------------------\n");
+ "\nCORE EDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0 DMA_CORE_STS1\n"
+ "---- ---- ------- ------------ ------------- -------------\n");
for (i = 0; i < NUM_OF_DCORES; i++) {
for (j = 0 ; j < NUM_OF_EDMA_PER_DCORE ; j++) {
@@ -6462,45 +7527,56 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask
i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
offset = i * DCORE_OFFSET + j * DCORE_EDMA_OFFSET;
- dma_core_idle_ind_mask =
- RREG32(mmDCORE0_EDMA0_CORE_IDLE_IND_MASK + offset);
+ dma_core_sts0 = RREG32(mmDCORE0_EDMA0_CORE_STS0 + offset);
+ dma_core_sts1 = RREG32(mmDCORE0_EDMA0_CORE_STS1 + offset);
qm_glbl_sts0 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS0 + offset);
qm_glbl_sts1 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS1 + offset);
qm_cgm_sts = RREG32(mmDCORE0_EDMA0_QM_CGM_STS + offset);
is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
- IS_DMA_IDLE(dma_core_idle_ind_mask);
+ IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1);
is_idle &= is_eng_idle;
if (mask && !is_eng_idle)
set_bit(engine_idx, mask);
if (e)
- hl_engine_data_sprintf(e, edma_fmt, i, j,
- is_eng_idle ? "Y" : "N",
- qm_glbl_sts0,
- dma_core_idle_ind_mask);
+ hl_engine_data_sprintf(e, edma_fmt, i, j, is_eng_idle ? "Y" : "N",
+ qm_glbl_sts0, dma_core_sts0, dma_core_sts1);
}
}
- /* PDMA, Two engines in Full chip */
+ return is_idle;
+}
+
+static bool gaudi2_get_pdma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
+ struct engines_data *e)
+{
+ u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1;
+ unsigned long *mask = (unsigned long *) mask_arr;
+ const char *pdma_fmt = "%-6d%-9s%#-14x%#-15x%#x\n";
+ bool is_idle = true, is_eng_idle;
+ int engine_idx, i;
+ u64 offset;
+
if (e)
hl_engine_data_sprintf(e,
- "\nPDMA is_idle QM_GLBL_STS0 DMA_CORE_IDLE_IND_MASK\n"
- "---- ------- ------------ ----------------------\n");
+ "\nPDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0 DMA_CORE_STS1\n"
+ "---- ------- ------------ ------------- -------------\n");
for (i = 0 ; i < NUM_OF_PDMA ; i++) {
engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i;
offset = i * PDMA_OFFSET;
- dma_core_idle_ind_mask = RREG32(mmPDMA0_CORE_IDLE_IND_MASK + offset);
+ dma_core_sts0 = RREG32(mmPDMA0_CORE_STS0 + offset);
+ dma_core_sts1 = RREG32(mmPDMA0_CORE_STS1 + offset);
qm_glbl_sts0 = RREG32(mmPDMA0_QM_GLBL_STS0 + offset);
qm_glbl_sts1 = RREG32(mmPDMA0_QM_GLBL_STS1 + offset);
qm_cgm_sts = RREG32(mmPDMA0_QM_CGM_STS + offset);
is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
- IS_DMA_IDLE(dma_core_idle_ind_mask);
+ IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1);
is_idle &= is_eng_idle;
if (mask && !is_eng_idle)
@@ -6508,9 +7584,22 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask
if (e)
hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? "Y" : "N",
- qm_glbl_sts0, dma_core_idle_ind_mask);
+ qm_glbl_sts0, dma_core_sts0, dma_core_sts1);
}
+ return is_idle;
+}
+
+static bool gaudi2_get_nic_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
+ struct engines_data *e)
+{
+ unsigned long *mask = (unsigned long *) mask_arr;
+ const char *nic_fmt = "%-5d%-9s%#-14x%#-12x\n";
+ u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
+ bool is_idle = true, is_eng_idle;
+ int engine_idx, i;
+ u64 offset = 0;
+
/* NIC, twelve macros in Full chip */
if (e && hdev->nic_ports_mask)
hl_engine_data_sprintf(e,
@@ -6544,6 +7633,19 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask
qm_glbl_sts0, qm_cgm_sts);
}
+ return is_idle;
+}
+
+static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
+ struct engines_data *e)
+{
+ u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, mme_arch_sts;
+ unsigned long *mask = (unsigned long *) mask_arr;
+ const char *mme_fmt = "%-5d%-6s%-9s%#-14x%#x\n";
+ bool is_idle = true, is_eng_idle;
+ int engine_idx, i;
+ u64 offset;
+
if (e)
hl_engine_data_sprintf(e,
"\nMME Stub is_idle QM_GLBL_STS0 MME_ARCH_STATUS\n"
@@ -6574,16 +7676,82 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask
set_bit(engine_idx, mask);
}
- /*
- * TPC
- */
+ return is_idle;
+}
+
+static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset,
+ struct iterate_module_ctx *ctx)
+{
+ struct gaudi2_tpc_idle_data *idle_data = ctx->data;
+ u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
+ bool is_eng_idle;
+ int engine_idx;
+
+ if ((dcore == 0) && (inst == (NUM_DCORE0_TPC - 1)))
+ engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_6;
+ else
+ engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_0 +
+ dcore * GAUDI2_ENGINE_ID_DCORE_OFFSET + inst;
+
+ tpc_cfg_sts = RREG32(mmDCORE0_TPC0_CFG_STATUS + offset);
+ qm_glbl_sts0 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS0 + offset);
+ qm_glbl_sts1 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS1 + offset);
+ qm_cgm_sts = RREG32(mmDCORE0_TPC0_QM_CGM_STS + offset);
+
+ is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
+ IS_TPC_IDLE(tpc_cfg_sts);
+ *(idle_data->is_idle) &= is_eng_idle;
+
+ if (idle_data->mask && !is_eng_idle)
+ set_bit(engine_idx, idle_data->mask);
+
+ if (idle_data->e)
+ hl_engine_data_sprintf(idle_data->e,
+ idle_data->tpc_fmt, dcore, inst,
+ is_eng_idle ? "Y" : "N",
+ qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
+}
+
+static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
+ struct engines_data *e)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ unsigned long *mask = (unsigned long *) mask_arr;
+ bool is_idle = true;
+
+ struct gaudi2_tpc_idle_data tpc_idle_data = {
+ .tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n",
+ .e = e,
+ .mask = mask,
+ .is_idle = &is_idle,
+ };
+ struct iterate_module_ctx tpc_iter = {
+ .fn = &gaudi2_is_tpc_engine_idle,
+ .data = &tpc_idle_data,
+ };
+
if (e && prop->tpc_enabled_mask)
hl_engine_data_sprintf(e,
- "\nCORE TPC is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_IDLE_IND_MASK\n"
- "---- --- -------- ------------ ---------- ----------------------\n");
+ "\nCORE TPC is_idle QM_GLBL_STS0 QM_CGM_STS STATUS\n"
+ "---- --- ------- ------------ ---------- ------\n");
gaudi2_iterate_tpcs(hdev, &tpc_iter);
+ return *tpc_idle_data.is_idle;
+}
+
+static bool gaudi2_get_decoder_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
+ struct engines_data *e)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ unsigned long *mask = (unsigned long *) mask_arr;
+ const char *pcie_dec_fmt = "%-10d%-9s%#x\n";
+ const char *dec_fmt = "%-6d%-5d%-9s%#x\n";
+ bool is_idle = true, is_eng_idle;
+ u32 dec_swreg15, dec_enabled_bit;
+ int engine_idx, i, j;
+ u64 offset;
+
/* Decoders, two each Dcore and two shared PCIe decoders */
if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK)))
hl_engine_data_sprintf(e,
@@ -6638,10 +7806,23 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask
is_eng_idle ? "Y" : "N", dec_swreg15);
}
+ return is_idle;
+}
+
+static bool gaudi2_get_rotator_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
+ struct engines_data *e)
+{
+ const char *rot_fmt = "%-6d%-5d%-9s%#-14x%#-14x%#x\n";
+ unsigned long *mask = (unsigned long *) mask_arr;
+ u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
+ bool is_idle = true, is_eng_idle;
+ int engine_idx, i;
+ u64 offset;
+
if (e)
hl_engine_data_sprintf(e,
- "\nCORE ROT is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
- "---- ---- ------- ------------ ---------- -------------\n");
+ "\nCORE ROT is_idle QM_GLBL_STS0 QM_GLBL_STS1 QM_CGM_STS\n"
+ "---- --- ------- ------------ ------------ ----------\n");
for (i = 0 ; i < NUM_OF_ROT ; i++) {
engine_idx = GAUDI2_ENGINE_ID_ROT_0 + i;
@@ -6660,12 +7841,28 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask
if (e)
hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? "Y" : "N",
- qm_glbl_sts0, qm_cgm_sts, "-");
+ qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
}
return is_idle;
}
+static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
+ struct engines_data *e)
+{
+ bool is_idle = true;
+
+ is_idle &= gaudi2_get_edma_idle_status(hdev, mask_arr, mask_len, e);
+ is_idle &= gaudi2_get_pdma_idle_status(hdev, mask_arr, mask_len, e);
+ is_idle &= gaudi2_get_nic_idle_status(hdev, mask_arr, mask_len, e);
+ is_idle &= gaudi2_get_mme_idle_status(hdev, mask_arr, mask_len, e);
+ is_idle &= gaudi2_get_tpc_idle_status(hdev, mask_arr, mask_len, e);
+ is_idle &= gaudi2_get_decoder_idle_status(hdev, mask_arr, mask_len, e);
+ is_idle &= gaudi2_get_rotator_idle_status(hdev, mask_arr, mask_len, e);
+
+ return is_idle;
+}
+
static void gaudi2_hw_queues_lock(struct hl_device *hdev)
__acquires(&gaudi2->hw_queues_lock)
{
@@ -6996,11 +8193,13 @@ static inline bool is_info_event(u32 event)
switch (event) {
case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
+ case GAUDI2_EVENT_ARC_PWR_BRK_ENTRY ... GAUDI2_EVENT_ARC_PWR_RD_MODE3:
/* return in case of NIC status event - these events are received periodically and not as
* an indication of an error.
*/
case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1:
+ case GAUDI2_EVENT_ARC_EQ_HEARTBEAT:
return true;
default:
return false;
@@ -7032,155 +8231,76 @@ static void gaudi2_print_event(struct hl_device *hdev, u16 event_type,
static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
struct hl_eq_ecc_data *ecc_data)
{
- u64 ecc_address = 0, ecc_syndrom = 0;
+ u64 ecc_address = 0, ecc_syndrome = 0;
u8 memory_wrapper_idx = 0;
+ bool has_block_id = false;
+ u16 block_id;
+
+ if (hl_fw_version_cmp(hdev, 1, 12, 0) >= 0)
+ has_block_id = true;
ecc_address = le64_to_cpu(ecc_data->ecc_address);
- ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
+ ecc_syndrome = le64_to_cpu(ecc_data->ecc_syndrom);
memory_wrapper_idx = ecc_data->memory_wrapper_idx;
- gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
- "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u. critical %u.\n",
- ecc_address, ecc_syndrom, memory_wrapper_idx, ecc_data->is_critical);
+ if (has_block_id) {
+ block_id = le16_to_cpu(ecc_data->block_id);
+ gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
+ "ECC error detected. address: %#llx. Syndrome: %#llx. wrapper id %u. block id %#x. critical %u.",
+ ecc_address, ecc_syndrome, memory_wrapper_idx, block_id,
+ ecc_data->is_critical);
+ } else {
+ gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
+ "ECC error detected. address: %#llx. Syndrome: %#llx. wrapper id %u. critical %u.",
+ ecc_address, ecc_syndrome, memory_wrapper_idx, ecc_data->is_critical);
+ }
return !!ecc_data->is_critical;
}
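For illustration only (not part of the patch): the extra block id is only read when hl_fw_version_cmp(hdev, 1, 12, 0) >= 0, i.e. when the firmware is new enough to report it. A stand-alone sketch of such a three-component version comparison (assuming a plain major/minor/subminor ordering):

#include <stdio.h>

/* compare two (major, minor, sub) tuples; negative/zero/positive like strcmp() */
static int version_cmp(int ma1, int mi1, int su1, int ma2, int mi2, int su2)
{
	if (ma1 != ma2)
		return ma1 - ma2;
	if (mi1 != mi2)
		return mi1 - mi2;
	return su1 - su2;
}

int main(void)
{
	int fw_major = 1, fw_minor = 12, fw_sub = 0;    /* hypothetical running FW */

	if (version_cmp(fw_major, fw_minor, fw_sub, 1, 12, 0) >= 0)
		printf("block id field is valid\n");
	else
		printf("older firmware - block id not reported\n");
	return 0;
}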
-/*
- * gaudi2_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
- *
- * @idx: the current pi/ci value
- * @q_len: the queue length (power of 2)
- *
- * @return the cyclically decremented index
- */
-static inline u32 gaudi2_queue_idx_dec(u32 idx, u32 q_len)
-{
- u32 mask = q_len - 1;
-
- /*
- * modular decrement is equivalent to adding (queue_size -1)
- * later we take LSBs to make sure the value is in the
- * range [0, queue_len - 1]
- */
- return (idx + q_len - 1) & mask;
-}
-
-/**
- * gaudi2_print_sw_config_stream_data - print SW config stream data
- *
- * @hdev: pointer to the habanalabs device structure
- * @stream: the QMAN's stream
- * @qman_base: base address of QMAN registers block
- */
-static void gaudi2_print_sw_config_stream_data(struct hl_device *hdev,
- u32 stream, u64 qman_base)
+static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, u32 engine_id)
{
- u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
- u32 cq_ptr_lo_off, size;
-
- cq_ptr_lo_off = mmDCORE0_TPC0_QM_CQ_PTR_LO_1 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0;
-
- cq_ptr_lo = qman_base + (mmDCORE0_TPC0_QM_CQ_PTR_LO_0 - mmDCORE0_TPC0_QM_BASE) +
- stream * cq_ptr_lo_off;
-
- cq_ptr_hi = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_PTR_HI_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0);
-
- cq_tsize = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_TSIZE_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0);
-
- cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
- size = RREG32(cq_tsize);
- dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n",
- stream, cq_ptr, size);
-}
-
-/**
- * gaudi2_print_last_pqes_on_err - print last PQEs on error
- *
- * @hdev: pointer to the habanalabs device structure
- * @qid_base: first QID of the QMAN (out of 4 streams)
- * @stream: the QMAN's stream
- * @qman_base: base address of QMAN registers block
- * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
- */
-static void gaudi2_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, u32 stream,
- u64 qman_base, bool pr_sw_conf)
-{
- u32 ci, qm_ci_stream_off;
- struct hl_hw_queue *q;
- u64 pq_ci;
- int i;
-
- q = &hdev->kernel_queues[qid_base + stream];
-
- qm_ci_stream_off = mmDCORE0_TPC0_QM_PQ_CI_1 - mmDCORE0_TPC0_QM_PQ_CI_0;
- pq_ci = qman_base + (mmDCORE0_TPC0_QM_PQ_CI_0 - mmDCORE0_TPC0_QM_BASE) +
- stream * qm_ci_stream_off;
-
- hdev->asic_funcs->hw_queues_lock(hdev);
-
- if (pr_sw_conf)
- gaudi2_print_sw_config_stream_data(hdev, stream, qman_base);
+ struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
+ u64 cq_ptr, cp_current_inst;
+ u32 lo, hi, cq_size, cp_sts;
+ bool is_arc_cq;
- ci = RREG32(pq_ci);
+ cp_sts = RREG32(qman_base + QM_CP_STS_4_OFFSET);
+ is_arc_cq = FIELD_GET(PDMA0_QM_CP_STS_CUR_CQ_MASK, cp_sts); /* 0 - legacy CQ, 1 - ARC_CQ */
- /* we should start printing form ci -1 */
- ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH);
-
- for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
- struct hl_bd *bd;
- u64 addr;
- u32 len;
-
- bd = q->kernel_address;
- bd += ci;
-
- len = le32_to_cpu(bd->len);
- /* len 0 means uninitialized entry- break */
- if (!len)
- break;
-
- addr = le64_to_cpu(bd->ptr);
-
- dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n",
- stream, ci, addr, len);
-
- /* get previous ci, wrap if needed */
- ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH);
+ if (is_arc_cq) {
+ lo = RREG32(qman_base + QM_ARC_CQ_PTR_LO_STS_OFFSET);
+ hi = RREG32(qman_base + QM_ARC_CQ_PTR_HI_STS_OFFSET);
+ cq_ptr = ((u64) hi) << 32 | lo;
+ cq_size = RREG32(qman_base + QM_ARC_CQ_TSIZE_STS_OFFSET);
+ } else {
+ lo = RREG32(qman_base + QM_CQ_PTR_LO_STS_4_OFFSET);
+ hi = RREG32(qman_base + QM_CQ_PTR_HI_STS_4_OFFSET);
+ cq_ptr = ((u64) hi) << 32 | lo;
+ cq_size = RREG32(qman_base + QM_CQ_TSIZE_STS_4_OFFSET);
}
- hdev->asic_funcs->hw_queues_unlock(hdev);
-}
+ lo = RREG32(qman_base + QM_CP_CURRENT_INST_LO_4_OFFSET);
+ hi = RREG32(qman_base + QM_CP_CURRENT_INST_HI_4_OFFSET);
+ cp_current_inst = ((u64) hi) << 32 | lo;
-/**
- * print_qman_data_on_err - extract QMAN data on error
- *
- * @hdev: pointer to the habanalabs device structure
- * @qid_base: first QID of the QMAN (out of 4 streams)
- * @stream: the QMAN's stream
- * @qman_base: base address of QMAN registers block
- *
- * This function attempt to extract as much data as possible on QMAN error.
- * On upper CP print the SW config stream data and last 8 PQEs.
- * On lower CP print SW config data and last PQEs of ALL 4 upper CPs
- */
-static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base, u32 stream, u64 qman_base)
-{
- u32 i;
+ dev_info(hdev->dev,
+ "LowerQM. %sCQ: {ptr %#llx, size %u}, CP: {instruction %#018llx}\n",
+ is_arc_cq ? "ARC_" : "", cq_ptr, cq_size, cp_current_inst);
- if (stream != QMAN_STREAMS) {
- gaudi2_print_last_pqes_on_err(hdev, qid_base, stream, qman_base, true);
- return;
+ if (undef_opcode->write_enable) {
+ memset(undef_opcode, 0, sizeof(*undef_opcode));
+ undef_opcode->timestamp = ktime_get();
+ undef_opcode->cq_addr = cq_ptr;
+ undef_opcode->cq_size = cq_size;
+ undef_opcode->engine_id = engine_id;
+ undef_opcode->stream_id = QMAN_STREAMS;
+ undef_opcode->write_enable = 0;
}
-
- gaudi2_print_sw_config_stream_data(hdev, stream, qman_base);
-
- for (i = 0 ; i < QMAN_STREAMS ; i++)
- gaudi2_print_last_pqes_on_err(hdev, qid_base, i, qman_base, false);
}
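For illustration only (not part of the patch): handle_lower_qman_data_on_err() rebuilds 64-bit CQ and current-instruction pointers from separate HI/LO 32-bit registers, choosing the ARC CQ registers when the CP status reports the ARC CQ as active. The hi/lo recombination itself, as a stand-alone sketch (register values are hypothetical):

#include <stdint.h>
#include <stdio.h>

static uint64_t make_u64(uint32_t hi, uint32_t lo)
{
	return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
	uint32_t lo = 0xdeadbeef, hi = 0x00001000;      /* hypothetical register reads */

	printf("ptr = %#llx\n", (unsigned long long)make_u64(hi, lo));
	return 0;
}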
static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type,
- u64 qman_base, u32 qid_base)
+ u64 qman_base, u32 qid_base, u64 *event_mask)
{
u32 i, j, glbl_sts_val, arb_err_val, num_error_causes, error_count = 0;
u64 glbl_sts_addr, arb_err_addr;
@@ -7197,8 +8317,8 @@ static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type
continue;
if (i == QMAN_STREAMS) {
- snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
- num_error_causes = GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE;
+ snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerQM");
+ num_error_causes = GAUDI2_NUM_OF_LOWER_QM_ERR_CAUSE;
} else {
snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
num_error_causes = GAUDI2_NUM_OF_QM_ERR_CAUSE;
@@ -7209,12 +8329,18 @@ static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type
gaudi2_print_event(hdev, event_type, true,
"%s. err cause: %s", reg_desc,
i == QMAN_STREAMS ?
- gaudi2_qman_lower_cp_error_cause[j] :
+ gaudi2_lower_qman_error_cause[j] :
gaudi2_qman_error_cause[j]);
error_count++;
}
- print_qman_data_on_err(hdev, qid_base, i, qman_base);
+ /* Check for undefined opcode error in lower QM */
+ if ((i == QMAN_STREAMS) &&
+ (glbl_sts_val & PDMA0_QM_GLBL_ERR_STS_CP_UNDEF_CMD_ERR_MASK)) {
+ handle_lower_qman_data_on_err(hdev, qman_base,
+ gaudi2_queue_id_to_engine_id[qid_base]);
+ *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
+ }
}
arb_err_val = RREG32(arb_err_addr);
@@ -7328,6 +8454,9 @@ static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev,
case RAZWI_ROT:
return GAUDI2_ENGINE_ID_ROT_0 + module_idx;
+ case RAZWI_ARC_FARM:
+ return GAUDI2_ENGINE_ID_ARC_FARM;
+
default:
return GAUDI2_ENGINE_ID_SIZE;
}
@@ -7342,7 +8471,7 @@ static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
u8 module_sub_idx, u64 *event_mask)
{
bool via_sft = false;
- u32 hbw_rtr_id, lbw_rtr_id, dcore_id, dcore_rtr_id, eng_id;
+ u32 hbw_rtr_id, lbw_rtr_id, dcore_id, dcore_rtr_id, eng_id, binned_idx;
u64 hbw_rtr_mstr_if_base_addr, lbw_rtr_mstr_if_base_addr;
u32 hbw_shrd_aw = 0, hbw_shrd_ar = 0;
u32 lbw_shrd_aw = 0, lbw_shrd_ar = 0;
@@ -7350,17 +8479,15 @@ static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
switch (module) {
case RAZWI_TPC:
- hbw_rtr_id = gaudi2_tpc_initiator_hbw_rtr_id[module_idx];
-
- /* TODO : remove this check and depend only on tpc routers table
- * when SW-118828 is resolved
- */
- if (!hdev->asic_prop.fw_security_enabled &&
- ((module_idx == 0) || (module_idx == 1)))
- lbw_rtr_id = DCORE0_RTR0;
- else
- lbw_rtr_id = gaudi2_tpc_initiator_lbw_rtr_id[module_idx];
sprintf(initiator_name, "TPC_%u", module_idx);
+ if (hdev->tpc_binning) {
+ binned_idx = __ffs(hdev->tpc_binning);
+ if (binned_idx == module_idx)
+ module_idx = TPC_ID_DCORE0_TPC6;
+ }
+
+ hbw_rtr_id = gaudi2_tpc_initiator_hbw_rtr_id[module_idx];
+ lbw_rtr_id = gaudi2_tpc_initiator_lbw_rtr_id[module_idx];
break;
case RAZWI_MME:
sprintf(initiator_name, "MME_%u", module_idx);
@@ -7419,15 +8546,25 @@ static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
sprintf(initiator_name, "NIC_%u", module_idx);
break;
case RAZWI_DEC:
+ sprintf(initiator_name, "DEC_%u", module_idx);
+ if (hdev->decoder_binning) {
+ binned_idx = __ffs(hdev->decoder_binning);
+ if (binned_idx == module_idx)
+ module_idx = DEC_ID_PCIE_VDEC1;
+ }
hbw_rtr_id = gaudi2_dec_initiator_hbw_rtr_id[module_idx];
lbw_rtr_id = gaudi2_dec_initiator_lbw_rtr_id[module_idx];
- sprintf(initiator_name, "DEC_%u", module_idx);
break;
case RAZWI_ROT:
hbw_rtr_id = gaudi2_rot_initiator_hbw_rtr_id[module_idx];
lbw_rtr_id = gaudi2_rot_initiator_lbw_rtr_id[module_idx];
sprintf(initiator_name, "ROT_%u", module_idx);
break;
+ case RAZWI_ARC_FARM:
+ lbw_rtr_id = DCORE1_RTR5;
+ hbw_rtr_id = DCORE1_RTR7;
+ sprintf(initiator_name, "ARC_FARM_%u", module_idx);
+ break;
default:
return;
}
@@ -7526,297 +8663,126 @@ static void gaudi2_check_if_razwi_happened(struct hl_device *hdev)
gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL);
}
-static const char *gaudi2_get_initiators_name(u32 rtr_id)
-{
- switch (rtr_id) {
- case DCORE0_RTR0:
- return "DEC0/1/8/9, TPC24, PDMA0/1, PMMU, PCIE_IF, EDMA0/2, HMMU0/2/4/6, CPU";
- case DCORE0_RTR1:
- return "TPC0/1";
- case DCORE0_RTR2:
- return "TPC2/3";
- case DCORE0_RTR3:
- return "TPC4/5";
- case DCORE0_RTR4:
- return "MME0_SBTE0/1";
- case DCORE0_RTR5:
- return "MME0_WAP0/SBTE2";
- case DCORE0_RTR6:
- return "MME0_CTRL_WR/SBTE3";
- case DCORE0_RTR7:
- return "MME0_WAP1/CTRL_RD/SBTE4";
- case DCORE1_RTR0:
- return "MME1_WAP1/CTRL_RD/SBTE4";
- case DCORE1_RTR1:
- return "MME1_CTRL_WR/SBTE3";
- case DCORE1_RTR2:
- return "MME1_WAP0/SBTE2";
- case DCORE1_RTR3:
- return "MME1_SBTE0/1";
- case DCORE1_RTR4:
- return "TPC10/11";
- case DCORE1_RTR5:
- return "TPC8/9";
- case DCORE1_RTR6:
- return "TPC6/7";
- case DCORE1_RTR7:
- return "DEC2/3, NIC0/1/2/3/4, ARC_FARM, KDMA, EDMA1/3, HMMU1/3/5/7";
- case DCORE2_RTR0:
- return "DEC4/5, NIC5/6/7/8, EDMA4/6, HMMU8/10/12/14, ROT0";
- case DCORE2_RTR1:
- return "TPC16/17";
- case DCORE2_RTR2:
- return "TPC14/15";
- case DCORE2_RTR3:
- return "TPC12/13";
- case DCORE2_RTR4:
- return "MME2_SBTE0/1";
- case DCORE2_RTR5:
- return "MME2_WAP0/SBTE2";
- case DCORE2_RTR6:
- return "MME2_CTRL_WR/SBTE3";
- case DCORE2_RTR7:
- return "MME2_WAP1/CTRL_RD/SBTE4";
- case DCORE3_RTR0:
- return "MME3_WAP1/CTRL_RD/SBTE4";
- case DCORE3_RTR1:
- return "MME3_CTRL_WR/SBTE3";
- case DCORE3_RTR2:
- return "MME3_WAP0/SBTE2";
- case DCORE3_RTR3:
- return "MME3_SBTE0/1";
- case DCORE3_RTR4:
- return "TPC18/19";
- case DCORE3_RTR5:
- return "TPC20/21";
- case DCORE3_RTR6:
- return "TPC22/23";
- case DCORE3_RTR7:
- return "DEC6/7, NIC9/10/11, EDMA5/7, HMMU9/11/13/15, ROT1, PSOC";
- default:
- return "N/A";
- }
-}
-
-static u16 gaudi2_get_razwi_initiators(u32 rtr_id, u16 *engines)
-{
- switch (rtr_id) {
- case DCORE0_RTR0:
- engines[0] = GAUDI2_DCORE0_ENGINE_ID_DEC_0;
- engines[1] = GAUDI2_DCORE0_ENGINE_ID_DEC_1;
- engines[2] = GAUDI2_PCIE_ENGINE_ID_DEC_0;
- engines[3] = GAUDI2_PCIE_ENGINE_ID_DEC_1;
- engines[4] = GAUDI2_DCORE0_ENGINE_ID_TPC_6;
- engines[5] = GAUDI2_ENGINE_ID_PDMA_0;
- engines[6] = GAUDI2_ENGINE_ID_PDMA_1;
- engines[7] = GAUDI2_ENGINE_ID_PCIE;
- engines[8] = GAUDI2_DCORE0_ENGINE_ID_EDMA_0;
- engines[9] = GAUDI2_DCORE1_ENGINE_ID_EDMA_0;
- engines[10] = GAUDI2_ENGINE_ID_PSOC;
- return 11;
-
- case DCORE0_RTR1:
- engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_0;
- engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_1;
- return 2;
-
- case DCORE0_RTR2:
- engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_2;
- engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_3;
- return 2;
-
- case DCORE0_RTR3:
- engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_4;
- engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_5;
- return 2;
-
- case DCORE0_RTR4:
- case DCORE0_RTR5:
- case DCORE0_RTR6:
- case DCORE0_RTR7:
- engines[0] = GAUDI2_DCORE0_ENGINE_ID_MME;
- return 1;
-
- case DCORE1_RTR0:
- case DCORE1_RTR1:
- case DCORE1_RTR2:
- case DCORE1_RTR3:
- engines[0] = GAUDI2_DCORE1_ENGINE_ID_MME;
- return 1;
-
- case DCORE1_RTR4:
- engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_4;
- engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_5;
- return 2;
-
- case DCORE1_RTR5:
- engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_2;
- engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_3;
- return 2;
-
- case DCORE1_RTR6:
- engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_0;
- engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_1;
- return 2;
-
- case DCORE1_RTR7:
- engines[0] = GAUDI2_DCORE1_ENGINE_ID_DEC_0;
- engines[1] = GAUDI2_DCORE1_ENGINE_ID_DEC_1;
- engines[2] = GAUDI2_ENGINE_ID_NIC0_0;
- engines[3] = GAUDI2_ENGINE_ID_NIC1_0;
- engines[4] = GAUDI2_ENGINE_ID_NIC2_0;
- engines[5] = GAUDI2_ENGINE_ID_NIC3_0;
- engines[6] = GAUDI2_ENGINE_ID_NIC4_0;
- engines[7] = GAUDI2_ENGINE_ID_ARC_FARM;
- engines[8] = GAUDI2_ENGINE_ID_KDMA;
- engines[9] = GAUDI2_DCORE0_ENGINE_ID_EDMA_1;
- engines[10] = GAUDI2_DCORE1_ENGINE_ID_EDMA_1;
- return 11;
-
- case DCORE2_RTR0:
- engines[0] = GAUDI2_DCORE2_ENGINE_ID_DEC_0;
- engines[1] = GAUDI2_DCORE2_ENGINE_ID_DEC_1;
- engines[2] = GAUDI2_ENGINE_ID_NIC5_0;
- engines[3] = GAUDI2_ENGINE_ID_NIC6_0;
- engines[4] = GAUDI2_ENGINE_ID_NIC7_0;
- engines[5] = GAUDI2_ENGINE_ID_NIC8_0;
- engines[6] = GAUDI2_DCORE2_ENGINE_ID_EDMA_0;
- engines[7] = GAUDI2_DCORE3_ENGINE_ID_EDMA_0;
- engines[8] = GAUDI2_ENGINE_ID_ROT_0;
- return 9;
-
- case DCORE2_RTR1:
- engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_4;
- engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_5;
- return 2;
-
- case DCORE2_RTR2:
- engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_2;
- engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_3;
- return 2;
-
- case DCORE2_RTR3:
- engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_0;
- engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_1;
- return 2;
-
- case DCORE2_RTR4:
- case DCORE2_RTR5:
- case DCORE2_RTR6:
- case DCORE2_RTR7:
- engines[0] = GAUDI2_DCORE2_ENGINE_ID_MME;
- return 1;
- case DCORE3_RTR0:
- case DCORE3_RTR1:
- case DCORE3_RTR2:
- case DCORE3_RTR3:
- engines[0] = GAUDI2_DCORE3_ENGINE_ID_MME;
- return 1;
- case DCORE3_RTR4:
- engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_0;
- engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_1;
- return 2;
- case DCORE3_RTR5:
- engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_2;
- engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_3;
- return 2;
- case DCORE3_RTR6:
- engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_4;
- engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_5;
- return 2;
- case DCORE3_RTR7:
- engines[0] = GAUDI2_DCORE3_ENGINE_ID_DEC_0;
- engines[1] = GAUDI2_DCORE3_ENGINE_ID_DEC_1;
- engines[2] = GAUDI2_ENGINE_ID_NIC9_0;
- engines[3] = GAUDI2_ENGINE_ID_NIC10_0;
- engines[4] = GAUDI2_ENGINE_ID_NIC11_0;
- engines[5] = GAUDI2_DCORE2_ENGINE_ID_EDMA_1;
- engines[6] = GAUDI2_DCORE3_ENGINE_ID_EDMA_1;
- engines[7] = GAUDI2_ENGINE_ID_ROT_1;
- engines[8] = GAUDI2_ENGINE_ID_ROT_0;
- return 9;
- default:
- return 0;
- }
-}
-
-static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, u32 rtr_id,
- u64 rtr_ctrl_base_addr, bool is_write,
- u64 *event_mask)
+static int gaudi2_psoc_razwi_get_engines(struct gaudi2_razwi_info *razwi_info, u32 array_size,
+ u32 axuser_xy, u32 *base, u16 *eng_id,
+ char *eng_name)
{
- u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng;
- u32 razwi_hi, razwi_lo;
- u8 rd_wr_flag;
- num_of_eng = gaudi2_get_razwi_initiators(rtr_id, &engines[0]);
-
- if (is_write) {
- razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_HI);
- razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_LO);
- rd_wr_flag = HL_RAZWI_WRITE;
+ int i, num_of_eng = 0;
+ u16 str_size = 0;
- /* Clear set indication */
- WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET, 0x1);
- } else {
- razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_HI);
- razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_LO);
- rd_wr_flag = HL_RAZWI_READ;
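+ /* Collect every engine whose AXUSER X/Y coordinates match, and build a
+ * printable "ENG_A or ENG_B" name string for the error report.
+ */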
+ for (i = 0 ; i < array_size ; i++) {
+ if (axuser_xy != razwi_info[i].axuser_xy)
+ continue;
- /* Clear set indication */
- WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET, 0x1);
+ eng_id[num_of_eng] = razwi_info[i].eng_id;
+ base[num_of_eng] = razwi_info[i].rtr_ctrl;
+ if (!num_of_eng)
+ str_size += scnprintf(eng_name + str_size,
+ PSOC_RAZWI_ENG_STR_SIZE - str_size, "%s",
+ razwi_info[i].eng_name);
+ else
+ str_size += scnprintf(eng_name + str_size,
+ PSOC_RAZWI_ENG_STR_SIZE - str_size, " or %s",
+ razwi_info[i].eng_name);
+ num_of_eng++;
}
- hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &engines[0], num_of_eng,
- rd_wr_flag | HL_RAZWI_HBW, event_mask);
- dev_err_ratelimited(hdev->dev,
- "RAZWI PSOC unmapped HBW %s error, rtr id %u, address %#llx\n",
- is_write ? "WR" : "RD", rtr_id, (u64)razwi_hi << 32 | razwi_lo);
-
- dev_err_ratelimited(hdev->dev,
- "Initiators: %s\n", gaudi2_get_initiators_name(rtr_id));
+ return num_of_eng;
}
-static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev, u32 rtr_id,
- u64 rtr_ctrl_base_addr, bool is_write,
- u64 *event_mask)
+static bool gaudi2_handle_psoc_razwi_happened(struct hl_device *hdev, u32 razwi_reg,
+ u64 *event_mask)
{
- u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng;
- u64 razwi_addr = CFG_BASE;
- u8 rd_wr_flag;
+ u32 axuser_xy = RAZWI_GET_AXUSER_XY(razwi_reg), addr_hi = 0, addr_lo = 0;
+ u32 base[PSOC_RAZWI_MAX_ENG_PER_RTR];
+ u16 num_of_eng, eng_id[PSOC_RAZWI_MAX_ENG_PER_RTR];
+ char eng_name_str[PSOC_RAZWI_ENG_STR_SIZE];
+ bool razwi_happened = false;
+ u64 addr;
+ int i;
- num_of_eng = gaudi2_get_razwi_initiators(rtr_id, &engines[0]);
+ num_of_eng = gaudi2_psoc_razwi_get_engines(common_razwi_info, ARRAY_SIZE(common_razwi_info),
+ axuser_xy, base, eng_id, eng_name_str);
+
+ /* If no match for the XY coordinates, try to find it in the MME razwi table */
+ if (!num_of_eng) {
+ axuser_xy = RAZWI_GET_AXUSER_LOW_XY(razwi_reg);
+ num_of_eng = gaudi2_psoc_razwi_get_engines(mme_razwi_info,
+ ARRAY_SIZE(mme_razwi_info),
+ axuser_xy, base, eng_id,
+ eng_name_str);
+ }
+
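+ /* For each matching router, check all four RAZWI capture points
+ * (HBW/LBW write and read) and report any latched address.
+ */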
+ for (i = 0 ; i < num_of_eng ; i++) {
+ if (RREG32(base[i] + DEC_RAZWI_HBW_AW_SET)) {
+ addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_HI);
+ addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_LO);
+ addr = ((u64)addr_hi << 32) + addr_lo;
+ if (addr) {
+ dev_err(hdev->dev,
+ "PSOC HBW AW RAZWI: %s, address (aligned to 128 byte): 0x%llX\n",
+ eng_name_str, addr);
+ hl_handle_razwi(hdev, addr, &eng_id[0],
+ num_of_eng, HL_RAZWI_HBW | HL_RAZWI_WRITE, event_mask);
+ razwi_happened = true;
+ }
+ }
- if (is_write) {
- razwi_addr += RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_ADDR);
- rd_wr_flag = HL_RAZWI_WRITE;
+ if (RREG32(base[i] + DEC_RAZWI_HBW_AR_SET)) {
+ addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_HI);
+ addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_LO);
+ addr = ((u64)addr_hi << 32) + addr_lo;
+ if (addr) {
+ dev_err(hdev->dev,
+ "PSOC HBW AR RAZWI: %s, address (aligned to 128 byte): 0x%llX\n",
+ eng_name_str, addr);
+ hl_handle_razwi(hdev, addr, &eng_id[0],
+ num_of_eng, HL_RAZWI_HBW | HL_RAZWI_READ, event_mask);
+ razwi_happened = true;
+ }
+ }
- /* Clear set indication */
- WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET, 0x1);
- } else {
- razwi_addr += RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_ADDR);
- rd_wr_flag = HL_RAZWI_READ;
+ if (RREG32(base[i] + DEC_RAZWI_LBW_AW_SET)) {
+ addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AW_ADDR);
+ if (addr_lo) {
+ dev_err(hdev->dev,
+ "PSOC LBW AW RAZWI: %s, address (aligned to 128 byte): 0x%X\n",
+ eng_name_str, addr_lo);
+ hl_handle_razwi(hdev, addr_lo, &eng_id[0],
+ num_of_eng, HL_RAZWI_LBW | HL_RAZWI_WRITE, event_mask);
+ razwi_happened = true;
+ }
+ }
- /* Clear set indication */
- WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET, 0x1);
+ if (RREG32(base[i] + DEC_RAZWI_LBW_AR_SET)) {
+ addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AR_ADDR);
+ if (addr_lo) {
+ dev_err(hdev->dev,
+ "PSOC LBW AR RAZWI: %s, address (aligned to 128 byte): 0x%X\n",
+ eng_name_str, addr_lo);
+ hl_handle_razwi(hdev, addr_lo, &eng_id[0],
+ num_of_eng, HL_RAZWI_LBW | HL_RAZWI_READ, event_mask);
+ razwi_happened = true;
+ }
+ }
+ /* In the common case the loop will break here, since there is only one engine
+ * id or several engines sharing the same router. The exceptional case is a PSOC
+ * razwi from EDMA, where it is possible to get an axuser id that fits 2 routers
+ * (the 2 interfaces of the SFT router). In that case the first router might not
+ * hold the info and we will need to iterate over the other router.
+ */
+ if (razwi_happened)
+ break;
}
- hl_handle_razwi(hdev, razwi_addr, &engines[0], num_of_eng, rd_wr_flag | HL_RAZWI_LBW,
- event_mask);
- dev_err_ratelimited(hdev->dev,
- "RAZWI PSOC unmapped LBW %s error, rtr id %u, address 0x%llX\n",
- is_write ? "WR" : "RD", rtr_id, razwi_addr);
-
- dev_err_ratelimited(hdev->dev,
- "Initiators: %s\n", gaudi2_get_initiators_name(rtr_id));
+ return razwi_happened;
}
/* PSOC RAZWI interrupt occurs only when trying to access a bad address */
static int gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev, u64 *event_mask)
{
- u32 hbw_aw_set, hbw_ar_set, lbw_aw_set, lbw_ar_set, rtr_id, dcore_id, dcore_rtr_id, xy,
- razwi_mask_info, razwi_intr = 0, error_count = 0;
- int rtr_map_arr_len = NUM_OF_RTR_PER_DCORE * NUM_OF_DCORES;
- u64 rtr_ctrl_base_addr;
+ u32 razwi_mask_info, razwi_intr = 0, error_count = 0;
if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) {
razwi_intr = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT);
@@ -7825,63 +8791,22 @@ static int gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev, u64 *even
}
razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO);
- xy = FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info);
dev_err_ratelimited(hdev->dev,
"PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n",
FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info),
FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info),
FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info),
- xy,
+ FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info),
FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info));
- if (xy == 0) {
- dev_err_ratelimited(hdev->dev,
- "PSOC RAZWI interrupt: received event from 0 rtr coordinates\n");
- goto clear;
- }
-
- /* Find router id by router coordinates */
- for (rtr_id = 0 ; rtr_id < rtr_map_arr_len ; rtr_id++)
- if (rtr_coordinates_to_rtr_id[rtr_id] == xy)
- break;
-
- if (rtr_id == rtr_map_arr_len) {
+ if (gaudi2_handle_psoc_razwi_happened(hdev, razwi_mask_info, event_mask))
+ error_count++;
+ else
dev_err_ratelimited(hdev->dev,
- "PSOC RAZWI interrupt: invalid rtr coordinates (0x%x)\n", xy);
- goto clear;
- }
-
- /* Find router mstr_if register base */
- dcore_id = rtr_id / NUM_OF_RTR_PER_DCORE;
- dcore_rtr_id = rtr_id % NUM_OF_RTR_PER_DCORE;
- rtr_ctrl_base_addr = mmDCORE0_RTR0_CTRL_BASE + dcore_id * DCORE_OFFSET +
- dcore_rtr_id * DCORE_RTR_OFFSET;
-
- hbw_aw_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET);
- hbw_ar_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET);
- lbw_aw_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET);
- lbw_ar_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET);
+ "PSOC RAZWI interrupt: invalid razwi info (0x%x)\n",
+ razwi_mask_info);
- if (hbw_aw_set)
- gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id,
- rtr_ctrl_base_addr, true, event_mask);
-
- if (hbw_ar_set)
- gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id,
- rtr_ctrl_base_addr, false, event_mask);
-
- if (lbw_aw_set)
- gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id,
- rtr_ctrl_base_addr, true, event_mask);
-
- if (lbw_ar_set)
- gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id,
- rtr_ctrl_base_addr, false, event_mask);
-
- error_count++;
-
-clear:
/* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */
if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX))
WREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT, razwi_intr);
@@ -7976,7 +8901,7 @@ static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *e
{
u32 qid_base, error_count = 0;
u64 qman_base;
- u8 index;
+ u8 index = 0;
switch (event_type) {
case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC5_QM:
@@ -8079,7 +9004,8 @@ static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *e
return 0;
}
- error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base, qid_base);
+ error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base,
+ qid_base, event_mask);
/* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */
if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM) {
@@ -8092,25 +9018,31 @@ static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *e
return error_count;
}
-static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type)
+static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
- u32 i, sts_val, sts_clr_val = 0, error_count = 0;
+ u32 i, sts_val, sts_clr_val, error_count = 0, arc_farm;
- sts_val = RREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_STS);
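+ /* Each ARC-farm ARC has its own SEI status/clear registers at a fixed
+ * stride, so scan and clear them all.
+ */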
+ for (arc_farm = 0 ; arc_farm < NUM_OF_ARC_FARMS_ARC ; arc_farm++) {
+ sts_clr_val = 0;
+ sts_val = RREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_STS +
+ (arc_farm * ARC_FARM_OFFSET));
- for (i = 0 ; i < GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE ; i++) {
- if (sts_val & BIT(i)) {
- gaudi2_print_event(hdev, event_type, true,
- "err cause: %s", gaudi2_arc_sei_error_cause[i]);
- sts_clr_val |= BIT(i);
- error_count++;
+ for (i = 0 ; i < GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE ; i++) {
+ if (sts_val & BIT(i)) {
+ gaudi2_print_event(hdev, event_type, true,
+ "ARC FARM ARC %u err cause: %s",
+ arc_farm, gaudi2_arc_sei_error_cause[i]);
+ sts_clr_val |= BIT(i);
+ error_count++;
+ }
}
+ WREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_CLR + (arc_farm * ARC_FARM_OFFSET),
+ sts_clr_val);
}
+ gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ARC_FARM, 0, 0, event_mask);
hl_check_for_glbl_errors(hdev);
- WREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_CLR, sts_clr_val);
-
return error_count;
}
@@ -8248,21 +9180,16 @@ static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event
return error_count;
}
-static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type,
- u64 intr_cause_data)
+static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type)
{
- int i, error_count = 0;
-
- for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++)
- if (intr_cause_data & BIT(i)) {
- gaudi2_print_event(hdev, event_type, true,
- "err cause: %s", guadi2_mme_sbte_error_cause[i]);
- error_count++;
- }
-
+ /*
+ * We have a single error cause here but the report mechanism is
+ * buggy. Hence there is no good reason to fetch the cause, so we
+ * just check for glbl_errors and exit.
+ */
hl_check_for_glbl_errors(hdev);
- return error_count;
+ return GAUDI2_NA_EVENT_CAUSE;
}
static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
@@ -8318,14 +9245,13 @@ static int gaudi2_handle_kdma_core_event(struct hl_device *hdev, u16 event_type,
return error_count;
}
-static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type,
- u64 intr_cause_data)
+static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type, u64 intr_cause)
{
u32 error_count = 0;
int i;
for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
- if (intr_cause_data & BIT(i)) {
+ if (intr_cause & BIT(i)) {
gaudi2_print_event(hdev, event_type, true,
"err cause: %s", gaudi2_dma_core_interrupts_cause[i]);
error_count++;
@@ -8433,7 +9359,7 @@ static int gaudi2_handle_hif_fatal(struct hl_device *hdev, u16 event_type, u64 i
static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu,
u64 *event_mask)
{
- u32 valid, val, axid_l, axid_h;
+ u32 valid, val;
u64 addr;
valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
@@ -8446,14 +9372,18 @@ static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool
addr <<= 32;
addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA));
- axid_l = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_FAULT_ID_LSB));
- axid_h = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_FAULT_ID_MSB));
+ if (is_pmmu) {
+ dev_err_ratelimited(hdev->dev, "PMMU page fault on va 0x%llx\n", addr);
+ } else {
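+ /* Only the upper, unscrambled address bits are reliable after
+ * descrambling, so report the fault as a VA range rather than an
+ * exact address.
+ */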
+ addr = gaudi2_mmu_descramble_addr(hdev, addr);
+ addr &= HW_UNSCRAMBLED_BITS_MASK;
+ dev_err_ratelimited(hdev->dev, "HMMU page fault on va range 0x%llx - 0x%llx\n",
+ addr, addr + ~HW_UNSCRAMBLED_BITS_MASK);
+ }
- dev_err_ratelimited(hdev->dev, "%s page fault on va 0x%llx, transaction id 0x%llX\n",
- is_pmmu ? "PMMU" : "HMMU", addr, ((u64)axid_h << 32) + axid_l);
hl_handle_page_fault(hdev, addr, 0, is_pmmu, event_mask);
- WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE), 0);
+ WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID), 0);
}
static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu)
@@ -8471,9 +9401,12 @@ static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, boo
addr <<= 32;
addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA));
+ if (!is_pmmu)
+ addr = gaudi2_mmu_descramble_addr(hdev, addr);
+
dev_err_ratelimited(hdev->dev, "%s access error on va 0x%llx\n",
is_pmmu ? "PMMU" : "HMMU", addr);
- WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE), 0);
+ WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID), 0);
}
static int gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, u16 event_type,
@@ -8534,7 +9467,7 @@ static int gaudi2_handle_sm_err(struct hl_device *hdev, u16 event_type, u8 sm_in
continue;
gaudi2_print_event(hdev, event_type, true,
- "err cause: %s. %s: 0x%X\n",
+ "err cause: %s. %s: 0x%X",
gaudi2_sm_sei_cause[i].cause_name,
gaudi2_sm_sei_cause[i].log_name,
sei_cause_log);
@@ -8565,46 +9498,110 @@ static int gaudi2_handle_sm_err(struct hl_device *hdev, u16 event_type, u8 sm_in
return error_count;
}
+static u64 get_hmmu_base(u16 event_type)
+{
+ u8 dcore, index_in_dcore;
+
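+ /* Map the event to one of the 16 HMMUs, identified by its dcore and its
+ * index inside that dcore; note the pairs below do not follow a simple
+ * ascending order.
+ */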
+ switch (event_type) {
+ case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP:
+ case GAUDI2_EVENT_HMMU0_SPI_BASE ... GAUDI2_EVENT_HMMU0_SECURITY_ERROR:
+ dcore = 0;
+ index_in_dcore = 0;
+ break;
+ case GAUDI2_EVENT_HMMU_1_AXI_ERR_RSP:
+ case GAUDI2_EVENT_HMMU1_SPI_BASE ... GAUDI2_EVENT_HMMU1_SECURITY_ERROR:
+ dcore = 1;
+ index_in_dcore = 0;
+ break;
+ case GAUDI2_EVENT_HMMU_2_AXI_ERR_RSP:
+ case GAUDI2_EVENT_HMMU2_SPI_BASE ... GAUDI2_EVENT_HMMU2_SECURITY_ERROR:
+ dcore = 0;
+ index_in_dcore = 1;
+ break;
+ case GAUDI2_EVENT_HMMU_3_AXI_ERR_RSP:
+ case GAUDI2_EVENT_HMMU3_SPI_BASE ... GAUDI2_EVENT_HMMU3_SECURITY_ERROR:
+ dcore = 1;
+ index_in_dcore = 1;
+ break;
+ case GAUDI2_EVENT_HMMU_4_AXI_ERR_RSP:
+ case GAUDI2_EVENT_HMMU4_SPI_BASE ... GAUDI2_EVENT_HMMU4_SECURITY_ERROR:
+ dcore = 3;
+ index_in_dcore = 2;
+ break;
+ case GAUDI2_EVENT_HMMU_5_AXI_ERR_RSP:
+ case GAUDI2_EVENT_HMMU5_SPI_BASE ... GAUDI2_EVENT_HMMU5_SECURITY_ERROR:
+ dcore = 2;
+ index_in_dcore = 2;
+ break;
+ case GAUDI2_EVENT_HMMU_6_AXI_ERR_RSP:
+ case GAUDI2_EVENT_HMMU6_SPI_BASE ... GAUDI2_EVENT_HMMU6_SECURITY_ERROR:
+ dcore = 3;
+ index_in_dcore = 3;
+ break;
+ case GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP:
+ case GAUDI2_EVENT_HMMU7_SPI_BASE ... GAUDI2_EVENT_HMMU7_SECURITY_ERROR:
+ dcore = 2;
+ index_in_dcore = 3;
+ break;
+ case GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP:
+ case GAUDI2_EVENT_HMMU8_SPI_BASE ... GAUDI2_EVENT_HMMU8_SECURITY_ERROR:
+ dcore = 0;
+ index_in_dcore = 2;
+ break;
+ case GAUDI2_EVENT_HMMU_9_AXI_ERR_RSP:
+ case GAUDI2_EVENT_HMMU9_SPI_BASE ... GAUDI2_EVENT_HMMU9_SECURITY_ERROR:
+ dcore = 1;
+ index_in_dcore = 2;
+ break;
+ case GAUDI2_EVENT_HMMU_10_AXI_ERR_RSP:
+ case GAUDI2_EVENT_HMMU10_SPI_BASE ... GAUDI2_EVENT_HMMU10_SECURITY_ERROR:
+ dcore = 0;
+ index_in_dcore = 3;
+ break;
+ case GAUDI2_EVENT_HMMU_11_AXI_ERR_RSP:
+ case GAUDI2_EVENT_HMMU11_SPI_BASE ... GAUDI2_EVENT_HMMU11_SECURITY_ERROR:
+ dcore = 1;
+ index_in_dcore = 3;
+ break;
+ case GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
+ case GAUDI2_EVENT_HMMU12_SPI_BASE ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
+ dcore = 3;
+ index_in_dcore = 0;
+ break;
+ case GAUDI2_EVENT_HMMU_13_AXI_ERR_RSP:
+ case GAUDI2_EVENT_HMMU13_SPI_BASE ... GAUDI2_EVENT_HMMU13_SECURITY_ERROR:
+ dcore = 2;
+ index_in_dcore = 0;
+ break;
+ case GAUDI2_EVENT_HMMU_14_AXI_ERR_RSP:
+ case GAUDI2_EVENT_HMMU14_SPI_BASE ... GAUDI2_EVENT_HMMU14_SECURITY_ERROR:
+ dcore = 3;
+ index_in_dcore = 1;
+ break;
+ case GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP:
+ case GAUDI2_EVENT_HMMU15_SPI_BASE ... GAUDI2_EVENT_HMMU15_SECURITY_ERROR:
+ dcore = 2;
+ index_in_dcore = 1;
+ break;
+ default:
+ return ULONG_MAX;
+ }
+
+ return mmDCORE0_HMMU0_MMU_BASE + dcore * DCORE_OFFSET + index_in_dcore * DCORE_HMMU_OFFSET;
+}
+
static int gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
bool is_pmmu = false;
u32 error_count = 0;
u64 mmu_base;
- u8 index;
switch (event_type) {
- case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU3_SECURITY_ERROR:
- index = (event_type - GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM) / 3;
- mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
- break;
- case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_3_AXI_ERR_RSP:
- index = (event_type - GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP);
- mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
- break;
- case GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU11_SECURITY_ERROR:
- index = (event_type - GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM) / 3;
- mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
- break;
- case GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_11_AXI_ERR_RSP:
- index = (event_type - GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP);
- mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
- break;
- case GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU4_SECURITY_ERROR:
- index = (event_type - GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM) / 3;
- mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
- break;
- case GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_4_AXI_ERR_RSP:
- index = (event_type - GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP);
- mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
- break;
- case GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
- index = (event_type - GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM) / 3;
- mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
- break;
- case GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
- index = (event_type - GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP);
- mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
+ case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
+ case GAUDI2_EVENT_HMMU0_SPI_BASE ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
+ mmu_base = get_hmmu_base(event_type);
break;
+
case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
is_pmmu = true;
@@ -8614,6 +9611,9 @@ static int gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type,
return 0;
}
+ if (mmu_base == ULONG_MAX)
+ return 0;
+
error_count = gaudi2_handle_mmu_spi_sei_generic(hdev, event_type, mmu_base,
is_pmmu, event_mask);
hl_check_for_glbl_errors(hdev);
@@ -8626,8 +9626,8 @@ static int gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type,
static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev,
struct hl_eq_hbm_sei_read_err_intr_info *rd_err_data, u32 err_cnt)
{
+ bool require_hard_reset = false;
u32 addr, beat, beat_shift;
- bool rc = false;
dev_err_ratelimited(hdev->dev,
"READ ERROR count: ECC SERR: %d, ECC DERR: %d, RD_PARITY: %d\n",
@@ -8659,7 +9659,7 @@ static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev,
beat,
le32_to_cpu(rd_err_data->dbg_rd_err_dm),
le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
- rc |= true;
+ require_hard_reset = true;
}
beat_shift = beat * HBM_RD_ERR_BEAT_SHIFT;
@@ -8672,7 +9672,7 @@ static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev,
(le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
(HBM_RD_ERR_PAR_DATA_BEAT0_MASK << beat_shift)) >>
(HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT + beat_shift));
- rc |= true;
+ require_hard_reset = true;
}
dev_err_ratelimited(hdev->dev, "Beat%d DQ data:\n", beat);
@@ -8682,7 +9682,7 @@ static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev,
le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2 + 1]));
}
- return rc;
+ return require_hard_reset;
}
static void gaudi2_hbm_sei_print_wr_par_info(struct hl_device *hdev,
@@ -8740,12 +9740,12 @@ static bool gaudi2_handle_hbm_mc_sei_err(struct hl_device *hdev, u16 event_type,
if (cause_idx > GAUDI2_NUM_OF_HBM_SEI_CAUSE - 1) {
gaudi2_print_event(hdev, event_type, true,
"err cause: %s",
- "Invalid HBM SEI event cause (%d) provided by FW\n", cause_idx);
+ "Invalid HBM SEI event cause (%d) provided by FW", cause_idx);
return true;
}
gaudi2_print_event(hdev, event_type, !sei_data->hdr.is_critical,
- "System %s Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s\n",
+ "System %s Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s",
sei_data->hdr.is_critical ? "Critical" : "Non-critical",
hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel,
hbm_mc_sei_cause[cause_idx]);
@@ -8869,7 +9869,7 @@ static void gaudi2_print_out_of_sync_info(struct hl_device *hdev, u16 event_type
struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
gaudi2_print_event(hdev, event_type, false,
- "FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
+ "FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d",
le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci),
q->pi, atomic_read(&q->ci));
}
@@ -8883,7 +9883,7 @@ static int gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev, u16 event_type)
if (p2p_intr) {
gaudi2_print_event(hdev, event_type, true,
- "pcie p2p transaction terminated due to security, req_id(0x%x)\n",
+ "pcie p2p transaction terminated due to security, req_id(0x%x)",
RREG32(mmPCIE_WRAP_P2P_REQ_ID));
WREG32(mmPCIE_WRAP_P2P_INTR, 0x1);
@@ -8892,7 +9892,7 @@ static int gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev, u16 event_type)
if (msix_gw_intr) {
gaudi2_print_event(hdev, event_type, true,
- "pcie msi-x gen denied due to vector num check failure, vec(0x%X)\n",
+ "pcie msi-x gen denied due to vector num check failure, vec(0x%X)",
RREG32(mmPCIE_WRAP_MSIX_GW_VEC));
WREG32(mmPCIE_WRAP_MSIX_GW_INTR, 0x1);
@@ -8905,25 +9905,17 @@ static int gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev, u16 event_type)
static int gaudi2_handle_pcie_drain(struct hl_device *hdev,
struct hl_eq_pcie_drain_ind_data *drain_data)
{
- u64 lbw_rd, lbw_wr, hbw_rd, hbw_wr, cause, error_count = 0;
+ u64 cause, error_count = 0;
cause = le64_to_cpu(drain_data->intr_cause.intr_cause_data);
- lbw_rd = le64_to_cpu(drain_data->drain_rd_addr_lbw);
- lbw_wr = le64_to_cpu(drain_data->drain_wr_addr_lbw);
- hbw_rd = le64_to_cpu(drain_data->drain_rd_addr_hbw);
- hbw_wr = le64_to_cpu(drain_data->drain_wr_addr_hbw);
if (cause & BIT_ULL(0)) {
- dev_err_ratelimited(hdev->dev,
- "PCIE AXI drain LBW completed, read_err %u, write_err %u\n",
- !!lbw_rd, !!lbw_wr);
+ dev_err_ratelimited(hdev->dev, "PCIE AXI drain LBW completed\n");
error_count++;
}
if (cause & BIT_ULL(1)) {
- dev_err_ratelimited(hdev->dev,
- "PCIE AXI drain HBW completed, raddr %#llx, waddr %#llx\n",
- hbw_rd, hbw_wr);
+ dev_err_ratelimited(hdev->dev, "PCIE AXI drain HBW completed\n");
error_count++;
}
@@ -8954,7 +9946,7 @@ static void gaudi2_print_cpu_pkt_failure_info(struct hl_device *hdev, u16 event_
struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
gaudi2_print_event(hdev, event_type, false,
- "FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
+ "FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d",
le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
}
@@ -8974,20 +9966,185 @@ static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type,
q = (struct hl_engine_arc_dccm_queue_full_irq *) &payload;
gaudi2_print_event(hdev, event_type, true,
- "ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u\n",
- engine_id, intr_type, q->queue_index);
+ "ARC DCCM Full event: Eng: %s, Intr_type: %u, Qidx: %u",
+ GAUDI2_ENG_ID_TO_STR(engine_id), intr_type, q->queue_index);
return 1;
default:
- gaudi2_print_event(hdev, event_type, true, "Unknown ARC event type\n");
+ gaudi2_print_event(hdev, event_type, true, "Unknown ARC event type");
return 0;
}
}
+static u16 event_id_to_engine_id(struct hl_device *hdev, u16 event_type)
+{
+ enum gaudi2_block_types type = GAUDI2_BLOCK_TYPE_MAX;
+ u16 index;
+
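+ /* Translate an event type to the engine id it originated from, for engine
+ * error capture. Events listed explicitly return directly; the rest resolve
+ * a block type and index and fall through to the per-block switch below.
+ */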
+ switch (event_type) {
+ case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
+ index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
+ type = GAUDI2_BLOCK_TYPE_TPC;
+ break;
+ case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC24_QM:
+ index = event_type - GAUDI2_EVENT_TPC0_QM;
+ type = GAUDI2_BLOCK_TYPE_TPC;
+ break;
+ case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
+ case GAUDI2_EVENT_MME0_SPI_BASE ... GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
+ case GAUDI2_EVENT_MME0_QM:
+ index = 0;
+ type = GAUDI2_BLOCK_TYPE_MME;
+ break;
+ case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
+ case GAUDI2_EVENT_MME1_SPI_BASE ... GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
+ case GAUDI2_EVENT_MME1_QM:
+ index = 1;
+ type = GAUDI2_BLOCK_TYPE_MME;
+ break;
+ case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
+ case GAUDI2_EVENT_MME2_SPI_BASE ... GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
+ case GAUDI2_EVENT_MME2_QM:
+ index = 2;
+ type = GAUDI2_BLOCK_TYPE_MME;
+ break;
+ case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
+ case GAUDI2_EVENT_MME3_SPI_BASE ... GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
+ case GAUDI2_EVENT_MME3_QM:
+ index = 3;
+ type = GAUDI2_BLOCK_TYPE_MME;
+ break;
+ case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
+ case GAUDI2_EVENT_KDMA_BM_SPMU:
+ case GAUDI2_EVENT_KDMA0_CORE:
+ return GAUDI2_ENGINE_ID_KDMA;
+ case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
+ case GAUDI2_EVENT_PDMA0_CORE:
+ case GAUDI2_EVENT_PDMA0_BM_SPMU:
+ case GAUDI2_EVENT_PDMA0_QM:
+ return GAUDI2_ENGINE_ID_PDMA_0;
+ case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
+ case GAUDI2_EVENT_PDMA1_CORE:
+ case GAUDI2_EVENT_PDMA1_BM_SPMU:
+ case GAUDI2_EVENT_PDMA1_QM:
+ return GAUDI2_ENGINE_ID_PDMA_1;
+ case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
+ index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
+ type = GAUDI2_BLOCK_TYPE_DEC;
+ break;
+ case GAUDI2_EVENT_DEC0_SPI ... GAUDI2_EVENT_DEC9_BMON_SPMU:
+ index = (event_type - GAUDI2_EVENT_DEC0_SPI) >> 1;
+ type = GAUDI2_BLOCK_TYPE_DEC;
+ break;
+ case GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE:
+ index = event_type - GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE;
+ return GAUDI2_ENGINE_ID_NIC0_0 + (index * 2);
+ case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
+ index = event_type - GAUDI2_EVENT_NIC0_QM0;
+ return GAUDI2_ENGINE_ID_NIC0_0 + index;
+ case GAUDI2_EVENT_NIC0_BMON_SPMU ... GAUDI2_EVENT_NIC11_SW_ERROR:
+ index = event_type - GAUDI2_EVENT_NIC0_BMON_SPMU;
+ return GAUDI2_ENGINE_ID_NIC0_0 + (index * 2);
+ case GAUDI2_EVENT_TPC0_BMON_SPMU ... GAUDI2_EVENT_TPC24_KERNEL_ERR:
+ index = (event_type - GAUDI2_EVENT_TPC0_BMON_SPMU) >> 1;
+ type = GAUDI2_BLOCK_TYPE_TPC;
+ break;
+ case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
+ case GAUDI2_EVENT_ROTATOR0_BMON_SPMU:
+ case GAUDI2_EVENT_ROTATOR0_ROT0_QM:
+ return GAUDI2_ENGINE_ID_ROT_0;
+ case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
+ case GAUDI2_EVENT_ROTATOR1_BMON_SPMU:
+ case GAUDI2_EVENT_ROTATOR1_ROT1_QM:
+ return GAUDI2_ENGINE_ID_ROT_1;
+ case GAUDI2_EVENT_HDMA0_BM_SPMU:
+ case GAUDI2_EVENT_HDMA0_QM:
+ case GAUDI2_EVENT_HDMA0_CORE:
+ return GAUDI2_DCORE0_ENGINE_ID_EDMA_0;
+ case GAUDI2_EVENT_HDMA1_BM_SPMU:
+ case GAUDI2_EVENT_HDMA1_QM:
+ case GAUDI2_EVENT_HDMA1_CORE:
+ return GAUDI2_DCORE0_ENGINE_ID_EDMA_1;
+ case GAUDI2_EVENT_HDMA2_BM_SPMU:
+ case GAUDI2_EVENT_HDMA2_QM:
+ case GAUDI2_EVENT_HDMA2_CORE:
+ return GAUDI2_DCORE1_ENGINE_ID_EDMA_0;
+ case GAUDI2_EVENT_HDMA3_BM_SPMU:
+ case GAUDI2_EVENT_HDMA3_QM:
+ case GAUDI2_EVENT_HDMA3_CORE:
+ return GAUDI2_DCORE1_ENGINE_ID_EDMA_1;
+ case GAUDI2_EVENT_HDMA4_BM_SPMU:
+ case GAUDI2_EVENT_HDMA4_QM:
+ case GAUDI2_EVENT_HDMA4_CORE:
+ return GAUDI2_DCORE2_ENGINE_ID_EDMA_0;
+ case GAUDI2_EVENT_HDMA5_BM_SPMU:
+ case GAUDI2_EVENT_HDMA5_QM:
+ case GAUDI2_EVENT_HDMA5_CORE:
+ return GAUDI2_DCORE2_ENGINE_ID_EDMA_1;
+ case GAUDI2_EVENT_HDMA6_BM_SPMU:
+ case GAUDI2_EVENT_HDMA6_QM:
+ case GAUDI2_EVENT_HDMA6_CORE:
+ return GAUDI2_DCORE3_ENGINE_ID_EDMA_0;
+ case GAUDI2_EVENT_HDMA7_BM_SPMU:
+ case GAUDI2_EVENT_HDMA7_QM:
+ case GAUDI2_EVENT_HDMA7_CORE:
+ return GAUDI2_DCORE3_ENGINE_ID_EDMA_1;
+ default:
+ break;
+ }
+
+ switch (type) {
+ case GAUDI2_BLOCK_TYPE_TPC:
+ switch (index) {
+ case TPC_ID_DCORE0_TPC0 ... TPC_ID_DCORE0_TPC5:
+ return GAUDI2_DCORE0_ENGINE_ID_TPC_0 + index;
+ case TPC_ID_DCORE1_TPC0 ... TPC_ID_DCORE1_TPC5:
+ return GAUDI2_DCORE1_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE1_TPC0;
+ case TPC_ID_DCORE2_TPC0 ... TPC_ID_DCORE2_TPC5:
+ return GAUDI2_DCORE2_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE2_TPC0;
+ case TPC_ID_DCORE3_TPC0 ... TPC_ID_DCORE3_TPC5:
+ return GAUDI2_DCORE3_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE3_TPC0;
+ default:
+ break;
+ }
+ break;
+ case GAUDI2_BLOCK_TYPE_MME:
+ switch (index) {
+ case MME_ID_DCORE0: return GAUDI2_DCORE0_ENGINE_ID_MME;
+ case MME_ID_DCORE1: return GAUDI2_DCORE1_ENGINE_ID_MME;
+ case MME_ID_DCORE2: return GAUDI2_DCORE2_ENGINE_ID_MME;
+ case MME_ID_DCORE3: return GAUDI2_DCORE3_ENGINE_ID_MME;
+ default:
+ break;
+ }
+ break;
+ case GAUDI2_BLOCK_TYPE_DEC:
+ switch (index) {
+ case DEC_ID_DCORE0_DEC0: return GAUDI2_DCORE0_ENGINE_ID_DEC_0;
+ case DEC_ID_DCORE0_DEC1: return GAUDI2_DCORE0_ENGINE_ID_DEC_1;
+ case DEC_ID_DCORE1_DEC0: return GAUDI2_DCORE1_ENGINE_ID_DEC_0;
+ case DEC_ID_DCORE1_DEC1: return GAUDI2_DCORE1_ENGINE_ID_DEC_1;
+ case DEC_ID_DCORE2_DEC0: return GAUDI2_DCORE2_ENGINE_ID_DEC_0;
+ case DEC_ID_DCORE2_DEC1: return GAUDI2_DCORE2_ENGINE_ID_DEC_1;
+ case DEC_ID_DCORE3_DEC0: return GAUDI2_DCORE3_ENGINE_ID_DEC_0;
+ case DEC_ID_DCORE3_DEC1: return GAUDI2_DCORE3_ENGINE_ID_DEC_1;
+ case DEC_ID_PCIE_VDEC0: return GAUDI2_PCIE_ENGINE_ID_DEC_0;
+ case DEC_ID_PCIE_VDEC1: return GAUDI2_PCIE_ENGINE_ID_DEC_1;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return U16_MAX;
+}
+
static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
{
struct gaudi2_device *gaudi2 = hdev->asic_specific;
bool reset_required = false, is_critical = false;
- u32 index, ctl, reset_flags = HL_DRV_RESET_HARD, error_count = 0;
+ u32 index, ctl, reset_flags = 0, error_count = 0;
u64 event_mask = 0;
u16 event_type;
@@ -9024,19 +10181,18 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
break;
case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0:
- reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
- error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type);
- event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
+ error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type, &event_mask);
+ event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
break;
case GAUDI2_EVENT_CPU_AXI_ERR_RSP:
error_count = gaudi2_handle_cpu_sei_err(hdev, event_type);
- event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
+ reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
+ event_mask |= HL_NOTIFIER_EVENT_CRITICL_FW_ERR;
break;
case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
- reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
error_count = gaudi2_handle_qm_sei_err(hdev, event_type, true, &event_mask);
event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
break;
@@ -9149,13 +10305,19 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
case GAUDI2_EVENT_KDMA0_CORE:
error_count = gaudi2_handle_kdma_core_event(hdev, event_type,
- le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
+ le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
break;
- case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_PDMA1_CORE:
+ case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_HDMA5_CORE:
error_count = gaudi2_handle_dma_core_event(hdev, event_type,
- le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
+ le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
+ event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
+ break;
+
+ case GAUDI2_EVENT_PDMA0_CORE ... GAUDI2_EVENT_PDMA1_CORE:
+ error_count = gaudi2_handle_dma_core_event(hdev, event_type,
+ le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
break;
@@ -9199,6 +10361,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) {
reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
reset_required = true;
+ is_critical = eq_entry->sei_data.hdr.is_critical;
}
error_count++;
break;
@@ -9217,12 +10380,16 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE:
error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data);
+ reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
+ if (hl_fw_version_cmp(hdev, 1, 13, 0) >= 0)
+ is_critical = true;
break;
case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN:
error_count = gaudi2_handle_psoc_drain(hdev,
le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
+ reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
break;
@@ -9240,8 +10407,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP:
case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP:
case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP:
- error_count = gaudi2_handle_mme_sbte_err(hdev, event_type,
- le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
+ error_count = gaudi2_handle_mme_sbte_err(hdev, event_type);
event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
break;
case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B:
@@ -9251,6 +10417,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
break;
case GAUDI2_EVENT_PSOC_AXI_ERR_RSP:
error_count = GAUDI2_NA_EVENT_CAUSE;
+ reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
break;
case GAUDI2_EVENT_PSOC_PRSTN_FALL:
@@ -9264,6 +10431,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
break;
case GAUDI2_EVENT_PCIE_FATAL_ERR:
error_count = GAUDI2_NA_EVENT_CAUSE;
+ reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
break;
case GAUDI2_EVENT_TPC0_BMON_SPMU:
@@ -9331,6 +10499,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC:
gaudi2_print_out_of_sync_info(hdev, event_type, &eq_entry->pkt_sync_err);
error_count = GAUDI2_NA_EVENT_CAUSE;
+ reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
break;
@@ -9366,12 +10535,13 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n",
le64_to_cpu(eq_entry->data[0]));
error_count = GAUDI2_NA_EVENT_CAUSE;
- event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
+ hl_eq_cpld_shutdown_event_handle(hdev, event_type, &event_mask);
break;
case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED:
gaudi2_print_cpu_pkt_failure_info(hdev, event_type, &eq_entry->pkt_sync_err);
error_count = GAUDI2_NA_EVENT_CAUSE;
+ reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
break;
@@ -9381,12 +10551,27 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
break;
case GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED:
- case GAUDI2_EVENT_DEV_RESET_REQ:
+ case GAUDI2_EVENT_CPU_DEV_RESET_REQ:
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
error_count = GAUDI2_NA_EVENT_CAUSE;
is_critical = true;
break;
+ case GAUDI2_EVENT_ARC_PWR_BRK_ENTRY:
+ case GAUDI2_EVENT_ARC_PWR_BRK_EXT:
+ case GAUDI2_EVENT_ARC_PWR_RD_MODE0:
+ case GAUDI2_EVENT_ARC_PWR_RD_MODE1:
+ case GAUDI2_EVENT_ARC_PWR_RD_MODE2:
+ case GAUDI2_EVENT_ARC_PWR_RD_MODE3:
+ error_count = GAUDI2_NA_EVENT_CAUSE;
+ dev_info_ratelimited(hdev->dev, "%s event received\n",
+ gaudi2_irq_map_table[event_type].name);
+ break;
+
+ case GAUDI2_EVENT_ARC_EQ_HEARTBEAT:
+ hl_eq_heartbeat_event_handle(hdev);
+ error_count = GAUDI2_NA_EVENT_CAUSE;
+ break;
default:
if (gaudi2_irq_map_table[event_type].valid) {
dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n",
@@ -9395,6 +10580,9 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
}
}
+ if (event_mask & HL_NOTIFIER_EVENT_USER_ENGINE_ERR)
+ hl_capture_engine_err(hdev, event_id_to_engine_id(hdev, event_type), error_count);
+
/* Make sure to dump an error in case no error cause was printed so far.
* Note that although we have counted the errors, we use this number as
* a boolean.
@@ -9403,12 +10591,17 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
gaudi2_print_event(hdev, event_type, true, "%d", event_type);
else if (error_count == 0)
gaudi2_print_event(hdev, event_type, true,
- "No error cause for H/W event %u\n", event_type);
+ "No error cause for H/W event %u", event_type);
- if ((gaudi2_irq_map_table[event_type].reset || reset_required) &&
- (hdev->hard_reset_on_fw_events ||
- (hdev->asic_prop.fw_security_enabled && is_critical)))
- goto reset_device;
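+ /* Decide on the reset type: a hard reset is taken when the event table
+ * requires it or when a handler explicitly requested one via reset_required.
+ */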
+ if ((gaudi2_irq_map_table[event_type].reset != EVENT_RESET_TYPE_NONE) || reset_required) {
+ if (reset_required ||
+ (gaudi2_irq_map_table[event_type].reset == EVENT_RESET_TYPE_HARD))
+ reset_flags |= HL_DRV_RESET_HARD;
+
+ if (hdev->hard_reset_on_fw_events ||
+ (hdev->asic_prop.fw_security_enabled && is_critical))
+ goto reset_device;
+ }
/* Send unmask irq only for interrupts not classified as MSG */
if (!gaudi2_irq_map_table[event_type].msg)
@@ -9426,16 +10619,21 @@ reset_device:
} else {
reset_flags |= HL_DRV_RESET_DELAY;
}
+ /* escalate general hw errors to critical/fatal error */
+ if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
+ hl_handle_critical_hw_err(hdev, event_type, &event_mask);
+
+ hl_debugfs_cfg_access_history_dump(hdev);
event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
hl_device_cond_reset(hdev, reset_flags, event_mask);
}
static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev,
- struct packet_lin_dma *lin_dma_pkt, dma_addr_t pkt_dma_addr,
- u32 hw_queue_id, u32 size, u64 addr, u32 val)
+ struct packet_lin_dma *lin_dma_pkt,
+ u64 phys_addr, u32 hw_queue_id, u32 size, u64 addr, u32 val)
{
u32 ctl, pkt_size;
- int rc = 0;
+ int rc = 0, i;
ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
@@ -9449,10 +10647,21 @@ static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev,
pkt_size = sizeof(struct packet_lin_dma);
- rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
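+ /* The CB resides in device memory (HBM), so copy the 3-qword LIN_DMA
+ * packet there through the generic device-memory access path before
+ * queueing it.
+ */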
+ for (i = 0; i < 3; i++) {
+ rc = hdev->asic_funcs->access_dev_mem(hdev, PCI_REGION_DRAM,
+ phys_addr + (i * sizeof(u64)),
+ ((u64 *)(lin_dma_pkt)) + i, DEBUGFS_WRITE64);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to copy lin_dma packet to HBM (%#llx)\n",
+ phys_addr);
+ return rc;
+ }
+ }
+
+ rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, phys_addr);
if (rc)
- dev_err(hdev->dev, "Failed to send lin dma packet to H/W queue %d\n",
- hw_queue_id);
+ dev_err(hdev->dev, "Failed to send lin_dma packet to H/W queue %s\n",
+ GAUDI2_QUEUE_ID_TO_STR(hw_queue_id));
return rc;
}
@@ -9464,12 +10673,11 @@ static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 siz
GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0};
u32 chunk_size, dcore, edma_idx, sob_offset, sob_addr, comp_val,
- old_mmubp, mmubp, num_of_pkts, busy, pkt_size;
+ old_mmubp, mmubp, num_of_pkts, busy, pkt_size, cb_len;
u64 comp_addr, cur_addr = addr, end_addr = addr + size;
struct asic_fixed_properties *prop = &hdev->asic_prop;
+ int rc = 0, dma_num = 0, i;
void *lin_dma_pkts_arr;
- dma_addr_t pkt_dma_addr;
- int rc = 0, dma_num = 0;
if (prop->edma_enabled_mask == 0) {
dev_info(hdev->dev, "non of the EDMA engines is enabled - skip dram scrubbing\n");
@@ -9487,9 +10695,19 @@ static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 siz
/* Calculate how many lin dma pkts we'll need */
num_of_pkts = div64_u64(round_up(size, SZ_2G), SZ_2G);
pkt_size = sizeof(struct packet_lin_dma);
+ cb_len = pkt_size * num_of_pkts;
- lin_dma_pkts_arr = hl_asic_dma_alloc_coherent(hdev, pkt_size * num_of_pkts,
- &pkt_dma_addr, GFP_KERNEL);
+ /*
+ * If we are not scrubbing the HMMU or NIC reserved sections in HBM, then this
+ * is the scrubbing of the user section. Since we use the start of the user
+ * section to store the CB of the EDMA QM, shift the start address of the
+ * scrubbing accordingly and scrub the CB section before leaving this function.
+ */
+ if ((addr >= prop->dram_user_base_address) &&
+ (addr < prop->dram_user_base_address + cb_len))
+ cur_addr += (prop->dram_user_base_address + cb_len) - addr;
+
+ lin_dma_pkts_arr = kvcalloc(num_of_pkts, pkt_size, GFP_KERNEL);
if (!lin_dma_pkts_arr)
return -ENOMEM;
@@ -9535,7 +10753,7 @@ static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 siz
rc = gaudi2_memset_memory_chunk_using_edma_qm(hdev,
(struct packet_lin_dma *)lin_dma_pkts_arr + dma_num,
- pkt_dma_addr + dma_num * pkt_size,
+ prop->dram_user_base_address + (dma_num * pkt_size),
edma_queues_id[dcore] + edma_idx * 4,
chunk_size, cur_addr, val);
if (rc)
@@ -9544,14 +10762,16 @@ static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 siz
dma_num++;
cur_addr += chunk_size;
if (cur_addr == end_addr)
- break;
+ goto edma_wait;
}
}
}
+edma_wait:
rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000);
if (rc) {
- dev_err(hdev->dev, "DMA Timeout during HBM scrubbing\n");
+ dev_err(hdev->dev, "DMA Timeout during HBM scrubbing (sob: 0x%x, dma_num: 0x%x)\n",
+ busy, dma_num);
goto end;
}
end:
@@ -9572,8 +10792,16 @@ end:
}
}
+ memset(lin_dma_pkts_arr, 0, sizeof(u64));
+
+ /* Zero the HBM area where we copied the CB */
+ for (i = 0; i < cb_len / sizeof(u64); i += sizeof(u64))
+ rc = hdev->asic_funcs->access_dev_mem(hdev, PCI_REGION_DRAM,
+ prop->dram_user_base_address + i,
+ (u64 *)(lin_dma_pkts_arr), DEBUGFS_WRITE64);
WREG32(sob_addr, 0);
- hl_asic_dma_free_coherent(hdev, pkt_size * num_of_pkts, lin_dma_pkts_arr, pkt_dma_addr);
+
+ kvfree(lin_dma_pkts_arr);
return rc;
}
@@ -9832,16 +11060,23 @@ static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, v
/* Create mapping on asic side */
mutex_lock(&hdev->mmu_lock);
+
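+ /* Hold the MMU lock across both the mapping and the cache invalidation,
+ * and undo the mapping if the invalidation fails.
+ */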
rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M);
- hl_mmu_invalidate_cache_range(hdev, false,
+ if (rc) {
+ dev_err(hdev->dev, "Failed to create mapping on asic mmu\n");
+ goto unreserve_va;
+ }
+
+ rc = hl_mmu_invalidate_cache_range(hdev, false,
MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV,
ctx->asid, reserved_va_base, SZ_2M);
- mutex_unlock(&hdev->mmu_lock);
if (rc) {
- dev_err(hdev->dev, "Failed to create mapping on asic mmu\n");
+ hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
goto unreserve_va;
}
+ mutex_unlock(&hdev->mmu_lock);
+
/* Enable MMU on KDMA */
gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid);
@@ -9870,11 +11105,16 @@ static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, v
gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID);
mutex_lock(&hdev->mmu_lock);
- hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
- hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR,
+
+ rc = hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
+ if (rc)
+ goto unreserve_va;
+
+ rc = hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR,
ctx->asid, reserved_va_base, SZ_2M);
- mutex_unlock(&hdev->mmu_lock);
+
unreserve_va:
+ mutex_unlock(&hdev->mmu_lock);
hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M);
free_data_buffer:
hl_asic_dma_free_coherent(hdev, SZ_2M, host_mem_virtual_addr, host_mem_dma_addr);
@@ -9927,17 +11167,24 @@ static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *c
}
mutex_lock(&hdev->mmu_lock);
+
rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr,
HOST_SPACE_INTERNAL_CB_SZ);
- hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
- mutex_unlock(&hdev->mmu_lock);
-
if (rc)
goto unreserve_internal_cb_pool;
+ rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
+ if (rc)
+ goto unmap_internal_cb_pool;
+
+ mutex_unlock(&hdev->mmu_lock);
+
return 0;
+unmap_internal_cb_pool:
+ hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
unreserve_internal_cb_pool:
+ mutex_unlock(&hdev->mmu_lock);
hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
destroy_internal_cb_pool:
gen_pool_destroy(hdev->internal_cb_pool);
@@ -10006,6 +11253,9 @@ static int gaudi2_ctx_init(struct hl_ctx *ctx)
{
int rc;
+ if (ctx->asid == HL_KERNEL_ASID_ID)
+ return 0;
+
rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid);
if (rc)
return rc;
@@ -10375,8 +11625,8 @@ static int gaudi2_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
address = pci_resource_start(hdev->pdev, SRAM_CFG_BAR_ID) + offset_in_bar;
- vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
- VM_DONTCOPY | VM_NORESERVE;
+ vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
+ VM_DONTCOPY | VM_NORESERVE);
rc = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT,
block_size, vma->vm_page_prot);
@@ -10497,6 +11747,7 @@ static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64
static void gaudi2_get_msi_info(__le32 *table)
{
table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX);
+ table[CPUCP_EVENT_QUEUE_ERR_MSI_TYPE] = cpu_to_le32(GAUDI2_IRQ_NUM_EQ_ERROR);
}
static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx)
@@ -10608,7 +11859,7 @@ static int gaudi2_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_p
return 0;
page_size_err:
- dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n",
+ dev_err(hdev->dev, "page size of 0x%X is not 0x%X aligned, can't map\n",
page_size, mmu_prop->page_size >> 10);
return -EFAULT;
}
@@ -10628,6 +11879,29 @@ int gaudi2_send_device_activity(struct hl_device *hdev, bool open)
return hl_fw_send_device_activity(hdev, open);
}
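+/* PTE accessors for device-resident page tables: read/write the PTE through the
+ * DRAM BAR, relative to the BAR's currently-set base address.
+ */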
+static u64 gaudi2_read_pte(struct hl_device *hdev, u64 addr)
+{
+ struct gaudi2_device *gaudi2 = hdev->asic_specific;
+ u64 val;
+
+ if (hdev->reset_info.hard_reset_pending)
+ return U64_MAX;
+
+ val = readq(hdev->pcie_bar[DRAM_BAR_ID] + (addr - gaudi2->dram_bar_cur_addr));
+
+ return val;
+}
+
+static void gaudi2_write_pte(struct hl_device *hdev, u64 addr, u64 val)
+{
+ struct gaudi2_device *gaudi2 = hdev->asic_specific;
+
+ if (hdev->reset_info.hard_reset_pending)
+ return;
+
+ writeq(val, hdev->pcie_bar[DRAM_BAR_ID] + (addr - gaudi2->dram_bar_cur_addr));
+}
+
static const struct hl_asic_funcs gaudi2_funcs = {
.early_init = gaudi2_early_init,
.early_fini = gaudi2_early_fini,
@@ -10653,11 +11927,9 @@ static const struct hl_asic_funcs gaudi2_funcs = {
.asic_dma_pool_free = gaudi2_dma_pool_free,
.cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc,
.cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free,
- .asic_dma_unmap_single = gaudi2_dma_unmap_single,
- .asic_dma_map_single = gaudi2_dma_map_single,
- .hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
+ .dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
.cs_parser = gaudi2_cs_parser,
- .asic_dma_map_sgtable = hl_dma_map_sgtable,
+ .dma_map_sgtable = hl_asic_dma_map_sgtable,
.add_end_of_cb_packets = NULL,
.update_eq_ci = gaudi2_update_eq_ci,
.context_switch = gaudi2_context_switch,
@@ -10666,8 +11938,8 @@ static const struct hl_asic_funcs gaudi2_funcs = {
.add_device_attr = gaudi2_add_device_attr,
.handle_eqe = gaudi2_handle_eqe,
.get_events_stat = gaudi2_get_events_stat,
- .read_pte = NULL,
- .write_pte = NULL,
+ .read_pte = gaudi2_read_pte,
+ .write_pte = gaudi2_write_pte,
.mmu_invalidate_cache = gaudi2_mmu_invalidate_cache,
.mmu_invalidate_cache_range = gaudi2_mmu_invalidate_cache_range,
.mmu_prefetch_cache_range = NULL,
@@ -10724,6 +11996,7 @@ static const struct hl_asic_funcs gaudi2_funcs = {
.access_dev_mem = hl_access_dev_mem,
.set_dram_bar_base = gaudi2_set_hbm_bar_base,
.set_engine_cores = gaudi2_set_engine_cores,
+ .set_engines = gaudi2_set_engines,
.send_device_activity = gaudi2_send_device_activity,
.set_dram_properties = gaudi2_set_dram_properties,
.set_binning_masks = gaudi2_set_binning_masks,