diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 98 |
1 files changed, 89 insertions, 9 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index 8cf58be80f4e..d5c9f30552e3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -23,10 +23,40 @@ #include "kfd_priv.h" #include "kfd_events.h" +#include "kfd_debug.h" #include "soc15_int.h" #include "kfd_device_queue_manager.h" #include "kfd_smi_events.h" +/* + * GFX9 SQ Interrupts + * + * There are 3 encoding types of interrupts sourced from SQ sent as a 44-bit + * packet to the Interrupt Handler: + * Auto - Generated by the SQG (various cmd overflows, timestamps etc) + * Wave - Generated by S_SENDMSG through a shader program + * Error - HW generated errors (Illegal instructions, Memviols, EDC etc) + * + * The 44-bit packet is mapped as {context_id1[7:0],context_id0[31:0]} plus + * 4-bits for VMID (SOC15_VMID_FROM_IH_ENTRY) as such: + * + * - context_id0[27:26] + * Encoding type (0 = Auto, 1 = Wave, 2 = Error) + * + * - context_id0[13] + * PRIV bit indicates that Wave S_SEND or error occurred within trap + * + * - {context_id1[7:0],context_id0[31:28],context_id0[11:0]} + * 24-bit data with the following layout per encoding type: + * Auto - only context_id0[8:0] is used, which reports various interrupts + * generated by SQG. The rest is 0. + * Wave - user data sent from m0 via S_SENDMSG + * Error - Error type (context_id1[7:4]), Error Details (rest of bits) + * + * The other context_id bits show coordinates (SE/SH/CU/SIMD/WAVE) for wave + * S_SENDMSG and Errors. These are 0 for Auto. + */ + enum SQ_INTERRUPT_WORD_ENCODING { SQ_INTERRUPT_WORD_ENCODING_AUTO = 0x0, SQ_INTERRUPT_WORD_ENCODING_INST, @@ -84,12 +114,32 @@ enum SQ_INTERRUPT_ERROR_TYPE { #define SQ_INTERRUPT_WORD_WAVE_CTXID__SE_ID_MASK 0x03000000 #define SQ_INTERRUPT_WORD_WAVE_CTXID__ENCODING_MASK 0x0c000000 +/* GFX9 SQ interrupt 24-bit data from context_id<0,1> */ #define KFD_CONTEXT_ID_GET_SQ_INT_DATA(ctx0, ctx1) \ ((ctx0 & 0xfff) | ((ctx0 >> 16) & 0xf000) | ((ctx1 << 16) & 0xff0000)) #define KFD_SQ_INT_DATA__ERR_TYPE_MASK 0xF00000 #define KFD_SQ_INT_DATA__ERR_TYPE__SHIFT 20 +/* + * The debugger will send user data(m0) with PRIV=1 to indicate it requires + * notification from the KFD with the following queue id (DOORBELL_ID) and + * trap code (TRAP_CODE). + */ +#define KFD_INT_DATA_DEBUG_DOORBELL_MASK 0x0003ff +#define KFD_INT_DATA_DEBUG_TRAP_CODE_SHIFT 10 +#define KFD_INT_DATA_DEBUG_TRAP_CODE_MASK 0x07fc00 +#define KFD_DEBUG_DOORBELL_ID(sq_int_data) ((sq_int_data) & \ + KFD_INT_DATA_DEBUG_DOORBELL_MASK) +#define KFD_DEBUG_TRAP_CODE(sq_int_data) (((sq_int_data) & \ + KFD_INT_DATA_DEBUG_TRAP_CODE_MASK) \ + >> KFD_INT_DATA_DEBUG_TRAP_CODE_SHIFT) +#define KFD_DEBUG_CP_BAD_OP_ECODE_MASK 0x3fffc00 +#define KFD_DEBUG_CP_BAD_OP_ECODE_SHIFT 10 +#define KFD_DEBUG_CP_BAD_OP_ECODE(ctxid0) (((ctxid0) & \ + KFD_DEBUG_CP_BAD_OP_ECODE_MASK) \ + >> KFD_DEBUG_CP_BAD_OP_ECODE_SHIFT) + static void event_interrupt_poison_consumption_v9(struct kfd_node *dev, uint16_t pasid, uint16_t client_id) { @@ -168,14 +218,16 @@ static bool event_interrupt_isr_v9(struct kfd_node *dev, uint16_t source_id, client_id, pasid, vmid; const uint32_t *data = ih_ring_entry; + source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry); + client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry); + /* Only handle interrupts from KFD VMIDs */ vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry); - if (vmid < dev->vm_info.first_vmid_kfd || - vmid > dev->vm_info.last_vmid_kfd) + if (!KFD_IRQ_IS_FENCE(client_id, source_id) && + (vmid < dev->vm_info.first_vmid_kfd || + vmid > dev->vm_info.last_vmid_kfd)) return false; - source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry); - client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry); pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry); /* Only handle clients we care about */ @@ -194,7 +246,8 @@ static bool event_interrupt_isr_v9(struct kfd_node *dev, client_id != SOC15_IH_CLIENTID_SE0SH && client_id != SOC15_IH_CLIENTID_SE1SH && client_id != SOC15_IH_CLIENTID_SE2SH && - client_id != SOC15_IH_CLIENTID_SE3SH) + client_id != SOC15_IH_CLIENTID_SE3SH && + !KFD_IRQ_IS_FENCE(client_id, source_id)) return false; /* This is a known issue for gfx9. Under non HWS, pasid is not set @@ -247,6 +300,7 @@ static bool event_interrupt_isr_v9(struct kfd_node *dev, source_id == SOC15_INTSRC_SDMA_ECC || source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG || source_id == SOC15_INTSRC_CP_BAD_OPCODE || + KFD_IRQ_IS_FENCE(client_id, source_id) || ((client_id == SOC15_IH_CLIENTID_VMC || client_id == SOC15_IH_CLIENTID_VMC1 || client_id == SOC15_IH_CLIENTID_UTCL2) && @@ -302,6 +356,13 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SIMD_ID), REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, CU_ID), sq_int_data); + if (context_id0 & SQ_INTERRUPT_WORD_WAVE_CTXID__PRIV_MASK) { + if (kfd_set_dbg_ev_from_interrupt(dev, pasid, + KFD_DEBUG_DOORBELL_ID(sq_int_data), + KFD_DEBUG_TRAP_CODE(sq_int_data), + NULL, 0)) + return; + } break; case SQ_INTERRUPT_WORD_ENCODING_ERROR: sq_intr_err = REG_GET_FIELD(sq_int_data, KFD_SQ_INT_DATA, ERR_TYPE); @@ -324,8 +385,12 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, break; } kfd_signal_event_interrupt(pasid, context_id0 & 0xffffff, 24); - } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) - kfd_signal_hw_exception_event(pasid); + } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) { + kfd_set_dbg_ev_from_interrupt(dev, pasid, + KFD_DEBUG_DOORBELL_ID(context_id0), + KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)), + NULL, 0); + } } else if (client_id == SOC15_IH_CLIENTID_SDMA0 || client_id == SOC15_IH_CLIENTID_SDMA1 || client_id == SOC15_IH_CLIENTID_SDMA2 || @@ -345,6 +410,7 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, client_id == SOC15_IH_CLIENTID_UTCL2) { struct kfd_vm_fault_info info = {0}; uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry); + struct kfd_hsa_memory_exception_data exception_data; if (client_id == SOC15_IH_CLIENTID_UTCL2 && amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev)) { @@ -360,9 +426,23 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, info.prot_read = ring_id & 0x10; info.prot_write = ring_id & 0x20; + memset(&exception_data, 0, sizeof(exception_data)); + exception_data.gpu_id = dev->id; + exception_data.va = (info.page_addr) << PAGE_SHIFT; + exception_data.failure.NotPresent = info.prot_valid ? 1 : 0; + exception_data.failure.NoExecute = info.prot_exec ? 1 : 0; + exception_data.failure.ReadOnly = info.prot_write ? 1 : 0; + exception_data.failure.imprecise = 0; + + kfd_set_dbg_ev_from_interrupt(dev, + pasid, + -1, + KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION), + &exception_data, + sizeof(exception_data)); kfd_smi_event_update_vmfault(dev, pasid); - kfd_dqm_evict_pasid(dev->dqm, pasid); - kfd_signal_vm_fault_event(dev, pasid, &info, NULL); + } else if (KFD_IRQ_IS_FENCE(client_id, source_id)) { + kfd_process_close_interrupt_drain(pasid); } } |