summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdkfd
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Kconfig2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Makefile3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c19
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h4097
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm296
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm1136
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm62
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c205
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.c76
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.h2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debug.c70
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debug.h5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c76
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c311
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c858
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h45
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c75
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c43
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c43
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c90
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c45
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c77
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_events.c92
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c216
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c116
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c227
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c62
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c39
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_migrate.c62
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c30
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c6
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c12
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c11
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c459
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c85
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c6
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c177
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c113
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pasid.c70
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h8
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h124
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c289
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c169
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_queue.c385
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c88
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h9
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.c292
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.h18
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c212
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.h28
-rw-r--r--drivers/gpu/drm/amd/amdkfd/soc15_int.h1
55 files changed, 8020 insertions, 3038 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig
index d3c3d3ab7225..62e88e5362e9 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -5,7 +5,7 @@
config HSA_AMD
bool "HSA kernel driver for AMD GPU devices"
- depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64)
+ depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64 || (RISCV && 64BIT))
select HMM_MIRROR
select MMU_NOTIFIER
select DRM_AMDGPU_USERPTR
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index a5ae7bcf44eb..0ce08113c9f0 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -27,7 +27,6 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_device.o \
$(AMDKFD_PATH)/kfd_chardev.o \
$(AMDKFD_PATH)/kfd_topology.o \
- $(AMDKFD_PATH)/kfd_pasid.o \
$(AMDKFD_PATH)/kfd_doorbell.o \
$(AMDKFD_PATH)/kfd_flat_memory.o \
$(AMDKFD_PATH)/kfd_process.o \
@@ -38,6 +37,7 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_mqd_manager_v9.o \
$(AMDKFD_PATH)/kfd_mqd_manager_v10.o \
$(AMDKFD_PATH)/kfd_mqd_manager_v11.o \
+ $(AMDKFD_PATH)/kfd_mqd_manager_v12.o \
$(AMDKFD_PATH)/kfd_kernel_queue.o \
$(AMDKFD_PATH)/kfd_packet_manager.o \
$(AMDKFD_PATH)/kfd_packet_manager_vi.o \
@@ -49,6 +49,7 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_device_queue_manager_v9.o \
$(AMDKFD_PATH)/kfd_device_queue_manager_v10.o \
$(AMDKFD_PATH)/kfd_device_queue_manager_v11.o \
+ $(AMDKFD_PATH)/kfd_device_queue_manager_v12.o \
$(AMDKFD_PATH)/kfd_interrupt.o \
$(AMDKFD_PATH)/kfd_events.o \
$(AMDKFD_PATH)/cik_event_interrupt.o \
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
index 795382b55e0a..73acbe0b7c21 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -91,7 +91,6 @@ static void cik_event_interrupt_wq(struct kfd_node *dev,
const struct cik_ih_ring_entry *ihre =
(const struct cik_ih_ring_entry *)ih_ring_entry;
uint32_t context_id = ihre->data & 0xfffffff;
- unsigned int vmid = (ihre->ring_id & 0x0000ff00) >> 8;
u32 pasid = (ihre->ring_id & 0xffff0000) >> 16;
if (pasid == 0)
@@ -107,20 +106,26 @@ static void cik_event_interrupt_wq(struct kfd_node *dev,
kfd_signal_hw_exception_event(pasid);
else if (ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) {
+ struct kfd_process_device *pdd = NULL;
struct kfd_vm_fault_info info;
+ struct kfd_process *p;
kfd_smi_event_update_vmfault(dev, pasid);
- kfd_dqm_evict_pasid(dev->dqm, pasid);
+ p = kfd_lookup_process_by_pasid(pasid, &pdd);
+ if (!pdd)
+ return;
+
+ kfd_evict_process_device(pdd);
memset(&info, 0, sizeof(info));
amdgpu_amdkfd_gpuvm_get_vm_fault_info(dev->adev, &info);
- if (!info.page_addr && !info.status)
+ if (!info.page_addr && !info.status) {
+ kfd_unref_process(p);
return;
+ }
- if (info.vmid == vmid)
- kfd_signal_vm_fault_event(dev, pasid, &info, NULL);
- else
- kfd_signal_vm_fault_event(dev, pasid, NULL, NULL);
+ kfd_signal_vm_fault_event(pdd, &info, NULL);
+ kfd_unref_process(p);
}
}
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index 5a0308d26b53..0320163b6e74 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -274,7 +274,7 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
static const uint32_t cwsr_trap_gfx9_hex[] = {
- 0xbf820001, 0xbf820258,
+ 0xbf820001, 0xbf820259,
0xb8f8f802, 0x8978ff78,
0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
@@ -390,141 +390,98 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
0xbefe007c, 0xbefc0070,
0xc0611c7a, 0x0000007c,
0xbf8cc07f, 0x80708470,
- 0xbefc007e, 0x867aff7f,
- 0x04000000, 0xbeef0080,
- 0x876f6f7a, 0xb8f02a05,
- 0x80708170, 0x8e708a70,
- 0xb8fb1605, 0x807b817b,
- 0x8e7b847b, 0x8e76827b,
- 0xbef600ff, 0x01000000,
- 0xbef20174, 0x80747074,
- 0x82758075, 0xbefc0080,
- 0xbf800000, 0xbe802b00,
- 0xbe822b02, 0xbe842b04,
- 0xbe862b06, 0xbe882b08,
- 0xbe8a2b0a, 0xbe8c2b0c,
- 0xbe8e2b0e, 0xc06b003a,
- 0x00000000, 0xbf8cc07f,
- 0xc06b013a, 0x00000010,
- 0xbf8cc07f, 0xc06b023a,
- 0x00000020, 0xbf8cc07f,
- 0xc06b033a, 0x00000030,
- 0xbf8cc07f, 0x8074c074,
- 0x82758075, 0x807c907c,
- 0xbf0a7b7c, 0xbf85ffe7,
- 0xbef40172, 0xbef00080,
- 0xbefe00c1, 0xbeff00c1,
- 0xbee80080, 0xbee90080,
- 0xbef600ff, 0x01000000,
- 0x867aff78, 0x00400000,
- 0xbf850003, 0xb8faf803,
- 0x897a7aff, 0x10000000,
- 0xbf85004d, 0xbe840080,
- 0xd2890000, 0x00000900,
- 0x80048104, 0xd2890001,
- 0x00000900, 0x80048104,
- 0xd2890002, 0x00000900,
- 0x80048104, 0xd2890003,
- 0x00000900, 0x80048104,
- 0xc069003a, 0x00000070,
- 0xbf8cc07f, 0x80709070,
- 0xbf06c004, 0xbf84ffee,
+ 0xbefc007e, 0xbf108080,
+ 0x867aff7f, 0x04000000,
+ 0xbeef0080, 0x876f6f7a,
+ 0xb8f02a05, 0x80708170,
+ 0x8e708a70, 0xb8fb1605,
+ 0x807b817b, 0x8e7b847b,
+ 0x8e76827b, 0xbef600ff,
+ 0x01000000, 0xbef20174,
+ 0x80747074, 0x82758075,
+ 0xbefc0080, 0xbf800000,
+ 0xbe802b00, 0xbe822b02,
+ 0xbe842b04, 0xbe862b06,
+ 0xbe882b08, 0xbe8a2b0a,
+ 0xbe8c2b0c, 0xbe8e2b0e,
+ 0xc06b003a, 0x00000000,
+ 0xbf8cc07f, 0xc06b013a,
+ 0x00000010, 0xbf8cc07f,
+ 0xc06b023a, 0x00000020,
+ 0xbf8cc07f, 0xc06b033a,
+ 0x00000030, 0xbf8cc07f,
+ 0x8074c074, 0x82758075,
+ 0x807c907c, 0xbf0a7b7c,
+ 0xbf85ffe7, 0xbef40172,
+ 0xbef00080, 0xbefe00c1,
+ 0xbeff00c1, 0xbee80080,
+ 0xbee90080, 0xbef600ff,
+ 0x01000000, 0x867aff78,
+ 0x00400000, 0xbf850003,
+ 0xb8faf803, 0x897a7aff,
+ 0x10000000, 0xbf85004d,
0xbe840080, 0xd2890000,
- 0x00000901, 0x80048104,
- 0xd2890001, 0x00000901,
+ 0x00000900, 0x80048104,
+ 0xd2890001, 0x00000900,
0x80048104, 0xd2890002,
- 0x00000901, 0x80048104,
- 0xd2890003, 0x00000901,
+ 0x00000900, 0x80048104,
+ 0xd2890003, 0x00000900,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000902,
+ 0xd2890000, 0x00000901,
0x80048104, 0xd2890001,
- 0x00000902, 0x80048104,
- 0xd2890002, 0x00000902,
+ 0x00000901, 0x80048104,
+ 0xd2890002, 0x00000901,
0x80048104, 0xd2890003,
- 0x00000902, 0x80048104,
+ 0x00000901, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
0xbe840080, 0xd2890000,
- 0x00000903, 0x80048104,
- 0xd2890001, 0x00000903,
+ 0x00000902, 0x80048104,
+ 0xd2890001, 0x00000902,
0x80048104, 0xd2890002,
- 0x00000903, 0x80048104,
- 0xd2890003, 0x00000903,
+ 0x00000902, 0x80048104,
+ 0xd2890003, 0x00000902,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbf820008,
- 0xe0724000, 0x701d0000,
- 0xe0724100, 0x701d0100,
- 0xe0724200, 0x701d0200,
- 0xe0724300, 0x701d0300,
- 0xbefe00c1, 0xbeff00c1,
- 0xb8fb4306, 0x867bc17b,
- 0xbf840063, 0xbf8a0000,
- 0x867aff6f, 0x04000000,
- 0xbf84005f, 0x8e7b867b,
- 0x8e7b827b, 0xbef6007b,
- 0xb8f02a05, 0x80708170,
- 0x8e708a70, 0xb8fa1605,
- 0x807a817a, 0x8e7a867a,
- 0x80707a70, 0x8070ff70,
- 0x00000080, 0xbef600ff,
- 0x01000000, 0xbefc0080,
- 0xd28c0002, 0x000100c1,
- 0xd28d0003, 0x000204c1,
- 0x867aff78, 0x00400000,
- 0xbf850003, 0xb8faf803,
- 0x897a7aff, 0x10000000,
- 0xbf850030, 0x24040682,
- 0xd86e4000, 0x00000002,
- 0xbf8cc07f, 0xbe840080,
- 0xd2890000, 0x00000900,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000903,
0x80048104, 0xd2890001,
- 0x00000900, 0x80048104,
- 0xd2890002, 0x00000900,
+ 0x00000903, 0x80048104,
+ 0xd2890002, 0x00000903,
0x80048104, 0xd2890003,
- 0x00000900, 0x80048104,
+ 0x00000903, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
- 0xbe840080, 0xd2890000,
- 0x00000901, 0x80048104,
- 0xd2890001, 0x00000901,
- 0x80048104, 0xd2890002,
- 0x00000901, 0x80048104,
- 0xd2890003, 0x00000901,
- 0x80048104, 0xc069003a,
- 0x00000070, 0xbf8cc07f,
- 0x80709070, 0xbf06c004,
- 0xbf84ffee, 0x680404ff,
- 0x00000200, 0xd0c9006a,
- 0x0000f702, 0xbf87ffd2,
- 0xbf820015, 0xd1060002,
- 0x00011103, 0x7e0602ff,
- 0x00000200, 0xbefc00ff,
- 0x00010000, 0xbe800077,
- 0x8677ff77, 0xff7fffff,
- 0x8777ff77, 0x00058000,
- 0xd8ec0000, 0x00000002,
- 0xbf8cc07f, 0xe0765000,
- 0x701d0002, 0x68040702,
- 0xd0c9006a, 0x0000f702,
- 0xbf87fff7, 0xbef70000,
- 0xbef000ff, 0x00000400,
- 0xbefe00c1, 0xbeff00c1,
- 0xb8fb2a05, 0x807b817b,
- 0x8e7b827b, 0xbef600ff,
- 0x01000000, 0xbefc0084,
- 0xbf0a7b7c, 0xbf84006d,
- 0xbf11017c, 0x807bff7b,
- 0x00001000, 0x867aff78,
+ 0xbf820008, 0xe0724000,
+ 0x701d0000, 0xe0724100,
+ 0x701d0100, 0xe0724200,
+ 0x701d0200, 0xe0724300,
+ 0x701d0300, 0xbefe00c1,
+ 0xbeff00c1, 0xb8fb4306,
+ 0x867bc17b, 0xbf840063,
+ 0xbf8a0000, 0x867aff6f,
+ 0x04000000, 0xbf84005f,
+ 0x8e7b867b, 0x8e7b827b,
+ 0xbef6007b, 0xb8f02a05,
+ 0x80708170, 0x8e708a70,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x80707a70,
+ 0x8070ff70, 0x00000080,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0080, 0xd28c0002,
+ 0x000100c1, 0xd28d0003,
+ 0x000204c1, 0x867aff78,
0x00400000, 0xbf850003,
0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf850051,
+ 0x10000000, 0xbf850030,
+ 0x24040682, 0xd86e4000,
+ 0x00000002, 0xbf8cc07f,
0xbe840080, 0xd2890000,
0x00000900, 0x80048104,
0xd2890001, 0x00000900,
@@ -544,141 +501,185 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
+ 0x680404ff, 0x00000200,
+ 0xd0c9006a, 0x0000f702,
+ 0xbf87ffd2, 0xbf820015,
+ 0xd1060002, 0x00011103,
+ 0x7e0602ff, 0x00000200,
+ 0xbefc00ff, 0x00010000,
+ 0xbe800077, 0x8677ff77,
+ 0xff7fffff, 0x8777ff77,
+ 0x00058000, 0xd8ec0000,
+ 0x00000002, 0xbf8cc07f,
+ 0xe0765000, 0x701d0002,
+ 0x68040702, 0xd0c9006a,
+ 0x0000f702, 0xbf87fff7,
+ 0xbef70000, 0xbef000ff,
+ 0x00000400, 0xbefe00c1,
+ 0xbeff00c1, 0xb8fb2a05,
+ 0x807b817b, 0x8e7b827b,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0084, 0xbf0a7b7c,
+ 0xbf84006d, 0xbf11017c,
+ 0x807bff7b, 0x00001000,
+ 0x867aff78, 0x00400000,
+ 0xbf850003, 0xb8faf803,
+ 0x897a7aff, 0x10000000,
+ 0xbf850051, 0xbe840080,
+ 0xd2890000, 0x00000900,
+ 0x80048104, 0xd2890001,
+ 0x00000900, 0x80048104,
+ 0xd2890002, 0x00000900,
+ 0x80048104, 0xd2890003,
+ 0x00000900, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
0xbe840080, 0xd2890000,
- 0x00000902, 0x80048104,
- 0xd2890001, 0x00000902,
+ 0x00000901, 0x80048104,
+ 0xd2890001, 0x00000901,
0x80048104, 0xd2890002,
- 0x00000902, 0x80048104,
- 0xd2890003, 0x00000902,
+ 0x00000901, 0x80048104,
+ 0xd2890003, 0x00000901,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000903,
+ 0xd2890000, 0x00000902,
0x80048104, 0xd2890001,
- 0x00000903, 0x80048104,
- 0xd2890002, 0x00000903,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
0x80048104, 0xd2890003,
- 0x00000903, 0x80048104,
+ 0x00000902, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
- 0x807c847c, 0xbf0a7b7c,
- 0xbf85ffb1, 0xbf9c0000,
- 0xbf820012, 0x7e000300,
- 0x7e020301, 0x7e040302,
- 0x7e060303, 0xe0724000,
- 0x701d0000, 0xe0724100,
- 0x701d0100, 0xe0724200,
- 0x701d0200, 0xe0724300,
- 0x701d0300, 0x807c847c,
- 0x8070ff70, 0x00000400,
- 0xbf0a7b7c, 0xbf85ffef,
- 0xbf9c0000, 0xbf8200c7,
- 0xbef4007e, 0x8675ff7f,
- 0x0000ffff, 0x8775ff75,
- 0x00040000, 0xbef60080,
- 0xbef700ff, 0x00807fac,
- 0x866eff7f, 0x04000000,
- 0xbf84001e, 0xbefe00c1,
- 0xbeff00c1, 0xb8ef4306,
- 0x866fc16f, 0xbf840019,
- 0x8e6f866f, 0x8e6f826f,
- 0xbef6006f, 0xb8f82a05,
- 0x80788178, 0x8e788a78,
- 0xb8ee1605, 0x806e816e,
- 0x8e6e866e, 0x80786e78,
- 0x8078ff78, 0x00000080,
- 0xbef600ff, 0x01000000,
- 0xbefc0080, 0xe0510000,
- 0x781d0000, 0xe0510100,
- 0x781d0000, 0x807cff7c,
- 0x00000200, 0x8078ff78,
- 0x00000200, 0xbf0a6f7c,
- 0xbf85fff6, 0xbefe00c1,
- 0xbeff00c1, 0xbef600ff,
- 0x01000000, 0xb8ef2a05,
- 0x806f816f, 0x8e6f826f,
- 0x806fff6f, 0x00008000,
- 0xbef80080, 0xbeee0078,
- 0x8078ff78, 0x00000400,
- 0xbefc0084, 0xbf11087c,
- 0xe0524000, 0x781d0000,
- 0xe0524100, 0x781d0100,
- 0xe0524200, 0x781d0200,
- 0xe0524300, 0x781d0300,
- 0xbf8c0f70, 0x7e000300,
- 0x7e020301, 0x7e040302,
- 0x7e060303, 0x807c847c,
- 0x8078ff78, 0x00000400,
- 0xbf0a6f7c, 0xbf85ffee,
- 0xbf9c0000, 0xe0524000,
- 0x6e1d0000, 0xe0524100,
- 0x6e1d0100, 0xe0524200,
- 0x6e1d0200, 0xe0524300,
- 0x6e1d0300, 0xbf8c0f70,
+ 0xbe840080, 0xd2890000,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
+ 0x80048104, 0xd2890002,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0x807c847c,
+ 0xbf0a7b7c, 0xbf85ffb1,
+ 0xbf9c0000, 0xbf820012,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
+ 0x807c847c, 0x8070ff70,
+ 0x00000400, 0xbf0a7b7c,
+ 0xbf85ffef, 0xbf9c0000,
+ 0xbf8200c7, 0xbef4007e,
+ 0x8675ff7f, 0x0000ffff,
+ 0x8775ff75, 0x00040000,
+ 0xbef60080, 0xbef700ff,
+ 0x00807fac, 0x866eff7f,
+ 0x04000000, 0xbf84001e,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8ef4306, 0x866fc16f,
+ 0xbf840019, 0x8e6f866f,
+ 0x8e6f826f, 0xbef6006f,
0xb8f82a05, 0x80788178,
0x8e788a78, 0xb8ee1605,
0x806e816e, 0x8e6e866e,
- 0x80786e78, 0x80f8c078,
- 0xb8ef1605, 0x806f816f,
- 0x8e6f846f, 0x8e76826f,
+ 0x80786e78, 0x8078ff78,
+ 0x00000080, 0xbef600ff,
+ 0x01000000, 0xbefc0080,
+ 0xe0510000, 0x781d0000,
+ 0xe0510100, 0x781d0000,
+ 0x807cff7c, 0x00000200,
+ 0x8078ff78, 0x00000200,
+ 0xbf0a6f7c, 0xbf85fff6,
+ 0xbefe00c1, 0xbeff00c1,
0xbef600ff, 0x01000000,
- 0xbefc006f, 0xc031003a,
- 0x00000078, 0x80f8c078,
- 0xbf8cc07f, 0x80fc907c,
- 0xbf800000, 0xbe802d00,
- 0xbe822d02, 0xbe842d04,
- 0xbe862d06, 0xbe882d08,
- 0xbe8a2d0a, 0xbe8c2d0c,
- 0xbe8e2d0e, 0xbf06807c,
- 0xbf84fff0, 0xb8f82a05,
+ 0xb8ef2a05, 0x806f816f,
+ 0x8e6f826f, 0x806fff6f,
+ 0x00008000, 0xbef80080,
+ 0xbeee0078, 0x8078ff78,
+ 0x00000400, 0xbefc0084,
+ 0xbf11087c, 0xe0524000,
+ 0x781d0000, 0xe0524100,
+ 0x781d0100, 0xe0524200,
+ 0x781d0200, 0xe0524300,
+ 0x781d0300, 0xbf8c0f70,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+ 0x807c847c, 0x8078ff78,
+ 0x00000400, 0xbf0a6f7c,
+ 0xbf85ffee, 0xbf9c0000,
+ 0xe0524000, 0x6e1d0000,
+ 0xe0524100, 0x6e1d0100,
+ 0xe0524200, 0x6e1d0200,
+ 0xe0524300, 0x6e1d0300,
+ 0xbf8c0f70, 0xb8f82a05,
0x80788178, 0x8e788a78,
0xb8ee1605, 0x806e816e,
0x8e6e866e, 0x80786e78,
- 0xbef60084, 0xbef600ff,
- 0x01000000, 0xc0211bfa,
+ 0x80f8c078, 0xb8ef1605,
+ 0x806f816f, 0x8e6f846f,
+ 0x8e76826f, 0xbef600ff,
+ 0x01000000, 0xbefc006f,
+ 0xc031003a, 0x00000078,
+ 0x80f8c078, 0xbf8cc07f,
+ 0x80fc907c, 0xbf800000,
+ 0xbe802d00, 0xbe822d02,
+ 0xbe842d04, 0xbe862d06,
+ 0xbe882d08, 0xbe8a2d0a,
+ 0xbe8c2d0c, 0xbe8e2d0e,
+ 0xbf06807c, 0xbf84fff0,
+ 0xb8f82a05, 0x80788178,
+ 0x8e788a78, 0xb8ee1605,
+ 0x806e816e, 0x8e6e866e,
+ 0x80786e78, 0xbef60084,
+ 0xbef600ff, 0x01000000,
+ 0xc0211bfa, 0x00000078,
+ 0x80788478, 0xc0211b3a,
0x00000078, 0x80788478,
- 0xc0211b3a, 0x00000078,
- 0x80788478, 0xc0211b7a,
+ 0xc0211b7a, 0x00000078,
+ 0x80788478, 0xc0211c3a,
0x00000078, 0x80788478,
- 0xc0211c3a, 0x00000078,
- 0x80788478, 0xc0211c7a,
+ 0xc0211c7a, 0x00000078,
+ 0x80788478, 0xc0211eba,
0x00000078, 0x80788478,
- 0xc0211eba, 0x00000078,
- 0x80788478, 0xc0211efa,
+ 0xc0211efa, 0x00000078,
+ 0x80788478, 0xc0211a3a,
0x00000078, 0x80788478,
- 0xc0211a3a, 0x00000078,
- 0x80788478, 0xc0211a7a,
+ 0xc0211a7a, 0x00000078,
+ 0x80788478, 0xc0211cfa,
0x00000078, 0x80788478,
- 0xc0211cfa, 0x00000078,
- 0x80788478, 0xbf8cc07f,
- 0xbefc006f, 0xbefe0070,
- 0xbeff0071, 0x866f7bff,
- 0x000003ff, 0xb96f4803,
- 0x866f7bff, 0xfffff800,
- 0x8f6f8b6f, 0xb96fa2c3,
- 0xb973f801, 0xb8ee2a05,
- 0x806e816e, 0x8e6e8a6e,
- 0xb8ef1605, 0x806f816f,
- 0x8e6f866f, 0x806e6f6e,
- 0x806e746e, 0x826f8075,
- 0x866fff6f, 0x0000ffff,
- 0xc00b1c37, 0x00000050,
- 0xc00b1d37, 0x00000060,
- 0xc0031e77, 0x00000074,
- 0xbf8cc07f, 0x8f6e8b77,
- 0x866eff6e, 0x001f8000,
- 0xb96ef807, 0x866dff6d,
- 0x0000ffff, 0x86fe7e7e,
- 0x86ea6a6a, 0x8f6e837a,
- 0xb96ee0c2, 0xbf800002,
- 0xb97a0002, 0xbf8a0000,
- 0xbe801f6c, 0xbf9b0000,
+ 0xbf8cc07f, 0xbefc006f,
+ 0xbefe0070, 0xbeff0071,
+ 0x866f7bff, 0x000003ff,
+ 0xb96f4803, 0x866f7bff,
+ 0xfffff800, 0x8f6f8b6f,
+ 0xb96fa2c3, 0xb973f801,
+ 0xb8ee2a05, 0x806e816e,
+ 0x8e6e8a6e, 0xb8ef1605,
+ 0x806f816f, 0x8e6f866f,
+ 0x806e6f6e, 0x806e746e,
+ 0x826f8075, 0x866fff6f,
+ 0x0000ffff, 0xc00b1c37,
+ 0x00000050, 0xc00b1d37,
+ 0x00000060, 0xc0031e77,
+ 0x00000074, 0xbf8cc07f,
+ 0x8f6e8b77, 0x866eff6e,
+ 0x001f8000, 0xb96ef807,
+ 0x866dff6d, 0x0000ffff,
+ 0x86fe7e7e, 0x86ea6a6a,
+ 0x8f6e837a, 0xb96ee0c2,
+ 0xbf800002, 0xb97a0002,
+ 0xbf8a0000, 0xbe801f6c,
+ 0xbf9b0000, 0x00000000,
};
static const uint32_t cwsr_trap_nv1x_hex[] = {
- 0xbf820001, 0xbf820394,
+ 0xbf820001, 0xbf820393,
0xb0804004, 0xb978f802,
0x8a78ff78, 0x00020006,
0xb97bf803, 0x876eff78,
@@ -711,19 +712,19 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xbf0d8f7b, 0xbf840002,
0x887bff7b, 0xffff0000,
0xf4011bbd, 0xfa000010,
- 0xbf8cc07f, 0x8f6e976e,
+ 0xbf8c0000, 0x8f6e976e,
0x8a77ff77, 0x00800000,
0x88776e77, 0xf4051bbd,
- 0xfa000000, 0xbf8cc07f,
+ 0xfa000000, 0xbf8c0000,
0xf4051ebd, 0xfa000008,
- 0xbf8cc07f, 0x87ee6e6e,
+ 0xbf8c0000, 0x87ee6e6e,
0xbf840001, 0xbe80206e,
- 0x876eff6d, 0x01ff0000,
- 0xbf850005, 0x8878ff78,
- 0x00002000, 0x80ec886c,
- 0x82ed806d, 0xbf820005,
- 0x876eff6d, 0x01000000,
- 0xbf850002, 0x806c846c,
+ 0x876eff6d, 0x00ff0000,
+ 0xbf850008, 0x876eff6d,
+ 0x01000000, 0xbf850007,
+ 0x8878ff78, 0x00002000,
+ 0x80ec886c, 0x82ed806d,
+ 0xbf820002, 0x806c846c,
0x826d806d, 0x876dff6d,
0x0000ffff, 0x907a8977,
0x877bff7a, 0x003f8000,
@@ -932,23 +933,48 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xbf850002, 0xbeff0380,
0xbf820001, 0xbeff03c1,
0xb97b4306, 0x877bc17b,
- 0xbf840086, 0xbf8a0000,
+ 0xbf840085, 0xbf8a0000,
0x877aff6d, 0x80000000,
- 0xbf840082, 0x8f7b867b,
- 0x8f7b827b, 0xbef6037b,
- 0xb9703a05, 0x80708170,
- 0xbf0d9973, 0xbf850002,
- 0x8f708970, 0xbf820001,
- 0x8f708a70, 0xb97a1e06,
- 0x8f7a8a7a, 0x80707a70,
- 0x8070ff70, 0x00000200,
- 0x8070ff70, 0x00000080,
- 0xbef603ff, 0x01000000,
- 0xd7650000, 0x000100c1,
- 0xd7660000, 0x000200c1,
- 0x16000084, 0x907c9973,
- 0x877c817c, 0xbf06817c,
- 0xbefc0380, 0xbf850033,
+ 0xbf840081, 0x8f7b887b,
+ 0xbef6037b, 0xb9703a05,
+ 0x80708170, 0xbf0d9973,
+ 0xbf850002, 0x8f708970,
+ 0xbf820001, 0x8f708a70,
+ 0xb97a1e06, 0x8f7a8a7a,
+ 0x80707a70, 0x8070ff70,
+ 0x00000200, 0x8070ff70,
+ 0x00000080, 0xbef603ff,
+ 0x01000000, 0xd7650000,
+ 0x000100c1, 0xd7660000,
+ 0x000200c1, 0x16000084,
+ 0x907c9973, 0x877c817c,
+ 0xbf06817c, 0xbefc0380,
+ 0xbf850033, 0xb97af803,
+ 0x8a7a7aff, 0x10000000,
+ 0xbf85001d, 0xd8d80000,
+ 0x01000000, 0xbf8c0000,
+ 0xbe840380, 0xd7600000,
+ 0x00000901, 0x80048104,
+ 0xd7600001, 0x00000901,
+ 0x80048104, 0xd7600002,
+ 0x00000901, 0x80048104,
+ 0xd7600003, 0x00000901,
+ 0x80048104, 0xf469003a,
+ 0xe0000000, 0x80709070,
+ 0xbf06a004, 0xbf84ffef,
+ 0x807cff7c, 0x00000080,
+ 0xd5250000, 0x0001ff00,
+ 0x00000080, 0xbf0a7b7c,
+ 0xbf85ffe4, 0xbf820044,
+ 0xbe8303ff, 0x00000080,
+ 0xbf800000, 0xbf800000,
+ 0xbf800000, 0xd8d80000,
+ 0x01000000, 0xbf8c0000,
+ 0xe0704000, 0x705d0100,
+ 0x807c037c, 0x80700370,
+ 0xd5250000, 0x0001ff00,
+ 0x00000080, 0xbf0a7b7c,
+ 0xbf85fff4, 0xbf820032,
0xb97af803, 0x8a7a7aff,
0x10000000, 0xbf85001d,
0xd8d80000, 0x01000000,
@@ -960,24 +986,45 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0x80048104, 0xd7600003,
0x00000901, 0x80048104,
0xf469003a, 0xe0000000,
- 0x80709070, 0xbf06a004,
+ 0x80709070, 0xbf06c004,
0xbf84ffef, 0x807cff7c,
- 0x00000080, 0xd5250000,
- 0x0001ff00, 0x00000080,
+ 0x00000100, 0xd5250000,
+ 0x0001ff00, 0x00000100,
0xbf0a7b7c, 0xbf85ffe4,
- 0xbf820044, 0xbe8303ff,
- 0x00000080, 0xbf800000,
+ 0xbf820011, 0xbe8303ff,
+ 0x00000100, 0xbf800000,
0xbf800000, 0xbf800000,
0xd8d80000, 0x01000000,
0xbf8c0000, 0xe0704000,
0x705d0100, 0x807c037c,
0x80700370, 0xd5250000,
- 0x0001ff00, 0x00000080,
+ 0x0001ff00, 0x00000100,
0xbf0a7b7c, 0xbf85fff4,
- 0xbf820032, 0xb97af803,
- 0x8a7a7aff, 0x10000000,
- 0xbf85001d, 0xd8d80000,
- 0x01000000, 0xbf8c0000,
+ 0xbefe03c1, 0x907c9973,
+ 0x877c817c, 0xbf06817c,
+ 0xbf850004, 0xbef003ff,
+ 0x00000200, 0xbeff0380,
+ 0xbf820003, 0xbef003ff,
+ 0x00000400, 0xbeff03c1,
+ 0xb97b3a05, 0x807b817b,
+ 0x8f7b827b, 0x907c9973,
+ 0x877c817c, 0xbf06817c,
+ 0xbf85006b, 0xbef603ff,
+ 0x01000000, 0xbefc0384,
+ 0xbf0a7b7c, 0xbf8400fa,
+ 0xb97af803, 0x8a7a7aff,
+ 0x10000000, 0xbf850050,
+ 0x7e008700, 0x7e028701,
+ 0x7e048702, 0x7e068703,
+ 0xbe840380, 0xd7600000,
+ 0x00000900, 0x80048104,
+ 0xd7600001, 0x00000900,
+ 0x80048104, 0xd7600002,
+ 0x00000900, 0x80048104,
+ 0xd7600003, 0x00000900,
+ 0x80048104, 0xf469003a,
+ 0xe0000000, 0x80709070,
+ 0xbf06a004, 0xbf84ffef,
0xbe840380, 0xd7600000,
0x00000901, 0x80048104,
0xd7600001, 0x00000901,
@@ -986,32 +1033,39 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xd7600003, 0x00000901,
0x80048104, 0xf469003a,
0xe0000000, 0x80709070,
- 0xbf06c004, 0xbf84ffef,
- 0x807cff7c, 0x00000100,
- 0xd5250000, 0x0001ff00,
- 0x00000100, 0xbf0a7b7c,
- 0xbf85ffe4, 0xbf820011,
- 0xbe8303ff, 0x00000100,
- 0xbf800000, 0xbf800000,
- 0xbf800000, 0xd8d80000,
- 0x01000000, 0xbf8c0000,
- 0xe0704000, 0x705d0100,
- 0x807c037c, 0x80700370,
- 0xd5250000, 0x0001ff00,
- 0x00000100, 0xbf0a7b7c,
- 0xbf85fff4, 0xbefe03c1,
- 0x907c9973, 0x877c817c,
- 0xbf06817c, 0xbf850004,
- 0xbef003ff, 0x00000200,
- 0xbeff0380, 0xbf820003,
- 0xbef003ff, 0x00000400,
- 0xbeff03c1, 0xb97b3a05,
- 0x807b817b, 0x8f7b827b,
- 0x907c9973, 0x877c817c,
- 0xbf06817c, 0xbf85006b,
+ 0xbf06a004, 0xbf84ffef,
+ 0xbe840380, 0xd7600000,
+ 0x00000902, 0x80048104,
+ 0xd7600001, 0x00000902,
+ 0x80048104, 0xd7600002,
+ 0x00000902, 0x80048104,
+ 0xd7600003, 0x00000902,
+ 0x80048104, 0xf469003a,
+ 0xe0000000, 0x80709070,
+ 0xbf06a004, 0xbf84ffef,
+ 0xbe840380, 0xd7600000,
+ 0x00000903, 0x80048104,
+ 0xd7600001, 0x00000903,
+ 0x80048104, 0xd7600002,
+ 0x00000903, 0x80048104,
+ 0xd7600003, 0x00000903,
+ 0x80048104, 0xf469003a,
+ 0xe0000000, 0x80709070,
+ 0xbf06a004, 0xbf84ffef,
+ 0x807c847c, 0xbf0a7b7c,
+ 0xbf85ffb1, 0xbf8200a6,
+ 0x7e008700, 0x7e028701,
+ 0x7e048702, 0x7e068703,
+ 0xe0704000, 0x705d0000,
+ 0xe0704080, 0x705d0100,
+ 0xe0704100, 0x705d0200,
+ 0xe0704180, 0x705d0300,
+ 0x807c847c, 0x8070ff70,
+ 0x00000200, 0xbf0a7b7c,
+ 0xbf85ffef, 0xbf820094,
0xbef603ff, 0x01000000,
0xbefc0384, 0xbf0a7b7c,
- 0xbf8400fa, 0xb97af803,
+ 0xbf840065, 0xb97af803,
0x8a7a7aff, 0x10000000,
0xbf850050, 0x7e008700,
0x7e028701, 0x7e048702,
@@ -1023,7 +1077,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0x80048104, 0xd7600003,
0x00000900, 0x80048104,
0xf469003a, 0xe0000000,
- 0x80709070, 0xbf06a004,
+ 0x80709070, 0xbf06c004,
0xbf84ffef, 0xbe840380,
0xd7600000, 0x00000901,
0x80048104, 0xd7600001,
@@ -1032,7 +1086,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0x80048104, 0xd7600003,
0x00000901, 0x80048104,
0xf469003a, 0xe0000000,
- 0x80709070, 0xbf06a004,
+ 0x80709070, 0xbf06c004,
0xbf84ffef, 0xbe840380,
0xd7600000, 0x00000902,
0x80048104, 0xd7600001,
@@ -1041,7 +1095,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0x80048104, 0xd7600003,
0x00000902, 0x80048104,
0xf469003a, 0xe0000000,
- 0x80709070, 0xbf06a004,
+ 0x80709070, 0xbf06c004,
0xbf84ffef, 0xbe840380,
0xd7600000, 0x00000903,
0x80048104, 0xd7600001,
@@ -1050,25 +1104,24 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0x80048104, 0xd7600003,
0x00000903, 0x80048104,
0xf469003a, 0xe0000000,
- 0x80709070, 0xbf06a004,
+ 0x80709070, 0xbf06c004,
0xbf84ffef, 0x807c847c,
0xbf0a7b7c, 0xbf85ffb1,
- 0xbf8200a6, 0x7e008700,
+ 0xbf82003b, 0x7e008700,
0x7e028701, 0x7e048702,
0x7e068703, 0xe0704000,
- 0x705d0000, 0xe0704080,
- 0x705d0100, 0xe0704100,
- 0x705d0200, 0xe0704180,
+ 0x705d0000, 0xe0704100,
+ 0x705d0100, 0xe0704200,
+ 0x705d0200, 0xe0704300,
0x705d0300, 0x807c847c,
- 0x8070ff70, 0x00000200,
+ 0x8070ff70, 0x00000400,
0xbf0a7b7c, 0xbf85ffef,
- 0xbf820094, 0xbef603ff,
- 0x01000000, 0xbefc0384,
- 0xbf0a7b7c, 0xbf840065,
- 0xb97af803, 0x8a7a7aff,
- 0x10000000, 0xbf850050,
- 0x7e008700, 0x7e028701,
- 0x7e048702, 0x7e068703,
+ 0xb97b1e06, 0x877bc17b,
+ 0xbf840027, 0x8f7b837b,
+ 0x807b7c7b, 0xbefe03c1,
+ 0xbeff0380, 0xb97af803,
+ 0x8a7a7aff, 0x10000000,
+ 0xbf850017, 0x7e008700,
0xbe840380, 0xd7600000,
0x00000900, 0x80048104,
0xd7600001, 0x00000900,
@@ -1078,78 +1131,25 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0x80048104, 0xf469003a,
0xe0000000, 0x80709070,
0xbf06c004, 0xbf84ffef,
- 0xbe840380, 0xd7600000,
- 0x00000901, 0x80048104,
- 0xd7600001, 0x00000901,
- 0x80048104, 0xd7600002,
- 0x00000901, 0x80048104,
- 0xd7600003, 0x00000901,
- 0x80048104, 0xf469003a,
- 0xe0000000, 0x80709070,
- 0xbf06c004, 0xbf84ffef,
- 0xbe840380, 0xd7600000,
- 0x00000902, 0x80048104,
- 0xd7600001, 0x00000902,
- 0x80048104, 0xd7600002,
- 0x00000902, 0x80048104,
- 0xd7600003, 0x00000902,
- 0x80048104, 0xf469003a,
- 0xe0000000, 0x80709070,
- 0xbf06c004, 0xbf84ffef,
- 0xbe840380, 0xd7600000,
- 0x00000903, 0x80048104,
- 0xd7600001, 0x00000903,
- 0x80048104, 0xd7600002,
- 0x00000903, 0x80048104,
- 0xd7600003, 0x00000903,
- 0x80048104, 0xf469003a,
- 0xe0000000, 0x80709070,
- 0xbf06c004, 0xbf84ffef,
- 0x807c847c, 0xbf0a7b7c,
- 0xbf85ffb1, 0xbf82003b,
- 0x7e008700, 0x7e028701,
- 0x7e048702, 0x7e068703,
- 0xe0704000, 0x705d0000,
- 0xe0704100, 0x705d0100,
- 0xe0704200, 0x705d0200,
- 0xe0704300, 0x705d0300,
- 0x807c847c, 0x8070ff70,
- 0x00000400, 0xbf0a7b7c,
- 0xbf85ffef, 0xb97b1e06,
- 0x877bc17b, 0xbf840027,
- 0x8f7b837b, 0x807b7c7b,
- 0xbefe03c1, 0xbeff0380,
- 0xb97af803, 0x8a7a7aff,
- 0x10000000, 0xbf850017,
- 0x7e008700, 0xbe840380,
- 0xd7600000, 0x00000900,
- 0x80048104, 0xd7600001,
- 0x00000900, 0x80048104,
- 0xd7600002, 0x00000900,
- 0x80048104, 0xd7600003,
- 0x00000900, 0x80048104,
- 0xf469003a, 0xe0000000,
- 0x80709070, 0xbf06c004,
- 0xbf84ffef, 0x807c817c,
- 0xbf0a7b7c, 0xbf85ffea,
- 0xbf820008, 0x7e008700,
- 0xe0704000, 0x705d0000,
- 0x807c817c, 0x8070ff70,
- 0x00000080, 0xbf0a7b7c,
- 0xbf85fff8, 0xbf820144,
- 0xbef4037e, 0x8775ff7f,
- 0x0000ffff, 0x8875ff75,
- 0x00040000, 0xbef60380,
- 0xbef703ff, 0x10807fac,
- 0xb97202dc, 0x8f729972,
- 0x876eff7f, 0x04000000,
- 0xbf840034, 0xbefe03c1,
- 0x907c9972, 0x877c817c,
- 0xbf06817c, 0xbf850002,
- 0xbeff0380, 0xbf820001,
- 0xbeff03c1, 0xb96f4306,
- 0x876fc16f, 0xbf840029,
- 0x8f6f866f, 0x8f6f826f,
+ 0x807c817c, 0xbf0a7b7c,
+ 0xbf85ffea, 0xbf820008,
+ 0x7e008700, 0xe0704000,
+ 0x705d0000, 0x807c817c,
+ 0x8070ff70, 0x00000080,
+ 0xbf0a7b7c, 0xbf85fff8,
+ 0xbf82013f, 0xbef4037e,
+ 0x8775ff7f, 0x0000ffff,
+ 0x8875ff75, 0x00040000,
+ 0xbef60380, 0xbef703ff,
+ 0x10807fac, 0xb97202dc,
+ 0x8f729972, 0x876eff7f,
+ 0x04000000, 0xbf840033,
+ 0xbefe03c1, 0x907c9972,
+ 0x877c817c, 0xbf06817c,
+ 0xbf850002, 0xbeff0380,
+ 0xbf820001, 0xbeff03c1,
+ 0xb96f4306, 0x876fc16f,
+ 0xbf840028, 0x8f6f886f,
0xbef6036f, 0xb9783a05,
0x80788178, 0xbf0d9972,
0xbf850002, 0x8f788978,
@@ -1185,7 +1185,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0x785d0000, 0xe0304080,
0x785d0100, 0xe0304100,
0x785d0200, 0xe0304180,
- 0x785d0300, 0xbf8c3f70,
+ 0x785d0300, 0xbf8c0000,
0x7e008500, 0x7e028501,
0x7e048502, 0x7e068503,
0x807c847c, 0x8078ff78,
@@ -1194,7 +1194,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0x6e5d0000, 0xe0304080,
0x6e5d0100, 0xe0304100,
0x6e5d0200, 0xe0304180,
- 0x6e5d0300, 0xbf8c3f70,
+ 0x6e5d0300, 0xbf8c0000,
0xbf820034, 0xbef603ff,
0x01000000, 0xbeee0378,
0x8078ff78, 0x00000400,
@@ -1203,7 +1203,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0x785d0000, 0xe0304100,
0x785d0100, 0xe0304200,
0x785d0200, 0xe0304300,
- 0x785d0300, 0xbf8c3f70,
+ 0x785d0300, 0xbf8c0000,
0x7e008500, 0x7e028501,
0x7e048502, 0x7e068503,
0x807c847c, 0x8078ff78,
@@ -1213,7 +1213,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0x8f6f836f, 0x806f7c6f,
0xbefe03c1, 0xbeff0380,
0xe0304000, 0x785d0000,
- 0xbf8c3f70, 0x7e008500,
+ 0xbf8c0000, 0x7e008500,
0x807c817c, 0x8078ff78,
0x00000080, 0xbf0a6f7c,
0xbf85fff7, 0xbeff03c1,
@@ -1221,7 +1221,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xe0304100, 0x6e5d0100,
0xe0304200, 0x6e5d0200,
0xe0304300, 0x6e5d0300,
- 0xbf8c3f70, 0xb9783a05,
+ 0xbf8c0000, 0xb9783a05,
0x80788178, 0xbf0d9972,
0xbf850002, 0x8f788978,
0xbf820001, 0x8f788a78,
@@ -1232,16 +1232,16 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0x01000000, 0xbefc03ff,
0x0000006c, 0x80f89078,
0xf429003a, 0xf0000000,
- 0xbf8cc07f, 0x80fc847c,
+ 0xbf8c0000, 0x80fc847c,
0xbf800000, 0xbe803100,
0xbe823102, 0x80f8a078,
0xf42d003a, 0xf0000000,
- 0xbf8cc07f, 0x80fc887c,
+ 0xbf8c0000, 0x80fc887c,
0xbf800000, 0xbe803100,
0xbe823102, 0xbe843104,
0xbe863106, 0x80f8c078,
0xf431003a, 0xf0000000,
- 0xbf8cc07f, 0x80fc907c,
+ 0xbf8c0000, 0x80fc907c,
0xbf800000, 0xbe803100,
0xbe823102, 0xbe843104,
0xbe863106, 0xbe883108,
@@ -1271,15 +1271,13 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xf4211cfa, 0xf0000000,
0x80788478, 0xf4211bba,
0xf0000000, 0x80788478,
- 0xbf8cc07f, 0xb9eef814,
+ 0xbf8c0000, 0xb9eef814,
0xf4211bba, 0xf0000000,
- 0x80788478, 0xbf8cc07f,
+ 0x80788478, 0xbf8c0000,
0xb9eef815, 0xbefc036f,
0xbefe0370, 0xbeff0371,
- 0x876f7bff, 0x000003ff,
- 0xb9ef4803, 0xb9f9f816,
- 0x876f7bff, 0xfffff800,
- 0x906f8b6f, 0xb9efa2c3,
+ 0xb9f9f816, 0xb9fb4803,
+ 0x907b8b7b, 0xb9fba2c3,
0xb9f3f801, 0xb96e3a05,
0x806e816e, 0xbf0d9972,
0xbf850002, 0x8f6e896e,
@@ -1291,7 +1289,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0x0000ffff, 0xf4091c37,
0xfa000050, 0xf4091d37,
0xfa000060, 0xf4011e77,
- 0xfa000074, 0xbf8cc07f,
+ 0xfa000074, 0xbf8c0000,
0x906e8977, 0x876fff6e,
0x003f8000, 0x906e8677,
0x876eff6e, 0x02000000,
@@ -1305,7 +1303,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
};
static const uint32_t cwsr_trap_arcturus_hex[] = {
- 0xbf820001, 0xbf8202d4,
+ 0xbf820001, 0xbf8202d5,
0xb8f8f802, 0x8978ff78,
0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
@@ -1422,99 +1420,37 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
0xbefe007c, 0xbefc0070,
0xc0611c7a, 0x0000007c,
0xbf8cc07f, 0x80708470,
- 0xbefc007e, 0x867aff7f,
- 0x04000000, 0xbeef0080,
- 0x876f6f7a, 0xb8f02a05,
- 0x80708170, 0x8e708a70,
- 0x8e708170, 0xb8fb1605,
- 0x807b817b, 0x8e7b847b,
- 0x8e76827b, 0xbef600ff,
- 0x01000000, 0xbef20174,
- 0x80747074, 0x82758075,
- 0xbefc0080, 0xbf800000,
- 0xbe802b00, 0xbe822b02,
- 0xbe842b04, 0xbe862b06,
- 0xbe882b08, 0xbe8a2b0a,
- 0xbe8c2b0c, 0xbe8e2b0e,
- 0xc06b003a, 0x00000000,
- 0xbf8cc07f, 0xc06b013a,
- 0x00000010, 0xbf8cc07f,
- 0xc06b023a, 0x00000020,
- 0xbf8cc07f, 0xc06b033a,
- 0x00000030, 0xbf8cc07f,
- 0x8074c074, 0x82758075,
- 0x807c907c, 0xbf0a7b7c,
- 0xbf85ffe7, 0xbef40172,
- 0xbef00080, 0xbefe00c1,
- 0xbeff00c1, 0xbee80080,
- 0xbee90080, 0xbef600ff,
- 0x01000000, 0x867aff78,
- 0x00400000, 0xbf850003,
- 0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf85004d,
- 0xbe840080, 0xd2890000,
- 0x00000900, 0x80048104,
- 0xd2890001, 0x00000900,
- 0x80048104, 0xd2890002,
- 0x00000900, 0x80048104,
- 0xd2890003, 0x00000900,
- 0x80048104, 0xc069003a,
- 0x00000070, 0xbf8cc07f,
- 0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000901,
- 0x80048104, 0xd2890001,
- 0x00000901, 0x80048104,
- 0xd2890002, 0x00000901,
- 0x80048104, 0xd2890003,
- 0x00000901, 0x80048104,
- 0xc069003a, 0x00000070,
- 0xbf8cc07f, 0x80709070,
- 0xbf06c004, 0xbf84ffee,
- 0xbe840080, 0xd2890000,
- 0x00000902, 0x80048104,
- 0xd2890001, 0x00000902,
- 0x80048104, 0xd2890002,
- 0x00000902, 0x80048104,
- 0xd2890003, 0x00000902,
- 0x80048104, 0xc069003a,
- 0x00000070, 0xbf8cc07f,
- 0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000903,
- 0x80048104, 0xd2890001,
- 0x00000903, 0x80048104,
- 0xd2890002, 0x00000903,
- 0x80048104, 0xd2890003,
- 0x00000903, 0x80048104,
- 0xc069003a, 0x00000070,
- 0xbf8cc07f, 0x80709070,
- 0xbf06c004, 0xbf84ffee,
- 0xbf820008, 0xe0724000,
- 0x701d0000, 0xe0724100,
- 0x701d0100, 0xe0724200,
- 0x701d0200, 0xe0724300,
- 0x701d0300, 0xbefe00c1,
- 0xbeff00c1, 0xb8fb4306,
- 0x867bc17b, 0xbf840064,
- 0xbf8a0000, 0x867aff6f,
- 0x04000000, 0xbf840060,
- 0x8e7b867b, 0x8e7b827b,
- 0xbef6007b, 0xb8f02a05,
- 0x80708170, 0x8e708a70,
- 0x8e708170, 0xb8fa1605,
- 0x807a817a, 0x8e7a867a,
- 0x80707a70, 0x8070ff70,
- 0x00000080, 0xbef600ff,
- 0x01000000, 0xbefc0080,
- 0xd28c0002, 0x000100c1,
- 0xd28d0003, 0x000204c1,
+ 0xbefc007e, 0xbf108080,
+ 0x867aff7f, 0x04000000,
+ 0xbeef0080, 0x876f6f7a,
+ 0xb8f02a05, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fb1605, 0x807b817b,
+ 0x8e7b847b, 0x8e76827b,
+ 0xbef600ff, 0x01000000,
+ 0xbef20174, 0x80747074,
+ 0x82758075, 0xbefc0080,
+ 0xbf800000, 0xbe802b00,
+ 0xbe822b02, 0xbe842b04,
+ 0xbe862b06, 0xbe882b08,
+ 0xbe8a2b0a, 0xbe8c2b0c,
+ 0xbe8e2b0e, 0xc06b003a,
+ 0x00000000, 0xbf8cc07f,
+ 0xc06b013a, 0x00000010,
+ 0xbf8cc07f, 0xc06b023a,
+ 0x00000020, 0xbf8cc07f,
+ 0xc06b033a, 0x00000030,
+ 0xbf8cc07f, 0x8074c074,
+ 0x82758075, 0x807c907c,
+ 0xbf0a7b7c, 0xbf85ffe7,
+ 0xbef40172, 0xbef00080,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xbee80080, 0xbee90080,
+ 0xbef600ff, 0x01000000,
0x867aff78, 0x00400000,
0xbf850003, 0xb8faf803,
0x897a7aff, 0x10000000,
- 0xbf850030, 0x24040682,
- 0xd86e4000, 0x00000002,
- 0xbf8cc07f, 0xbe840080,
+ 0xbf85004d, 0xbe840080,
0xd2890000, 0x00000900,
0x80048104, 0xd2890001,
0x00000900, 0x80048104,
@@ -1533,31 +1469,50 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
- 0xbf84ffee, 0x680404ff,
- 0x00000200, 0xd0c9006a,
- 0x0000f702, 0xbf87ffd2,
- 0xbf820015, 0xd1060002,
- 0x00011103, 0x7e0602ff,
- 0x00000200, 0xbefc00ff,
- 0x00010000, 0xbe800077,
- 0x8677ff77, 0xff7fffff,
- 0x8777ff77, 0x00058000,
- 0xd8ec0000, 0x00000002,
- 0xbf8cc07f, 0xe0765000,
- 0x701d0002, 0x68040702,
- 0xd0c9006a, 0x0000f702,
- 0xbf87fff7, 0xbef70000,
- 0xbef000ff, 0x00000400,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000902,
+ 0x80048104, 0xd2890001,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
+ 0x80048104, 0xd2890003,
+ 0x00000902, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
+ 0x80048104, 0xd2890002,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbf820008,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
0xbefe00c1, 0xbeff00c1,
- 0xb8fb2a05, 0x807b817b,
- 0x8e7b827b, 0xbef600ff,
- 0x01000000, 0xbefc0084,
- 0xbf0a7b7c, 0xbf84006d,
- 0xbf11017c, 0x807bff7b,
- 0x00001000, 0x867aff78,
+ 0xb8fb4306, 0x867bc17b,
+ 0xbf840064, 0xbf8a0000,
+ 0x867aff6f, 0x04000000,
+ 0xbf840060, 0x8e7b867b,
+ 0x8e7b827b, 0xbef6007b,
+ 0xb8f02a05, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x80707a70,
+ 0x8070ff70, 0x00000080,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0080, 0xd28c0002,
+ 0x000100c1, 0xd28d0003,
+ 0x000204c1, 0x867aff78,
0x00400000, 0xbf850003,
0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf850051,
+ 0x10000000, 0xbf850030,
+ 0x24040682, 0xd86e4000,
+ 0x00000002, 0xbf8cc07f,
0xbe840080, 0xd2890000,
0x00000900, 0x80048104,
0xd2890001, 0x00000900,
@@ -1577,215 +1532,259 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
+ 0x680404ff, 0x00000200,
+ 0xd0c9006a, 0x0000f702,
+ 0xbf87ffd2, 0xbf820015,
+ 0xd1060002, 0x00011103,
+ 0x7e0602ff, 0x00000200,
+ 0xbefc00ff, 0x00010000,
+ 0xbe800077, 0x8677ff77,
+ 0xff7fffff, 0x8777ff77,
+ 0x00058000, 0xd8ec0000,
+ 0x00000002, 0xbf8cc07f,
+ 0xe0765000, 0x701d0002,
+ 0x68040702, 0xd0c9006a,
+ 0x0000f702, 0xbf87fff7,
+ 0xbef70000, 0xbef000ff,
+ 0x00000400, 0xbefe00c1,
+ 0xbeff00c1, 0xb8fb2a05,
+ 0x807b817b, 0x8e7b827b,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0084, 0xbf0a7b7c,
+ 0xbf84006d, 0xbf11017c,
+ 0x807bff7b, 0x00001000,
+ 0x867aff78, 0x00400000,
+ 0xbf850003, 0xb8faf803,
+ 0x897a7aff, 0x10000000,
+ 0xbf850051, 0xbe840080,
+ 0xd2890000, 0x00000900,
+ 0x80048104, 0xd2890001,
+ 0x00000900, 0x80048104,
+ 0xd2890002, 0x00000900,
+ 0x80048104, 0xd2890003,
+ 0x00000900, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
0xbe840080, 0xd2890000,
- 0x00000902, 0x80048104,
- 0xd2890001, 0x00000902,
+ 0x00000901, 0x80048104,
+ 0xd2890001, 0x00000901,
0x80048104, 0xd2890002,
- 0x00000902, 0x80048104,
- 0xd2890003, 0x00000902,
+ 0x00000901, 0x80048104,
+ 0xd2890003, 0x00000901,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000903,
+ 0xd2890000, 0x00000902,
0x80048104, 0xd2890001,
- 0x00000903, 0x80048104,
- 0xd2890002, 0x00000903,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
0x80048104, 0xd2890003,
- 0x00000903, 0x80048104,
+ 0x00000902, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
- 0x807c847c, 0xbf0a7b7c,
- 0xbf85ffb1, 0xbf9c0000,
- 0xbf820012, 0x7e000300,
- 0x7e020301, 0x7e040302,
- 0x7e060303, 0xe0724000,
- 0x701d0000, 0xe0724100,
- 0x701d0100, 0xe0724200,
- 0x701d0200, 0xe0724300,
- 0x701d0300, 0x807c847c,
- 0x8070ff70, 0x00000400,
- 0xbf0a7b7c, 0xbf85ffef,
- 0xbf9c0000, 0xbefc0080,
- 0xbf11017c, 0x867aff78,
- 0x00400000, 0xbf850003,
- 0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf850059,
- 0xd3d84000, 0x18000100,
- 0xd3d84001, 0x18000101,
- 0xd3d84002, 0x18000102,
- 0xd3d84003, 0x18000103,
0xbe840080, 0xd2890000,
- 0x00000900, 0x80048104,
- 0xd2890001, 0x00000900,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
0x80048104, 0xd2890002,
- 0x00000900, 0x80048104,
- 0xd2890003, 0x00000900,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000901,
+ 0xbf84ffee, 0x807c847c,
+ 0xbf0a7b7c, 0xbf85ffb1,
+ 0xbf9c0000, 0xbf820012,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
+ 0x807c847c, 0x8070ff70,
+ 0x00000400, 0xbf0a7b7c,
+ 0xbf85ffef, 0xbf9c0000,
+ 0xbefc0080, 0xbf11017c,
+ 0x867aff78, 0x00400000,
+ 0xbf850003, 0xb8faf803,
+ 0x897a7aff, 0x10000000,
+ 0xbf850059, 0xd3d84000,
+ 0x18000100, 0xd3d84001,
+ 0x18000101, 0xd3d84002,
+ 0x18000102, 0xd3d84003,
+ 0x18000103, 0xbe840080,
+ 0xd2890000, 0x00000900,
0x80048104, 0xd2890001,
- 0x00000901, 0x80048104,
- 0xd2890002, 0x00000901,
+ 0x00000900, 0x80048104,
+ 0xd2890002, 0x00000900,
0x80048104, 0xd2890003,
- 0x00000901, 0x80048104,
+ 0x00000900, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
0xbe840080, 0xd2890000,
- 0x00000902, 0x80048104,
- 0xd2890001, 0x00000902,
+ 0x00000901, 0x80048104,
+ 0xd2890001, 0x00000901,
0x80048104, 0xd2890002,
- 0x00000902, 0x80048104,
- 0xd2890003, 0x00000902,
+ 0x00000901, 0x80048104,
+ 0xd2890003, 0x00000901,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000903,
+ 0xd2890000, 0x00000902,
0x80048104, 0xd2890001,
- 0x00000903, 0x80048104,
- 0xd2890002, 0x00000903,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
0x80048104, 0xd2890003,
- 0x00000903, 0x80048104,
+ 0x00000902, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
- 0x807c847c, 0xbf0a7b7c,
- 0xbf85ffa9, 0xbf9c0000,
- 0xbf820016, 0xd3d84000,
- 0x18000100, 0xd3d84001,
- 0x18000101, 0xd3d84002,
- 0x18000102, 0xd3d84003,
- 0x18000103, 0xe0724000,
- 0x701d0000, 0xe0724100,
- 0x701d0100, 0xe0724200,
- 0x701d0200, 0xe0724300,
- 0x701d0300, 0x807c847c,
- 0x8070ff70, 0x00000400,
- 0xbf0a7b7c, 0xbf85ffeb,
- 0xbf9c0000, 0xbf8200e3,
- 0xbef4007e, 0x8675ff7f,
- 0x0000ffff, 0x8775ff75,
- 0x00040000, 0xbef60080,
- 0xbef700ff, 0x00807fac,
- 0x866eff7f, 0x04000000,
- 0xbf84001f, 0xbefe00c1,
- 0xbeff00c1, 0xb8ef4306,
- 0x866fc16f, 0xbf84001a,
- 0x8e6f866f, 0x8e6f826f,
- 0xbef6006f, 0xb8f82a05,
- 0x80788178, 0x8e788a78,
- 0x8e788178, 0xb8ee1605,
- 0x806e816e, 0x8e6e866e,
- 0x80786e78, 0x8078ff78,
- 0x00000080, 0xbef600ff,
- 0x01000000, 0xbefc0080,
- 0xe0510000, 0x781d0000,
- 0xe0510100, 0x781d0000,
- 0x807cff7c, 0x00000200,
- 0x8078ff78, 0x00000200,
- 0xbf0a6f7c, 0xbf85fff6,
+ 0xbe840080, 0xd2890000,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
+ 0x80048104, 0xd2890002,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0x807c847c,
+ 0xbf0a7b7c, 0xbf85ffa9,
+ 0xbf9c0000, 0xbf820016,
+ 0xd3d84000, 0x18000100,
+ 0xd3d84001, 0x18000101,
+ 0xd3d84002, 0x18000102,
+ 0xd3d84003, 0x18000103,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
+ 0x807c847c, 0x8070ff70,
+ 0x00000400, 0xbf0a7b7c,
+ 0xbf85ffeb, 0xbf9c0000,
+ 0xbf8200e3, 0xbef4007e,
+ 0x8675ff7f, 0x0000ffff,
+ 0x8775ff75, 0x00040000,
+ 0xbef60080, 0xbef700ff,
+ 0x00807fac, 0x866eff7f,
+ 0x04000000, 0xbf84001f,
0xbefe00c1, 0xbeff00c1,
+ 0xb8ef4306, 0x866fc16f,
+ 0xbf84001a, 0x8e6f866f,
+ 0x8e6f826f, 0xbef6006f,
+ 0xb8f82a05, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0x8078ff78, 0x00000080,
0xbef600ff, 0x01000000,
- 0xb8ef2a05, 0x806f816f,
- 0x8e6f826f, 0x806fff6f,
- 0x00008000, 0xbef80080,
- 0xbeee0078, 0x8078ff78,
- 0x00000400, 0xbefc0084,
- 0xbf11087c, 0xe0524000,
- 0x781d0000, 0xe0524100,
- 0x781d0100, 0xe0524200,
- 0x781d0200, 0xe0524300,
- 0x781d0300, 0xbf8c0f70,
- 0x7e000300, 0x7e020301,
- 0x7e040302, 0x7e060303,
- 0x807c847c, 0x8078ff78,
- 0x00000400, 0xbf0a6f7c,
- 0xbf85ffee, 0xbefc0080,
- 0xbf11087c, 0xe0524000,
- 0x781d0000, 0xe0524100,
- 0x781d0100, 0xe0524200,
- 0x781d0200, 0xe0524300,
- 0x781d0300, 0xbf8c0f70,
- 0xd3d94000, 0x18000100,
- 0xd3d94001, 0x18000101,
- 0xd3d94002, 0x18000102,
- 0xd3d94003, 0x18000103,
- 0x807c847c, 0x8078ff78,
- 0x00000400, 0xbf0a6f7c,
- 0xbf85ffea, 0xbf9c0000,
- 0xe0524000, 0x6e1d0000,
- 0xe0524100, 0x6e1d0100,
- 0xe0524200, 0x6e1d0200,
- 0xe0524300, 0x6e1d0300,
- 0xbf8c0f70, 0xb8f82a05,
- 0x80788178, 0x8e788a78,
- 0x8e788178, 0xb8ee1605,
- 0x806e816e, 0x8e6e866e,
- 0x80786e78, 0x80f8c078,
- 0xb8ef1605, 0x806f816f,
- 0x8e6f846f, 0x8e76826f,
- 0xbef600ff, 0x01000000,
- 0xbefc006f, 0xc031003a,
- 0x00000078, 0x80f8c078,
- 0xbf8cc07f, 0x80fc907c,
- 0xbf800000, 0xbe802d00,
- 0xbe822d02, 0xbe842d04,
- 0xbe862d06, 0xbe882d08,
- 0xbe8a2d0a, 0xbe8c2d0c,
- 0xbe8e2d0e, 0xbf06807c,
- 0xbf84fff0, 0xb8f82a05,
- 0x80788178, 0x8e788a78,
- 0x8e788178, 0xb8ee1605,
- 0x806e816e, 0x8e6e866e,
- 0x80786e78, 0xbef60084,
- 0xbef600ff, 0x01000000,
- 0xc0211bfa, 0x00000078,
- 0x80788478, 0xc0211b3a,
+ 0xbefc0080, 0xe0510000,
+ 0x781d0000, 0xe0510100,
+ 0x781d0000, 0x807cff7c,
+ 0x00000200, 0x8078ff78,
+ 0x00000200, 0xbf0a6f7c,
+ 0xbf85fff6, 0xbefe00c1,
+ 0xbeff00c1, 0xbef600ff,
+ 0x01000000, 0xb8ef2a05,
+ 0x806f816f, 0x8e6f826f,
+ 0x806fff6f, 0x00008000,
+ 0xbef80080, 0xbeee0078,
+ 0x8078ff78, 0x00000400,
+ 0xbefc0084, 0xbf11087c,
+ 0xe0524000, 0x781d0000,
+ 0xe0524100, 0x781d0100,
+ 0xe0524200, 0x781d0200,
+ 0xe0524300, 0x781d0300,
+ 0xbf8c0f70, 0x7e000300,
+ 0x7e020301, 0x7e040302,
+ 0x7e060303, 0x807c847c,
+ 0x8078ff78, 0x00000400,
+ 0xbf0a6f7c, 0xbf85ffee,
+ 0xbefc0080, 0xbf11087c,
+ 0xe0524000, 0x781d0000,
+ 0xe0524100, 0x781d0100,
+ 0xe0524200, 0x781d0200,
+ 0xe0524300, 0x781d0300,
+ 0xbf8c0f70, 0xd3d94000,
+ 0x18000100, 0xd3d94001,
+ 0x18000101, 0xd3d94002,
+ 0x18000102, 0xd3d94003,
+ 0x18000103, 0x807c847c,
+ 0x8078ff78, 0x00000400,
+ 0xbf0a6f7c, 0xbf85ffea,
+ 0xbf9c0000, 0xe0524000,
+ 0x6e1d0000, 0xe0524100,
+ 0x6e1d0100, 0xe0524200,
+ 0x6e1d0200, 0xe0524300,
+ 0x6e1d0300, 0xbf8c0f70,
+ 0xb8f82a05, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0x80f8c078, 0xb8ef1605,
+ 0x806f816f, 0x8e6f846f,
+ 0x8e76826f, 0xbef600ff,
+ 0x01000000, 0xbefc006f,
+ 0xc031003a, 0x00000078,
+ 0x80f8c078, 0xbf8cc07f,
+ 0x80fc907c, 0xbf800000,
+ 0xbe802d00, 0xbe822d02,
+ 0xbe842d04, 0xbe862d06,
+ 0xbe882d08, 0xbe8a2d0a,
+ 0xbe8c2d0c, 0xbe8e2d0e,
+ 0xbf06807c, 0xbf84fff0,
+ 0xb8f82a05, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xc0211bfa,
0x00000078, 0x80788478,
- 0xc0211b7a, 0x00000078,
- 0x80788478, 0xc0211c3a,
+ 0xc0211b3a, 0x00000078,
+ 0x80788478, 0xc0211b7a,
0x00000078, 0x80788478,
- 0xc0211c7a, 0x00000078,
- 0x80788478, 0xc0211eba,
+ 0xc0211c3a, 0x00000078,
+ 0x80788478, 0xc0211c7a,
0x00000078, 0x80788478,
- 0xc0211efa, 0x00000078,
- 0x80788478, 0xc0211a3a,
+ 0xc0211eba, 0x00000078,
+ 0x80788478, 0xc0211efa,
0x00000078, 0x80788478,
- 0xc0211a7a, 0x00000078,
- 0x80788478, 0xc0211cfa,
+ 0xc0211a3a, 0x00000078,
+ 0x80788478, 0xc0211a7a,
0x00000078, 0x80788478,
- 0xbf8cc07f, 0xbefc006f,
- 0xbefe0070, 0xbeff0071,
- 0x866f7bff, 0x000003ff,
- 0xb96f4803, 0x866f7bff,
- 0xfffff800, 0x8f6f8b6f,
- 0xb96fa2c3, 0xb973f801,
- 0xb8ee2a05, 0x806e816e,
- 0x8e6e8a6e, 0x8e6e816e,
- 0xb8ef1605, 0x806f816f,
- 0x8e6f866f, 0x806e6f6e,
- 0x806e746e, 0x826f8075,
- 0x866fff6f, 0x0000ffff,
- 0xc00b1c37, 0x00000050,
- 0xc00b1d37, 0x00000060,
- 0xc0031e77, 0x00000074,
- 0xbf8cc07f, 0x8f6e8b77,
- 0x866eff6e, 0x001f8000,
- 0xb96ef807, 0x866dff6d,
- 0x0000ffff, 0x86fe7e7e,
- 0x86ea6a6a, 0x8f6e837a,
- 0xb96ee0c2, 0xbf800002,
- 0xb97a0002, 0xbf8a0000,
- 0xbe801f6c, 0xbf9b0000,
+ 0xc0211cfa, 0x00000078,
+ 0x80788478, 0xbf8cc07f,
+ 0xbefc006f, 0xbefe0070,
+ 0xbeff0071, 0x866f7bff,
+ 0x000003ff, 0xb96f4803,
+ 0x866f7bff, 0xfffff800,
+ 0x8f6f8b6f, 0xb96fa2c3,
+ 0xb973f801, 0xb8ee2a05,
+ 0x806e816e, 0x8e6e8a6e,
+ 0x8e6e816e, 0xb8ef1605,
+ 0x806f816f, 0x8e6f866f,
+ 0x806e6f6e, 0x806e746e,
+ 0x826f8075, 0x866fff6f,
+ 0x0000ffff, 0xc00b1c37,
+ 0x00000050, 0xc00b1d37,
+ 0x00000060, 0xc0031e77,
+ 0x00000074, 0xbf8cc07f,
+ 0x8f6e8b77, 0x866eff6e,
+ 0x001f8000, 0xb96ef807,
+ 0x866dff6d, 0x0000ffff,
+ 0x86fe7e7e, 0x86ea6a6a,
+ 0x8f6e837a, 0xb96ee0c2,
+ 0xbf800002, 0xb97a0002,
+ 0xbf8a0000, 0xbe801f6c,
+ 0xbf9b0000, 0x00000000,
};
static const uint32_t cwsr_trap_aldebaran_hex[] = {
- 0xbf820001, 0xbf8202df,
+ 0xbf820001, 0xbf8202e0,
0xb8f8f802, 0x8978ff78,
0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
@@ -1902,99 +1901,37 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
0xbefe007c, 0xbefc0070,
0xc0611c7a, 0x0000007c,
0xbf8cc07f, 0x80708470,
- 0xbefc007e, 0x867aff7f,
- 0x04000000, 0xbeef0080,
- 0x876f6f7a, 0xb8f02985,
- 0x80708170, 0x8e708a70,
- 0x8e708170, 0xb8fb1605,
- 0x807b817b, 0x8e7b847b,
- 0x8e76827b, 0xbef600ff,
- 0x01000000, 0xbef20174,
- 0x80747074, 0x82758075,
- 0xbefc0080, 0xbf800000,
- 0xbe802b00, 0xbe822b02,
- 0xbe842b04, 0xbe862b06,
- 0xbe882b08, 0xbe8a2b0a,
- 0xbe8c2b0c, 0xbe8e2b0e,
- 0xc06b003a, 0x00000000,
- 0xbf8cc07f, 0xc06b013a,
- 0x00000010, 0xbf8cc07f,
- 0xc06b023a, 0x00000020,
- 0xbf8cc07f, 0xc06b033a,
- 0x00000030, 0xbf8cc07f,
- 0x8074c074, 0x82758075,
- 0x807c907c, 0xbf0a7b7c,
- 0xbf85ffe7, 0xbef40172,
- 0xbef00080, 0xbefe00c1,
- 0xbeff00c1, 0xbee80080,
- 0xbee90080, 0xbef600ff,
- 0x01000000, 0x867aff78,
- 0x00400000, 0xbf850003,
- 0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf85004d,
- 0xbe840080, 0xd2890000,
- 0x00000900, 0x80048104,
- 0xd2890001, 0x00000900,
- 0x80048104, 0xd2890002,
- 0x00000900, 0x80048104,
- 0xd2890003, 0x00000900,
- 0x80048104, 0xc069003a,
- 0x00000070, 0xbf8cc07f,
- 0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000901,
- 0x80048104, 0xd2890001,
- 0x00000901, 0x80048104,
- 0xd2890002, 0x00000901,
- 0x80048104, 0xd2890003,
- 0x00000901, 0x80048104,
- 0xc069003a, 0x00000070,
- 0xbf8cc07f, 0x80709070,
- 0xbf06c004, 0xbf84ffee,
- 0xbe840080, 0xd2890000,
- 0x00000902, 0x80048104,
- 0xd2890001, 0x00000902,
- 0x80048104, 0xd2890002,
- 0x00000902, 0x80048104,
- 0xd2890003, 0x00000902,
- 0x80048104, 0xc069003a,
- 0x00000070, 0xbf8cc07f,
- 0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000903,
- 0x80048104, 0xd2890001,
- 0x00000903, 0x80048104,
- 0xd2890002, 0x00000903,
- 0x80048104, 0xd2890003,
- 0x00000903, 0x80048104,
- 0xc069003a, 0x00000070,
- 0xbf8cc07f, 0x80709070,
- 0xbf06c004, 0xbf84ffee,
- 0xbf820008, 0xe0724000,
- 0x701d0000, 0xe0724100,
- 0x701d0100, 0xe0724200,
- 0x701d0200, 0xe0724300,
- 0x701d0300, 0xbefe00c1,
- 0xbeff00c1, 0xb8fb4306,
- 0x867bc17b, 0xbf840064,
- 0xbf8a0000, 0x867aff6f,
- 0x04000000, 0xbf840060,
- 0x8e7b867b, 0x8e7b827b,
- 0xbef6007b, 0xb8f02985,
- 0x80708170, 0x8e708a70,
- 0x8e708170, 0xb8fa1605,
- 0x807a817a, 0x8e7a867a,
- 0x80707a70, 0x8070ff70,
- 0x00000080, 0xbef600ff,
- 0x01000000, 0xbefc0080,
- 0xd28c0002, 0x000100c1,
- 0xd28d0003, 0x000204c1,
+ 0xbefc007e, 0xbf108080,
+ 0x867aff7f, 0x04000000,
+ 0xbeef0080, 0x876f6f7a,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fb1605, 0x807b817b,
+ 0x8e7b847b, 0x8e76827b,
+ 0xbef600ff, 0x01000000,
+ 0xbef20174, 0x80747074,
+ 0x82758075, 0xbefc0080,
+ 0xbf800000, 0xbe802b00,
+ 0xbe822b02, 0xbe842b04,
+ 0xbe862b06, 0xbe882b08,
+ 0xbe8a2b0a, 0xbe8c2b0c,
+ 0xbe8e2b0e, 0xc06b003a,
+ 0x00000000, 0xbf8cc07f,
+ 0xc06b013a, 0x00000010,
+ 0xbf8cc07f, 0xc06b023a,
+ 0x00000020, 0xbf8cc07f,
+ 0xc06b033a, 0x00000030,
+ 0xbf8cc07f, 0x8074c074,
+ 0x82758075, 0x807c907c,
+ 0xbf0a7b7c, 0xbf85ffe7,
+ 0xbef40172, 0xbef00080,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xbee80080, 0xbee90080,
+ 0xbef600ff, 0x01000000,
0x867aff78, 0x00400000,
0xbf850003, 0xb8faf803,
0x897a7aff, 0x10000000,
- 0xbf850030, 0x24040682,
- 0xd86e4000, 0x00000002,
- 0xbf8cc07f, 0xbe840080,
+ 0xbf85004d, 0xbe840080,
0xd2890000, 0x00000900,
0x80048104, 0xd2890001,
0x00000900, 0x80048104,
@@ -2013,31 +1950,50 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
- 0xbf84ffee, 0x680404ff,
- 0x00000200, 0xd0c9006a,
- 0x0000f702, 0xbf87ffd2,
- 0xbf820015, 0xd1060002,
- 0x00011103, 0x7e0602ff,
- 0x00000200, 0xbefc00ff,
- 0x00010000, 0xbe800077,
- 0x8677ff77, 0xff7fffff,
- 0x8777ff77, 0x00058000,
- 0xd8ec0000, 0x00000002,
- 0xbf8cc07f, 0xe0765000,
- 0x701d0002, 0x68040702,
- 0xd0c9006a, 0x0000f702,
- 0xbf87fff7, 0xbef70000,
- 0xbef000ff, 0x00000400,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000902,
+ 0x80048104, 0xd2890001,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
+ 0x80048104, 0xd2890003,
+ 0x00000902, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
+ 0x80048104, 0xd2890002,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbf820008,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
0xbefe00c1, 0xbeff00c1,
- 0xb8fb2b05, 0x807b817b,
- 0x8e7b827b, 0xbef600ff,
- 0x01000000, 0xbefc0084,
- 0xbf0a7b7c, 0xbf84006d,
- 0xbf11017c, 0x807bff7b,
- 0x00001000, 0x867aff78,
+ 0xb8fb4306, 0x867bc17b,
+ 0xbf840064, 0xbf8a0000,
+ 0x867aff6f, 0x04000000,
+ 0xbf840060, 0x8e7b867b,
+ 0x8e7b827b, 0xbef6007b,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x80707a70,
+ 0x8070ff70, 0x00000080,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0080, 0xd28c0002,
+ 0x000100c1, 0xd28d0003,
+ 0x000204c1, 0x867aff78,
0x00400000, 0xbf850003,
0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf850051,
+ 0x10000000, 0xbf850030,
+ 0x24040682, 0xd86e4000,
+ 0x00000002, 0xbf8cc07f,
0xbe840080, 0xd2890000,
0x00000900, 0x80048104,
0xd2890001, 0x00000900,
@@ -2057,51 +2013,31 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
- 0xbe840080, 0xd2890000,
- 0x00000902, 0x80048104,
- 0xd2890001, 0x00000902,
- 0x80048104, 0xd2890002,
- 0x00000902, 0x80048104,
- 0xd2890003, 0x00000902,
- 0x80048104, 0xc069003a,
- 0x00000070, 0xbf8cc07f,
- 0x80709070, 0xbf06c004,
- 0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000903,
- 0x80048104, 0xd2890001,
- 0x00000903, 0x80048104,
- 0xd2890002, 0x00000903,
- 0x80048104, 0xd2890003,
- 0x00000903, 0x80048104,
- 0xc069003a, 0x00000070,
- 0xbf8cc07f, 0x80709070,
- 0xbf06c004, 0xbf84ffee,
- 0x807c847c, 0xbf0a7b7c,
- 0xbf85ffb1, 0xbf9c0000,
- 0xbf820012, 0x7e000300,
- 0x7e020301, 0x7e040302,
- 0x7e060303, 0xe0724000,
- 0x701d0000, 0xe0724100,
- 0x701d0100, 0xe0724200,
- 0x701d0200, 0xe0724300,
- 0x701d0300, 0x807c847c,
- 0x8070ff70, 0x00000400,
- 0xbf0a7b7c, 0xbf85ffef,
- 0xbf9c0000, 0xb8fb2985,
- 0x807b817b, 0x8e7b837b,
- 0xb8fa2b05, 0x807a817a,
- 0x8e7a827a, 0x80fb7a7b,
- 0x867b7b7b, 0xbf84007a,
+ 0x680404ff, 0x00000200,
+ 0xd0c9006a, 0x0000f702,
+ 0xbf87ffd2, 0xbf820015,
+ 0xd1060002, 0x00011103,
+ 0x7e0602ff, 0x00000200,
+ 0xbefc00ff, 0x00010000,
+ 0xbe800077, 0x8677ff77,
+ 0xff7fffff, 0x8777ff77,
+ 0x00058000, 0xd8ec0000,
+ 0x00000002, 0xbf8cc07f,
+ 0xe0765000, 0x701d0002,
+ 0x68040702, 0xd0c9006a,
+ 0x0000f702, 0xbf87fff7,
+ 0xbef70000, 0xbef000ff,
+ 0x00000400, 0xbefe00c1,
+ 0xbeff00c1, 0xb8fb2b05,
+ 0x807b817b, 0x8e7b827b,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0084, 0xbf0a7b7c,
+ 0xbf84006d, 0xbf11017c,
0x807bff7b, 0x00001000,
- 0xbefc0080, 0xbf11017c,
0x867aff78, 0x00400000,
0xbf850003, 0xb8faf803,
0x897a7aff, 0x10000000,
- 0xbf850059, 0xd3d84000,
- 0x18000100, 0xd3d84001,
- 0x18000101, 0xd3d84002,
- 0x18000102, 0xd3d84003,
- 0x18000103, 0xbe840080,
+ 0xbf850051, 0xbe840080,
0xd2890000, 0x00000900,
0x80048104, 0xd2890001,
0x00000900, 0x80048104,
@@ -2140,143 +2076,207 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0x807c847c,
- 0xbf0a7b7c, 0xbf85ffa9,
- 0xbf9c0000, 0xbf820016,
- 0xd3d84000, 0x18000100,
- 0xd3d84001, 0x18000101,
- 0xd3d84002, 0x18000102,
- 0xd3d84003, 0x18000103,
+ 0xbf0a7b7c, 0xbf85ffb1,
+ 0xbf9c0000, 0xbf820012,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
0xe0724000, 0x701d0000,
0xe0724100, 0x701d0100,
0xe0724200, 0x701d0200,
0xe0724300, 0x701d0300,
0x807c847c, 0x8070ff70,
0x00000400, 0xbf0a7b7c,
- 0xbf85ffeb, 0xbf9c0000,
- 0xbf8200ee, 0xbef4007e,
- 0x8675ff7f, 0x0000ffff,
- 0x8775ff75, 0x00040000,
- 0xbef60080, 0xbef700ff,
- 0x00807fac, 0x866eff7f,
- 0x04000000, 0xbf84001f,
+ 0xbf85ffef, 0xbf9c0000,
+ 0xb8fb2985, 0x807b817b,
+ 0x8e7b837b, 0xb8fa2b05,
+ 0x807a817a, 0x8e7a827a,
+ 0x80fb7a7b, 0x867b7b7b,
+ 0xbf84007a, 0x807bff7b,
+ 0x00001000, 0xbefc0080,
+ 0xbf11017c, 0x867aff78,
+ 0x00400000, 0xbf850003,
+ 0xb8faf803, 0x897a7aff,
+ 0x10000000, 0xbf850059,
+ 0xd3d84000, 0x18000100,
+ 0xd3d84001, 0x18000101,
+ 0xd3d84002, 0x18000102,
+ 0xd3d84003, 0x18000103,
+ 0xbe840080, 0xd2890000,
+ 0x00000900, 0x80048104,
+ 0xd2890001, 0x00000900,
+ 0x80048104, 0xd2890002,
+ 0x00000900, 0x80048104,
+ 0xd2890003, 0x00000900,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000901,
+ 0x80048104, 0xd2890001,
+ 0x00000901, 0x80048104,
+ 0xd2890002, 0x00000901,
+ 0x80048104, 0xd2890003,
+ 0x00000901, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000902, 0x80048104,
+ 0xd2890001, 0x00000902,
+ 0x80048104, 0xd2890002,
+ 0x00000902, 0x80048104,
+ 0xd2890003, 0x00000902,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000903,
+ 0x80048104, 0xd2890001,
+ 0x00000903, 0x80048104,
+ 0xd2890002, 0x00000903,
+ 0x80048104, 0xd2890003,
+ 0x00000903, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0x807c847c, 0xbf0a7b7c,
+ 0xbf85ffa9, 0xbf9c0000,
+ 0xbf820016, 0xd3d84000,
+ 0x18000100, 0xd3d84001,
+ 0x18000101, 0xd3d84002,
+ 0x18000102, 0xd3d84003,
+ 0x18000103, 0xe0724000,
+ 0x701d0000, 0xe0724100,
+ 0x701d0100, 0xe0724200,
+ 0x701d0200, 0xe0724300,
+ 0x701d0300, 0x807c847c,
+ 0x8070ff70, 0x00000400,
+ 0xbf0a7b7c, 0xbf85ffeb,
+ 0xbf9c0000, 0xbf8200ee,
+ 0xbef4007e, 0x8675ff7f,
+ 0x0000ffff, 0x8775ff75,
+ 0x00040000, 0xbef60080,
+ 0xbef700ff, 0x00807fac,
+ 0x866eff7f, 0x04000000,
+ 0xbf84001f, 0xbefe00c1,
+ 0xbeff00c1, 0xb8ef4306,
+ 0x866fc16f, 0xbf84001a,
+ 0x8e6f866f, 0x8e6f826f,
+ 0xbef6006f, 0xb8f82985,
+ 0x80788178, 0x8e788a78,
+ 0x8e788178, 0xb8ee1605,
+ 0x806e816e, 0x8e6e866e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000080, 0xbef600ff,
+ 0x01000000, 0xbefc0080,
+ 0xe0510000, 0x781d0000,
+ 0xe0510100, 0x781d0000,
+ 0x807cff7c, 0x00000200,
+ 0x8078ff78, 0x00000200,
+ 0xbf0a6f7c, 0xbf85fff6,
0xbefe00c1, 0xbeff00c1,
- 0xb8ef4306, 0x866fc16f,
- 0xbf84001a, 0x8e6f866f,
- 0x8e6f826f, 0xbef6006f,
- 0xb8f82985, 0x80788178,
- 0x8e788a78, 0x8e788178,
- 0xb8ee1605, 0x806e816e,
- 0x8e6e866e, 0x80786e78,
- 0x8078ff78, 0x00000080,
0xbef600ff, 0x01000000,
- 0xbefc0080, 0xe0510000,
- 0x781d0000, 0xe0510100,
- 0x781d0000, 0x807cff7c,
- 0x00000200, 0x8078ff78,
- 0x00000200, 0xbf0a6f7c,
- 0xbf85fff6, 0xbefe00c1,
- 0xbeff00c1, 0xbef600ff,
- 0x01000000, 0xb8ef2b05,
- 0x806f816f, 0x8e6f826f,
- 0x806fff6f, 0x00008000,
- 0xbef80080, 0xbeee0078,
- 0x8078ff78, 0x00000400,
- 0xbefc0084, 0xbf11087c,
- 0xe0524000, 0x781d0000,
- 0xe0524100, 0x781d0100,
- 0xe0524200, 0x781d0200,
- 0xe0524300, 0x781d0300,
- 0xbf8c0f70, 0x7e000300,
- 0x7e020301, 0x7e040302,
- 0x7e060303, 0x807c847c,
- 0x8078ff78, 0x00000400,
- 0xbf0a6f7c, 0xbf85ffee,
- 0xb8ef2985, 0x806f816f,
- 0x8e6f836f, 0xb8f92b05,
- 0x80798179, 0x8e798279,
- 0x80ef796f, 0x866f6f6f,
- 0xbf84001a, 0x806fff6f,
- 0x00008000, 0xbefc0080,
+ 0xb8ef2b05, 0x806f816f,
+ 0x8e6f826f, 0x806fff6f,
+ 0x00008000, 0xbef80080,
+ 0xbeee0078, 0x8078ff78,
+ 0x00000400, 0xbefc0084,
0xbf11087c, 0xe0524000,
0x781d0000, 0xe0524100,
0x781d0100, 0xe0524200,
0x781d0200, 0xe0524300,
0x781d0300, 0xbf8c0f70,
- 0xd3d94000, 0x18000100,
- 0xd3d94001, 0x18000101,
- 0xd3d94002, 0x18000102,
- 0xd3d94003, 0x18000103,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
0x807c847c, 0x8078ff78,
0x00000400, 0xbf0a6f7c,
- 0xbf85ffea, 0xbf9c0000,
- 0xe0524000, 0x6e1d0000,
- 0xe0524100, 0x6e1d0100,
- 0xe0524200, 0x6e1d0200,
- 0xe0524300, 0x6e1d0300,
- 0xbf8c0f70, 0xb8f82985,
- 0x80788178, 0x8e788a78,
- 0x8e788178, 0xb8ee1605,
- 0x806e816e, 0x8e6e866e,
- 0x80786e78, 0x80f8c078,
- 0xb8ef1605, 0x806f816f,
- 0x8e6f846f, 0x8e76826f,
- 0xbef600ff, 0x01000000,
- 0xbefc006f, 0xc031003a,
- 0x00000078, 0x80f8c078,
- 0xbf8cc07f, 0x80fc907c,
- 0xbf800000, 0xbe802d00,
- 0xbe822d02, 0xbe842d04,
- 0xbe862d06, 0xbe882d08,
- 0xbe8a2d0a, 0xbe8c2d0c,
- 0xbe8e2d0e, 0xbf06807c,
- 0xbf84fff0, 0xb8f82985,
- 0x80788178, 0x8e788a78,
- 0x8e788178, 0xb8ee1605,
- 0x806e816e, 0x8e6e866e,
- 0x80786e78, 0xbef60084,
- 0xbef600ff, 0x01000000,
- 0xc0211bfa, 0x00000078,
- 0x80788478, 0xc0211b3a,
+ 0xbf85ffee, 0xb8ef2985,
+ 0x806f816f, 0x8e6f836f,
+ 0xb8f92b05, 0x80798179,
+ 0x8e798279, 0x80ef796f,
+ 0x866f6f6f, 0xbf84001a,
+ 0x806fff6f, 0x00008000,
+ 0xbefc0080, 0xbf11087c,
+ 0xe0524000, 0x781d0000,
+ 0xe0524100, 0x781d0100,
+ 0xe0524200, 0x781d0200,
+ 0xe0524300, 0x781d0300,
+ 0xbf8c0f70, 0xd3d94000,
+ 0x18000100, 0xd3d94001,
+ 0x18000101, 0xd3d94002,
+ 0x18000102, 0xd3d94003,
+ 0x18000103, 0x807c847c,
+ 0x8078ff78, 0x00000400,
+ 0xbf0a6f7c, 0xbf85ffea,
+ 0xbf9c0000, 0xe0524000,
+ 0x6e1d0000, 0xe0524100,
+ 0x6e1d0100, 0xe0524200,
+ 0x6e1d0200, 0xe0524300,
+ 0x6e1d0300, 0xbf8c0f70,
+ 0xb8f82985, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0x80f8c078, 0xb8ef1605,
+ 0x806f816f, 0x8e6f846f,
+ 0x8e76826f, 0xbef600ff,
+ 0x01000000, 0xbefc006f,
+ 0xc031003a, 0x00000078,
+ 0x80f8c078, 0xbf8cc07f,
+ 0x80fc907c, 0xbf800000,
+ 0xbe802d00, 0xbe822d02,
+ 0xbe842d04, 0xbe862d06,
+ 0xbe882d08, 0xbe8a2d0a,
+ 0xbe8c2d0c, 0xbe8e2d0e,
+ 0xbf06807c, 0xbf84fff0,
+ 0xb8f82985, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xc0211bfa,
0x00000078, 0x80788478,
- 0xc0211b7a, 0x00000078,
- 0x80788478, 0xc0211c3a,
+ 0xc0211b3a, 0x00000078,
+ 0x80788478, 0xc0211b7a,
0x00000078, 0x80788478,
- 0xc0211c7a, 0x00000078,
- 0x80788478, 0xc0211eba,
+ 0xc0211c3a, 0x00000078,
+ 0x80788478, 0xc0211c7a,
0x00000078, 0x80788478,
- 0xc0211efa, 0x00000078,
- 0x80788478, 0xc0211a3a,
+ 0xc0211eba, 0x00000078,
+ 0x80788478, 0xc0211efa,
0x00000078, 0x80788478,
- 0xc0211a7a, 0x00000078,
- 0x80788478, 0xc0211cfa,
+ 0xc0211a3a, 0x00000078,
+ 0x80788478, 0xc0211a7a,
0x00000078, 0x80788478,
- 0xbf8cc07f, 0xbefc006f,
- 0xbefe0070, 0xbeff0071,
- 0x866f7bff, 0x000003ff,
- 0xb96f4803, 0x866f7bff,
- 0xfffff800, 0x8f6f8b6f,
- 0xb96fa2c3, 0xb973f801,
- 0xb8ee2985, 0x806e816e,
- 0x8e6e8a6e, 0x8e6e816e,
- 0xb8ef1605, 0x806f816f,
- 0x8e6f866f, 0x806e6f6e,
- 0x806e746e, 0x826f8075,
- 0x866fff6f, 0x0000ffff,
- 0xc00b1c37, 0x00000050,
- 0xc00b1d37, 0x00000060,
- 0xc0031e77, 0x00000074,
- 0xbf8cc07f, 0x8f6e8b77,
- 0x866eff6e, 0x001f8000,
- 0xb96ef807, 0x866dff6d,
- 0x0000ffff, 0x86fe7e7e,
- 0x86ea6a6a, 0x8f6e837a,
- 0xb96ee0c2, 0xbf800002,
- 0xb97a0002, 0xbf8a0000,
- 0xbe801f6c, 0xbf9b0000,
+ 0xc0211cfa, 0x00000078,
+ 0x80788478, 0xbf8cc07f,
+ 0xbefc006f, 0xbefe0070,
+ 0xbeff0071, 0x866f7bff,
+ 0x000003ff, 0xb96f4803,
+ 0x866f7bff, 0xfffff800,
+ 0x8f6f8b6f, 0xb96fa2c3,
+ 0xb973f801, 0xb8ee2985,
+ 0x806e816e, 0x8e6e8a6e,
+ 0x8e6e816e, 0xb8ef1605,
+ 0x806f816f, 0x8e6f866f,
+ 0x806e6f6e, 0x806e746e,
+ 0x826f8075, 0x866fff6f,
+ 0x0000ffff, 0xc00b1c37,
+ 0x00000050, 0xc00b1d37,
+ 0x00000060, 0xc0031e77,
+ 0x00000074, 0xbf8cc07f,
+ 0x8f6e8b77, 0x866eff6e,
+ 0x001f8000, 0xb96ef807,
+ 0x866dff6d, 0x0000ffff,
+ 0x86fe7e7e, 0x86ea6a6a,
+ 0x8f6e837a, 0xb96ee0c2,
+ 0xbf800002, 0xb97a0002,
+ 0xbf8a0000, 0xbe801f6c,
+ 0xbf9b0000, 0x00000000,
};
static const uint32_t cwsr_trap_gfx10_hex[] = {
- 0xbf820001, 0xbf820221,
+ 0xbf820001, 0xbf820220,
0xb0804004, 0xb978f802,
0x8a78ff78, 0x00020006,
0xb97bf803, 0x876eff78,
@@ -2302,19 +2302,19 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbf0d8f7b, 0xbf840002,
0x887bff7b, 0xffff0000,
0xf4011bbd, 0xfa000010,
- 0xbf8cc07f, 0x8f6e976e,
+ 0xbf8c0000, 0x8f6e976e,
0x8a77ff77, 0x00800000,
0x88776e77, 0xf4051bbd,
- 0xfa000000, 0xbf8cc07f,
+ 0xfa000000, 0xbf8c0000,
0xf4051ebd, 0xfa000008,
- 0xbf8cc07f, 0x87ee6e6e,
+ 0xbf8c0000, 0x87ee6e6e,
0xbf840001, 0xbe80206e,
- 0x876eff6d, 0x01ff0000,
- 0xbf850005, 0x8878ff78,
- 0x00002000, 0x80ec886c,
- 0x82ed806d, 0xbf820005,
- 0x876eff6d, 0x01000000,
- 0xbf850002, 0x806c846c,
+ 0x876eff6d, 0x00ff0000,
+ 0xbf850008, 0x876eff6d,
+ 0x01000000, 0xbf850007,
+ 0x8878ff78, 0x00002000,
+ 0x80ec886c, 0x82ed806d,
+ 0xbf820002, 0x806c846c,
0x826d806d, 0x876dff6d,
0x0000ffff, 0x87fe7e7e,
0x87ea6a6a, 0xb9f8f802,
@@ -2322,7 +2322,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0x0000ffff, 0xbefa0380,
0xb9fa0283, 0xbeee037e,
0xbeef037f, 0xbefe0480,
- 0xbf900004, 0xbf8cc07f,
+ 0xbf900004, 0xbf8c0000,
0x877aff7f, 0x04000000,
0x8f7a857a, 0x886d7a6d,
0x7e008200, 0xbefa037e,
@@ -2475,94 +2475,93 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbf850002, 0xbeff0380,
0xbf820001, 0xbeff03c1,
0xb97b4306, 0x877bc17b,
- 0xbf840044, 0xbf8a0000,
+ 0xbf840043, 0xbf8a0000,
0x877aff6d, 0x80000000,
- 0xbf840040, 0x8f7b867b,
- 0x8f7b827b, 0xbef6037b,
- 0xb9703a05, 0x80708170,
- 0xbf0d9973, 0xbf850002,
- 0x8f708970, 0xbf820001,
- 0x8f708a70, 0xb97a1e06,
- 0x8f7a8a7a, 0x80707a70,
- 0x8070ff70, 0x00000200,
- 0x8070ff70, 0x00000080,
- 0xbef603ff, 0x01000000,
- 0xd7650000, 0x000100c1,
- 0xd7660000, 0x000200c1,
- 0x16000084, 0x907c9973,
- 0x877c817c, 0xbf06817c,
- 0xbefc0380, 0xbf850012,
- 0xbe8303ff, 0x00000080,
+ 0xbf84003f, 0x8f7b887b,
+ 0xbef6037b, 0xb9703a05,
+ 0x80708170, 0xbf0d9973,
+ 0xbf850002, 0x8f708970,
+ 0xbf820001, 0x8f708a70,
+ 0xb97a1e06, 0x8f7a8a7a,
+ 0x80707a70, 0x8070ff70,
+ 0x00000200, 0x8070ff70,
+ 0x00000080, 0xbef603ff,
+ 0x01000000, 0xd7650000,
+ 0x000100c1, 0xd7660000,
+ 0x000200c1, 0x16000084,
+ 0x907c9973, 0x877c817c,
+ 0xbf06817c, 0xbefc0380,
+ 0xbf850012, 0xbe8303ff,
+ 0x00000080, 0xbf800000,
0xbf800000, 0xbf800000,
- 0xbf800000, 0xd8d80000,
- 0x01000000, 0xbf8c0000,
- 0xe0704000, 0x705d0100,
- 0x807c037c, 0x80700370,
- 0xd5250000, 0x0001ff00,
- 0x00000080, 0xbf0a7b7c,
- 0xbf85fff4, 0xbf820011,
- 0xbe8303ff, 0x00000100,
+ 0xd8d80000, 0x01000000,
+ 0xbf8c0000, 0xe0704000,
+ 0x705d0100, 0x807c037c,
+ 0x80700370, 0xd5250000,
+ 0x0001ff00, 0x00000080,
+ 0xbf0a7b7c, 0xbf85fff4,
+ 0xbf820011, 0xbe8303ff,
+ 0x00000100, 0xbf800000,
0xbf800000, 0xbf800000,
- 0xbf800000, 0xd8d80000,
- 0x01000000, 0xbf8c0000,
- 0xe0704000, 0x705d0100,
- 0x807c037c, 0x80700370,
- 0xd5250000, 0x0001ff00,
- 0x00000100, 0xbf0a7b7c,
- 0xbf85fff4, 0xbefe03c1,
- 0x907c9973, 0x877c817c,
- 0xbf06817c, 0xbf850004,
- 0xbef003ff, 0x00000200,
- 0xbeff0380, 0xbf820003,
- 0xbef003ff, 0x00000400,
- 0xbeff03c1, 0xb97b3a05,
- 0x807b817b, 0x8f7b827b,
- 0x907c9973, 0x877c817c,
- 0xbf06817c, 0xbf850017,
+ 0xd8d80000, 0x01000000,
+ 0xbf8c0000, 0xe0704000,
+ 0x705d0100, 0x807c037c,
+ 0x80700370, 0xd5250000,
+ 0x0001ff00, 0x00000100,
+ 0xbf0a7b7c, 0xbf85fff4,
+ 0xbefe03c1, 0x907c9973,
+ 0x877c817c, 0xbf06817c,
+ 0xbf850004, 0xbef003ff,
+ 0x00000200, 0xbeff0380,
+ 0xbf820003, 0xbef003ff,
+ 0x00000400, 0xbeff03c1,
+ 0xb97b3a05, 0x807b817b,
+ 0x8f7b827b, 0x907c9973,
+ 0x877c817c, 0xbf06817c,
+ 0xbf850017, 0xbef603ff,
+ 0x01000000, 0xbefc0384,
+ 0xbf0a7b7c, 0xbf840037,
+ 0x7e008700, 0x7e028701,
+ 0x7e048702, 0x7e068703,
+ 0xe0704000, 0x705d0000,
+ 0xe0704080, 0x705d0100,
+ 0xe0704100, 0x705d0200,
+ 0xe0704180, 0x705d0300,
+ 0x807c847c, 0x8070ff70,
+ 0x00000200, 0xbf0a7b7c,
+ 0xbf85ffef, 0xbf820025,
0xbef603ff, 0x01000000,
0xbefc0384, 0xbf0a7b7c,
- 0xbf840037, 0x7e008700,
+ 0xbf840011, 0x7e008700,
0x7e028701, 0x7e048702,
0x7e068703, 0xe0704000,
- 0x705d0000, 0xe0704080,
- 0x705d0100, 0xe0704100,
- 0x705d0200, 0xe0704180,
+ 0x705d0000, 0xe0704100,
+ 0x705d0100, 0xe0704200,
+ 0x705d0200, 0xe0704300,
0x705d0300, 0x807c847c,
- 0x8070ff70, 0x00000200,
+ 0x8070ff70, 0x00000400,
0xbf0a7b7c, 0xbf85ffef,
- 0xbf820025, 0xbef603ff,
- 0x01000000, 0xbefc0384,
- 0xbf0a7b7c, 0xbf840011,
- 0x7e008700, 0x7e028701,
- 0x7e048702, 0x7e068703,
+ 0xb97b1e06, 0x877bc17b,
+ 0xbf84000c, 0x8f7b837b,
+ 0x807b7c7b, 0xbefe03c1,
+ 0xbeff0380, 0x7e008700,
0xe0704000, 0x705d0000,
- 0xe0704100, 0x705d0100,
- 0xe0704200, 0x705d0200,
- 0xe0704300, 0x705d0300,
- 0x807c847c, 0x8070ff70,
- 0x00000400, 0xbf0a7b7c,
- 0xbf85ffef, 0xb97b1e06,
- 0x877bc17b, 0xbf84000c,
- 0x8f7b837b, 0x807b7c7b,
- 0xbefe03c1, 0xbeff0380,
- 0x7e008700, 0xe0704000,
- 0x705d0000, 0x807c817c,
- 0x8070ff70, 0x00000080,
- 0xbf0a7b7c, 0xbf85fff8,
- 0xbf82013b, 0xbef4037e,
- 0x8775ff7f, 0x0000ffff,
- 0x8875ff75, 0x00040000,
- 0xbef60380, 0xbef703ff,
- 0x10807fac, 0xb97202dc,
- 0x8f729972, 0x876eff7f,
- 0x04000000, 0xbf840034,
- 0xbefe03c1, 0x907c9972,
- 0x877c817c, 0xbf06817c,
- 0xbf850002, 0xbeff0380,
- 0xbf820001, 0xbeff03c1,
- 0xb96f4306, 0x876fc16f,
- 0xbf840029, 0x8f6f866f,
- 0x8f6f826f, 0xbef6036f,
+ 0x807c817c, 0x8070ff70,
+ 0x00000080, 0xbf0a7b7c,
+ 0xbf85fff8, 0xbf820136,
+ 0xbef4037e, 0x8775ff7f,
+ 0x0000ffff, 0x8875ff75,
+ 0x00040000, 0xbef60380,
+ 0xbef703ff, 0x10807fac,
+ 0xb97202dc, 0x8f729972,
+ 0x876eff7f, 0x04000000,
+ 0xbf840033, 0xbefe03c1,
+ 0x907c9972, 0x877c817c,
+ 0xbf06817c, 0xbf850002,
+ 0xbeff0380, 0xbf820001,
+ 0xbeff03c1, 0xb96f4306,
+ 0x876fc16f, 0xbf840028,
+ 0x8f6f886f, 0xbef6036f,
0xb9783a05, 0x80788178,
0xbf0d9972, 0xbf850002,
0x8f788978, 0xbf820001,
@@ -2598,7 +2597,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xe0304080, 0x785d0100,
0xe0304100, 0x785d0200,
0xe0304180, 0x785d0300,
- 0xbf8c3f70, 0x7e008500,
+ 0xbf8c0000, 0x7e008500,
0x7e028501, 0x7e048502,
0x7e068503, 0x807c847c,
0x8078ff78, 0x00000200,
@@ -2607,7 +2606,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xe0304080, 0x6e5d0100,
0xe0304100, 0x6e5d0200,
0xe0304180, 0x6e5d0300,
- 0xbf8c3f70, 0xbf820034,
+ 0xbf8c0000, 0xbf820034,
0xbef603ff, 0x01000000,
0xbeee0378, 0x8078ff78,
0x00000400, 0xbefc0384,
@@ -2616,7 +2615,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xe0304100, 0x785d0100,
0xe0304200, 0x785d0200,
0xe0304300, 0x785d0300,
- 0xbf8c3f70, 0x7e008500,
+ 0xbf8c0000, 0x7e008500,
0x7e028501, 0x7e048502,
0x7e068503, 0x807c847c,
0x8078ff78, 0x00000400,
@@ -2625,7 +2624,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbf84000e, 0x8f6f836f,
0x806f7c6f, 0xbefe03c1,
0xbeff0380, 0xe0304000,
- 0x785d0000, 0xbf8c3f70,
+ 0x785d0000, 0xbf8c0000,
0x7e008500, 0x807c817c,
0x8078ff78, 0x00000080,
0xbf0a6f7c, 0xbf85fff7,
@@ -2633,7 +2632,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0x6e5d0000, 0xe0304100,
0x6e5d0100, 0xe0304200,
0x6e5d0200, 0xe0304300,
- 0x6e5d0300, 0xbf8c3f70,
+ 0x6e5d0300, 0xbf8c0000,
0xb9783a05, 0x80788178,
0xbf0d9972, 0xbf850002,
0x8f788978, 0xbf820001,
@@ -2644,16 +2643,16 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbef603ff, 0x01000000,
0xbefc03ff, 0x0000006c,
0x80f89078, 0xf429003a,
- 0xf0000000, 0xbf8cc07f,
+ 0xf0000000, 0xbf8c0000,
0x80fc847c, 0xbf800000,
0xbe803100, 0xbe823102,
0x80f8a078, 0xf42d003a,
- 0xf0000000, 0xbf8cc07f,
+ 0xf0000000, 0xbf8c0000,
0x80fc887c, 0xbf800000,
0xbe803100, 0xbe823102,
0xbe843104, 0xbe863106,
0x80f8c078, 0xf431003a,
- 0xf0000000, 0xbf8cc07f,
+ 0xf0000000, 0xbf8c0000,
0x80fc907c, 0xbf800000,
0xbe803100, 0xbe823102,
0xbe843104, 0xbe863106,
@@ -2683,15 +2682,13 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0x80788478, 0xf4211cfa,
0xf0000000, 0x80788478,
0xf4211bba, 0xf0000000,
- 0x80788478, 0xbf8cc07f,
+ 0x80788478, 0xbf8c0000,
0xb9eef814, 0xf4211bba,
0xf0000000, 0x80788478,
- 0xbf8cc07f, 0xb9eef815,
+ 0xbf8c0000, 0xb9eef815,
0xbefc036f, 0xbefe0370,
- 0xbeff0371, 0x876f7bff,
- 0x000003ff, 0xb9ef4803,
- 0x876f7bff, 0xfffff800,
- 0x906f8b6f, 0xb9efa2c3,
+ 0xbeff0371, 0xb9fb4803,
+ 0x907b8b7b, 0xb9fba2c3,
0xb9f3f801, 0xb96e3a05,
0x806e816e, 0xbf0d9972,
0xbf850002, 0x8f6e896e,
@@ -2703,7 +2700,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0x0000ffff, 0xf4091c37,
0xfa000050, 0xf4091d37,
0xfa000060, 0xf4011e77,
- 0xfa000074, 0xbf8cc07f,
+ 0xfa000074, 0xbf8c0000,
0x876dff6d, 0x0000ffff,
0x87fe7e7e, 0x87ea6a6a,
0xb9faf802, 0xbe80226c,
@@ -2713,7 +2710,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
};
static const uint32_t cwsr_trap_gfx11_hex[] = {
- 0xbfa00001, 0xbfa00225,
+ 0xbfa00001, 0xbfa00227,
0xb0804006, 0xb8f8f802,
0x9178ff78, 0x00020006,
0xb8fbf803, 0xbf0d9e6d,
@@ -2737,187 +2734,188 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
0x8b6eff6e, 0x00000800,
0xbfa20003, 0x8b6eff7b,
0x00000400, 0xbfa2002a,
- 0xbefa4d82, 0xbf89fc07,
+ 0xbefa4d82, 0xbf890000,
0x84fa887a, 0xbf0d8f7b,
0xbfa10002, 0x8c7bff7b,
0xffff0000, 0xf4005bbd,
- 0xf8000010, 0xbf89fc07,
+ 0xf8000010, 0xbf890000,
0x846e976e, 0x9177ff77,
0x00800000, 0x8c776e77,
0xf4045bbd, 0xf8000000,
- 0xbf89fc07, 0xf4045ebd,
- 0xf8000008, 0xbf89fc07,
+ 0xbf890000, 0xf4045ebd,
+ 0xf8000008, 0xbf890000,
0x8bee6e6e, 0xbfa10001,
0xbe80486e, 0x8b6eff6d,
- 0x01ff0000, 0xbfa20005,
- 0x8c78ff78, 0x00002000,
- 0x80ec886c, 0x82ed806d,
- 0xbfa00005, 0x8b6eff6d,
- 0x01000000, 0xbfa20002,
+ 0x00ff0000, 0xbfa20008,
+ 0x8b6eff6d, 0x01000000,
+ 0xbfa20007, 0x8c78ff78,
+ 0x00002000, 0x80ec886c,
+ 0x82ed806d, 0xbfa00002,
0x806c846c, 0x826d806d,
0x8b6dff6d, 0x0000ffff,
0x8bfe7e7e, 0x8bea6a6a,
0xb978f802, 0xbe804a6c,
- 0x8b6dff6d, 0x0000ffff,
- 0xbefa0080, 0xb97a0283,
- 0xbeee007e, 0xbeef007f,
- 0xbefe0180, 0xbefe4d84,
- 0xbf89fc07, 0x8b7aff7f,
- 0x04000000, 0x847a857a,
- 0x8c6d7a6d, 0xbefa007e,
- 0x8b7bff7f, 0x0000ffff,
- 0xbefe00c1, 0xbeff00c1,
- 0xdca6c000, 0x007a0000,
- 0x7e000280, 0xbefe007a,
- 0xbeff007b, 0xb8fb02dc,
- 0x847b997b, 0xb8fa3b05,
- 0x807a817a, 0xbf0d997b,
- 0xbfa20002, 0x847a897a,
- 0xbfa00001, 0x847a8a7a,
- 0xb8fb1e06, 0x847b8a7b,
- 0x807a7b7a, 0x8b7bff7f,
- 0x0000ffff, 0x807aff7a,
- 0x00000200, 0x807a7e7a,
- 0x827b807b, 0xd7610000,
- 0x00010870, 0xd7610000,
- 0x00010a71, 0xd7610000,
- 0x00010c72, 0xd7610000,
- 0x00010e73, 0xd7610000,
- 0x00011074, 0xd7610000,
- 0x00011275, 0xd7610000,
- 0x00011476, 0xd7610000,
- 0x00011677, 0xd7610000,
- 0x00011a79, 0xd7610000,
- 0x00011c7e, 0xd7610000,
- 0x00011e7f, 0xbefe00ff,
- 0x00003fff, 0xbeff0080,
- 0xdca6c040, 0x007a0000,
- 0xd760007a, 0x00011d00,
- 0xd760007b, 0x00011f00,
+ 0xbf0d9878, 0xbfa10001,
+ 0xbfb00000, 0x8b6dff6d,
+ 0x0000ffff, 0xbefa0080,
+ 0xb97a0283, 0xbeee007e,
+ 0xbeef007f, 0xbefe0180,
+ 0xbefe4d84, 0xbf890000,
+ 0x8b7aff7f, 0x04000000,
+ 0x847a857a, 0x8c6d7a6d,
+ 0xbefa007e, 0x8b7bff7f,
+ 0x0000ffff, 0xbefe00c1,
+ 0xbeff00c1, 0xdca6c000,
+ 0x007a0000, 0x7e000280,
0xbefe007a, 0xbeff007b,
- 0xbef4007e, 0x8b75ff7f,
- 0x0000ffff, 0x8c75ff75,
- 0x00040000, 0xbef60080,
- 0xbef700ff, 0x10807fac,
- 0xbef1007d, 0xbef00080,
- 0xb8f302dc, 0x84739973,
- 0xbefe00c1, 0x857d9973,
- 0x8b7d817d, 0xbf06817d,
- 0xbfa20002, 0xbeff0080,
- 0xbfa00002, 0xbeff00c1,
- 0xbfa00009, 0xbef600ff,
- 0x01000000, 0xe0685080,
- 0x701d0100, 0xe0685100,
- 0x701d0200, 0xe0685180,
- 0x701d0300, 0xbfa00008,
+ 0xb8fb02dc, 0x847b997b,
+ 0xb8fa3b05, 0x807a817a,
+ 0xbf0d997b, 0xbfa20002,
+ 0x847a897a, 0xbfa00001,
+ 0x847a8a7a, 0xb8fb1e06,
+ 0x847b8a7b, 0x807a7b7a,
+ 0x8b7bff7f, 0x0000ffff,
+ 0x807aff7a, 0x00000200,
+ 0x807a7e7a, 0x827b807b,
+ 0xd7610000, 0x00010870,
+ 0xd7610000, 0x00010a71,
+ 0xd7610000, 0x00010c72,
+ 0xd7610000, 0x00010e73,
+ 0xd7610000, 0x00011074,
+ 0xd7610000, 0x00011275,
+ 0xd7610000, 0x00011476,
+ 0xd7610000, 0x00011677,
+ 0xd7610000, 0x00011a79,
+ 0xd7610000, 0x00011c7e,
+ 0xd7610000, 0x00011e7f,
+ 0xbefe00ff, 0x00003fff,
+ 0xbeff0080, 0xdca6c040,
+ 0x007a0000, 0xd760007a,
+ 0x00011d00, 0xd760007b,
+ 0x00011f00, 0xbefe007a,
+ 0xbeff007b, 0xbef4007e,
+ 0x8b75ff7f, 0x0000ffff,
+ 0x8c75ff75, 0x00040000,
+ 0xbef60080, 0xbef700ff,
+ 0x10807fac, 0xbef1007d,
+ 0xbef00080, 0xb8f302dc,
+ 0x84739973, 0xbefe00c1,
+ 0x857d9973, 0x8b7d817d,
+ 0xbf06817d, 0xbfa20002,
+ 0xbeff0080, 0xbfa00002,
+ 0xbeff00c1, 0xbfa00009,
0xbef600ff, 0x01000000,
- 0xe0685100, 0x701d0100,
- 0xe0685200, 0x701d0200,
- 0xe0685300, 0x701d0300,
+ 0xe0685080, 0x701d0100,
+ 0xe0685100, 0x701d0200,
+ 0xe0685180, 0x701d0300,
+ 0xbfa00008, 0xbef600ff,
+ 0x01000000, 0xe0685100,
+ 0x701d0100, 0xe0685200,
+ 0x701d0200, 0xe0685300,
+ 0x701d0300, 0xb8f03b05,
+ 0x80708170, 0xbf0d9973,
+ 0xbfa20002, 0x84708970,
+ 0xbfa00001, 0x84708a70,
+ 0xb8fa1e06, 0x847a8a7a,
+ 0x80707a70, 0x8070ff70,
+ 0x00000200, 0xbef600ff,
+ 0x01000000, 0x7e000280,
+ 0x7e020280, 0x7e040280,
+ 0xbefd0080, 0xd7610002,
+ 0x0000fa71, 0x807d817d,
+ 0xd7610002, 0x0000fa6c,
+ 0x807d817d, 0x917aff6d,
+ 0x80000000, 0xd7610002,
+ 0x0000fa7a, 0x807d817d,
+ 0xd7610002, 0x0000fa6e,
+ 0x807d817d, 0xd7610002,
+ 0x0000fa6f, 0x807d817d,
+ 0xd7610002, 0x0000fa78,
+ 0x807d817d, 0xb8faf803,
+ 0xd7610002, 0x0000fa7a,
+ 0x807d817d, 0xd7610002,
+ 0x0000fa7b, 0x807d817d,
+ 0xb8f1f801, 0xd7610002,
+ 0x0000fa71, 0x807d817d,
+ 0xb8f1f814, 0xd7610002,
+ 0x0000fa71, 0x807d817d,
+ 0xb8f1f815, 0xd7610002,
+ 0x0000fa71, 0x807d817d,
+ 0xbefe00ff, 0x0000ffff,
+ 0xbeff0080, 0xe0685000,
+ 0x701d0200, 0xbefe00c1,
0xb8f03b05, 0x80708170,
0xbf0d9973, 0xbfa20002,
0x84708970, 0xbfa00001,
0x84708a70, 0xb8fa1e06,
0x847a8a7a, 0x80707a70,
- 0x8070ff70, 0x00000200,
0xbef600ff, 0x01000000,
- 0x7e000280, 0x7e020280,
- 0x7e040280, 0xbefd0080,
- 0xd7610002, 0x0000fa71,
- 0x807d817d, 0xd7610002,
- 0x0000fa6c, 0x807d817d,
- 0x917aff6d, 0x80000000,
- 0xd7610002, 0x0000fa7a,
- 0x807d817d, 0xd7610002,
- 0x0000fa6e, 0x807d817d,
- 0xd7610002, 0x0000fa6f,
- 0x807d817d, 0xd7610002,
- 0x0000fa78, 0x807d817d,
- 0xb8faf803, 0xd7610002,
- 0x0000fa7a, 0x807d817d,
- 0xd7610002, 0x0000fa7b,
- 0x807d817d, 0xb8f1f801,
- 0xd7610002, 0x0000fa71,
- 0x807d817d, 0xb8f1f814,
- 0xd7610002, 0x0000fa71,
- 0x807d817d, 0xb8f1f815,
- 0xd7610002, 0x0000fa71,
- 0x807d817d, 0xbefe00ff,
- 0x0000ffff, 0xbeff0080,
- 0xe0685000, 0x701d0200,
- 0xbefe00c1, 0xb8f03b05,
- 0x80708170, 0xbf0d9973,
- 0xbfa20002, 0x84708970,
- 0xbfa00001, 0x84708a70,
- 0xb8fa1e06, 0x847a8a7a,
- 0x80707a70, 0xbef600ff,
- 0x01000000, 0xbef90080,
- 0xbefd0080, 0xbf800000,
- 0xbe804100, 0xbe824102,
- 0xbe844104, 0xbe864106,
- 0xbe884108, 0xbe8a410a,
- 0xbe8c410c, 0xbe8e410e,
- 0xd7610002, 0x0000f200,
+ 0xbef90080, 0xbefd0080,
+ 0xbf800000, 0xbe804100,
+ 0xbe824102, 0xbe844104,
+ 0xbe864106, 0xbe884108,
+ 0xbe8a410a, 0xbe8c410c,
+ 0xbe8e410e, 0xd7610002,
+ 0x0000f200, 0x80798179,
+ 0xd7610002, 0x0000f201,
0x80798179, 0xd7610002,
- 0x0000f201, 0x80798179,
- 0xd7610002, 0x0000f202,
+ 0x0000f202, 0x80798179,
+ 0xd7610002, 0x0000f203,
0x80798179, 0xd7610002,
- 0x0000f203, 0x80798179,
- 0xd7610002, 0x0000f204,
+ 0x0000f204, 0x80798179,
+ 0xd7610002, 0x0000f205,
0x80798179, 0xd7610002,
- 0x0000f205, 0x80798179,
- 0xd7610002, 0x0000f206,
+ 0x0000f206, 0x80798179,
+ 0xd7610002, 0x0000f207,
0x80798179, 0xd7610002,
- 0x0000f207, 0x80798179,
- 0xd7610002, 0x0000f208,
+ 0x0000f208, 0x80798179,
+ 0xd7610002, 0x0000f209,
0x80798179, 0xd7610002,
- 0x0000f209, 0x80798179,
- 0xd7610002, 0x0000f20a,
+ 0x0000f20a, 0x80798179,
+ 0xd7610002, 0x0000f20b,
0x80798179, 0xd7610002,
- 0x0000f20b, 0x80798179,
- 0xd7610002, 0x0000f20c,
+ 0x0000f20c, 0x80798179,
+ 0xd7610002, 0x0000f20d,
0x80798179, 0xd7610002,
- 0x0000f20d, 0x80798179,
- 0xd7610002, 0x0000f20e,
+ 0x0000f20e, 0x80798179,
+ 0xd7610002, 0x0000f20f,
+ 0x80798179, 0xbf06a079,
+ 0xbfa10006, 0xe0685000,
+ 0x701d0200, 0x8070ff70,
+ 0x00000080, 0xbef90080,
+ 0x7e040280, 0x807d907d,
+ 0xbf0aff7d, 0x00000060,
+ 0xbfa2ffbc, 0xbe804100,
+ 0xbe824102, 0xbe844104,
+ 0xbe864106, 0xbe884108,
+ 0xbe8a410a, 0xd7610002,
+ 0x0000f200, 0x80798179,
+ 0xd7610002, 0x0000f201,
0x80798179, 0xd7610002,
- 0x0000f20f, 0x80798179,
- 0xbf06a079, 0xbfa10006,
- 0xe0685000, 0x701d0200,
- 0x8070ff70, 0x00000080,
- 0xbef90080, 0x7e040280,
- 0x807d907d, 0xbf0aff7d,
- 0x00000060, 0xbfa2ffbc,
- 0xbe804100, 0xbe824102,
- 0xbe844104, 0xbe864106,
- 0xbe884108, 0xbe8a410a,
- 0xd7610002, 0x0000f200,
+ 0x0000f202, 0x80798179,
+ 0xd7610002, 0x0000f203,
0x80798179, 0xd7610002,
- 0x0000f201, 0x80798179,
- 0xd7610002, 0x0000f202,
+ 0x0000f204, 0x80798179,
+ 0xd7610002, 0x0000f205,
0x80798179, 0xd7610002,
- 0x0000f203, 0x80798179,
- 0xd7610002, 0x0000f204,
+ 0x0000f206, 0x80798179,
+ 0xd7610002, 0x0000f207,
0x80798179, 0xd7610002,
- 0x0000f205, 0x80798179,
- 0xd7610002, 0x0000f206,
+ 0x0000f208, 0x80798179,
+ 0xd7610002, 0x0000f209,
0x80798179, 0xd7610002,
- 0x0000f207, 0x80798179,
- 0xd7610002, 0x0000f208,
- 0x80798179, 0xd7610002,
- 0x0000f209, 0x80798179,
- 0xd7610002, 0x0000f20a,
- 0x80798179, 0xd7610002,
- 0x0000f20b, 0x80798179,
- 0xe0685000, 0x701d0200,
- 0xbefe00c1, 0x857d9973,
- 0x8b7d817d, 0xbf06817d,
- 0xbfa20002, 0xbeff0080,
- 0xbfa00001, 0xbeff00c1,
- 0xb8fb4306, 0x8b7bc17b,
- 0xbfa10044, 0xbfbd0000,
- 0x8b7aff6d, 0x80000000,
- 0xbfa10040, 0x847b867b,
- 0x847b827b, 0xbef6007b,
+ 0x0000f20a, 0x80798179,
+ 0xd7610002, 0x0000f20b,
+ 0x80798179, 0xe0685000,
+ 0x701d0200, 0xbefe00c1,
+ 0x857d9973, 0x8b7d817d,
+ 0xbf06817d, 0xbfa20002,
+ 0xbeff0080, 0xbfa00001,
+ 0xbeff00c1, 0xb8fb4306,
+ 0x8b7bc17b, 0xbfa10043,
+ 0xbfbd0000, 0x8b7aff6d,
+ 0x80000000, 0xbfa1003f,
+ 0x847b887b, 0xbef6007b,
0xb8f03b05, 0x80708170,
0xbf0d9973, 0xbfa20002,
0x84708970, 0xbfa00001,
@@ -2988,177 +2986,175 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
0x701d0000, 0x807d817d,
0x8070ff70, 0x00000080,
0xbf0a7b7d, 0xbfa2fff8,
- 0xbfa00146, 0xbef4007e,
+ 0xbfa00143, 0xbef4007e,
0x8b75ff7f, 0x0000ffff,
0x8c75ff75, 0x00040000,
0xbef60080, 0xbef700ff,
0x10807fac, 0xb8f202dc,
0x84729972, 0x8b6eff7f,
- 0x04000000, 0xbfa1003a,
+ 0x04000000, 0xbfa10039,
0xbefe00c1, 0x857d9972,
0x8b7d817d, 0xbf06817d,
0xbfa20002, 0xbeff0080,
0xbfa00001, 0xbeff00c1,
0xb8ef4306, 0x8b6fc16f,
- 0xbfa1002f, 0x846f866f,
- 0x846f826f, 0xbef6006f,
- 0xb8f83b05, 0x80788178,
- 0xbf0d9972, 0xbfa20002,
- 0x84788978, 0xbfa00001,
- 0x84788a78, 0xb8ee1e06,
- 0x846e8a6e, 0x80786e78,
+ 0xbfa1002e, 0x846f886f,
+ 0xbef6006f, 0xb8f83b05,
+ 0x80788178, 0xbf0d9972,
+ 0xbfa20002, 0x84788978,
+ 0xbfa00001, 0x84788a78,
+ 0xb8ee1e06, 0x846e8a6e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000200, 0x8078ff78,
+ 0x00000080, 0xbef600ff,
+ 0x01000000, 0x857d9972,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbefd0080, 0xbfa2000c,
+ 0xe0500000, 0x781d0000,
+ 0xbf890000, 0xdac00000,
+ 0x00000000, 0x807dff7d,
+ 0x00000080, 0x8078ff78,
+ 0x00000080, 0xbf0a6f7d,
+ 0xbfa2fff5, 0xbfa0000b,
+ 0xe0500000, 0x781d0000,
+ 0xbf890000, 0xdac00000,
+ 0x00000000, 0x807dff7d,
+ 0x00000100, 0x8078ff78,
+ 0x00000100, 0xbf0a6f7d,
+ 0xbfa2fff5, 0xbef80080,
+ 0xbefe00c1, 0x857d9972,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbfa20002, 0xbeff0080,
+ 0xbfa00001, 0xbeff00c1,
+ 0xb8ef3b05, 0x806f816f,
+ 0x846f826f, 0x857d9972,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbfa20024, 0xbef600ff,
+ 0x01000000, 0xbeee0078,
0x8078ff78, 0x00000200,
- 0x8078ff78, 0x00000080,
- 0xbef600ff, 0x01000000,
- 0x857d9972, 0x8b7d817d,
- 0xbf06817d, 0xbefd0080,
- 0xbfa2000c, 0xe0500000,
- 0x781d0000, 0xbf8903f7,
- 0xdac00000, 0x00000000,
- 0x807dff7d, 0x00000080,
- 0x8078ff78, 0x00000080,
- 0xbf0a6f7d, 0xbfa2fff5,
- 0xbfa0000b, 0xe0500000,
- 0x781d0000, 0xbf8903f7,
- 0xdac00000, 0x00000000,
- 0x807dff7d, 0x00000100,
- 0x8078ff78, 0x00000100,
- 0xbf0a6f7d, 0xbfa2fff5,
- 0xbef80080, 0xbefe00c1,
- 0x857d9972, 0x8b7d817d,
- 0xbf06817d, 0xbfa20002,
- 0xbeff0080, 0xbfa00001,
- 0xbeff00c1, 0xb8ef3b05,
- 0x806f816f, 0x846f826f,
- 0x857d9972, 0x8b7d817d,
- 0xbf06817d, 0xbfa20024,
- 0xbef600ff, 0x01000000,
- 0xbeee0078, 0x8078ff78,
- 0x00000200, 0xbefd0084,
- 0xbf0a6f7d, 0xbfa10050,
+ 0xbefd0084, 0xbf0a6f7d,
+ 0xbfa10050, 0xe0505000,
+ 0x781d0000, 0xe0505080,
+ 0x781d0100, 0xe0505100,
+ 0x781d0200, 0xe0505180,
+ 0x781d0300, 0xbf890000,
+ 0x7e008500, 0x7e028501,
+ 0x7e048502, 0x7e068503,
+ 0x807d847d, 0x8078ff78,
+ 0x00000200, 0xbf0a6f7d,
+ 0xbfa2ffee, 0xe0505000,
+ 0x6e1d0000, 0xe0505080,
+ 0x6e1d0100, 0xe0505100,
+ 0x6e1d0200, 0xe0505180,
+ 0x6e1d0300, 0xbf890000,
+ 0xbfa00034, 0xbef600ff,
+ 0x01000000, 0xbeee0078,
+ 0x8078ff78, 0x00000400,
+ 0xbefd0084, 0xbf0a6f7d,
+ 0xbfa10012, 0xe0505000,
+ 0x781d0000, 0xe0505100,
+ 0x781d0100, 0xe0505200,
+ 0x781d0200, 0xe0505300,
+ 0x781d0300, 0xbf890000,
+ 0x7e008500, 0x7e028501,
+ 0x7e048502, 0x7e068503,
+ 0x807d847d, 0x8078ff78,
+ 0x00000400, 0xbf0a6f7d,
+ 0xbfa2ffee, 0xb8ef1e06,
+ 0x8b6fc16f, 0xbfa1000e,
+ 0x846f836f, 0x806f7d6f,
+ 0xbefe00c1, 0xbeff0080,
0xe0505000, 0x781d0000,
- 0xe0505080, 0x781d0100,
- 0xe0505100, 0x781d0200,
- 0xe0505180, 0x781d0300,
- 0xbf8903f7, 0x7e008500,
- 0x7e028501, 0x7e048502,
- 0x7e068503, 0x807d847d,
- 0x8078ff78, 0x00000200,
- 0xbf0a6f7d, 0xbfa2ffee,
+ 0xbf890000, 0x7e008500,
+ 0x807d817d, 0x8078ff78,
+ 0x00000080, 0xbf0a6f7d,
+ 0xbfa2fff7, 0xbeff00c1,
0xe0505000, 0x6e1d0000,
- 0xe0505080, 0x6e1d0100,
- 0xe0505100, 0x6e1d0200,
- 0xe0505180, 0x6e1d0300,
- 0xbf8903f7, 0xbfa00034,
- 0xbef600ff, 0x01000000,
- 0xbeee0078, 0x8078ff78,
- 0x00000400, 0xbefd0084,
- 0xbf0a6f7d, 0xbfa10012,
- 0xe0505000, 0x781d0000,
- 0xe0505100, 0x781d0100,
- 0xe0505200, 0x781d0200,
- 0xe0505300, 0x781d0300,
- 0xbf8903f7, 0x7e008500,
- 0x7e028501, 0x7e048502,
- 0x7e068503, 0x807d847d,
- 0x8078ff78, 0x00000400,
- 0xbf0a6f7d, 0xbfa2ffee,
- 0xb8ef1e06, 0x8b6fc16f,
- 0xbfa1000e, 0x846f836f,
- 0x806f7d6f, 0xbefe00c1,
- 0xbeff0080, 0xe0505000,
- 0x781d0000, 0xbf8903f7,
- 0x7e008500, 0x807d817d,
- 0x8078ff78, 0x00000080,
- 0xbf0a6f7d, 0xbfa2fff7,
- 0xbeff00c1, 0xe0505000,
- 0x6e1d0000, 0xe0505100,
- 0x6e1d0100, 0xe0505200,
- 0x6e1d0200, 0xe0505300,
- 0x6e1d0300, 0xbf8903f7,
+ 0xe0505100, 0x6e1d0100,
+ 0xe0505200, 0x6e1d0200,
+ 0xe0505300, 0x6e1d0300,
+ 0xbf890000, 0xb8f83b05,
+ 0x80788178, 0xbf0d9972,
+ 0xbfa20002, 0x84788978,
+ 0xbfa00001, 0x84788a78,
+ 0xb8ee1e06, 0x846e8a6e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000200, 0x80f8ff78,
+ 0x00000050, 0xbef600ff,
+ 0x01000000, 0xbefd00ff,
+ 0x0000006c, 0x80f89078,
+ 0xf428403a, 0xf0000000,
+ 0xbf890000, 0x80fd847d,
+ 0xbf800000, 0xbe804300,
+ 0xbe824302, 0x80f8a078,
+ 0xf42c403a, 0xf0000000,
+ 0xbf890000, 0x80fd887d,
+ 0xbf800000, 0xbe804300,
+ 0xbe824302, 0xbe844304,
+ 0xbe864306, 0x80f8c078,
+ 0xf430403a, 0xf0000000,
+ 0xbf890000, 0x80fd907d,
+ 0xbf800000, 0xbe804300,
+ 0xbe824302, 0xbe844304,
+ 0xbe864306, 0xbe884308,
+ 0xbe8a430a, 0xbe8c430c,
+ 0xbe8e430e, 0xbf06807d,
+ 0xbfa1fff0, 0xb980f801,
+ 0x00000000, 0xbfbd0000,
0xb8f83b05, 0x80788178,
0xbf0d9972, 0xbfa20002,
0x84788978, 0xbfa00001,
0x84788a78, 0xb8ee1e06,
0x846e8a6e, 0x80786e78,
0x8078ff78, 0x00000200,
- 0x80f8ff78, 0x00000050,
0xbef600ff, 0x01000000,
- 0xbefd00ff, 0x0000006c,
- 0x80f89078, 0xf428403a,
- 0xf0000000, 0xbf89fc07,
- 0x80fd847d, 0xbf800000,
- 0xbe804300, 0xbe824302,
- 0x80f8a078, 0xf42c403a,
- 0xf0000000, 0xbf89fc07,
- 0x80fd887d, 0xbf800000,
- 0xbe804300, 0xbe824302,
- 0xbe844304, 0xbe864306,
- 0x80f8c078, 0xf430403a,
- 0xf0000000, 0xbf89fc07,
- 0x80fd907d, 0xbf800000,
- 0xbe804300, 0xbe824302,
- 0xbe844304, 0xbe864306,
- 0xbe884308, 0xbe8a430a,
- 0xbe8c430c, 0xbe8e430e,
- 0xbf06807d, 0xbfa1fff0,
- 0xb980f801, 0x00000000,
- 0xbfbd0000, 0xb8f83b05,
- 0x80788178, 0xbf0d9972,
- 0xbfa20002, 0x84788978,
- 0xbfa00001, 0x84788a78,
- 0xb8ee1e06, 0x846e8a6e,
- 0x80786e78, 0x8078ff78,
- 0x00000200, 0xbef600ff,
- 0x01000000, 0xf4205bfa,
+ 0xf4205bfa, 0xf0000000,
+ 0x80788478, 0xf4205b3a,
0xf0000000, 0x80788478,
- 0xf4205b3a, 0xf0000000,
- 0x80788478, 0xf4205b7a,
+ 0xf4205b7a, 0xf0000000,
+ 0x80788478, 0xf4205c3a,
0xf0000000, 0x80788478,
- 0xf4205c3a, 0xf0000000,
- 0x80788478, 0xf4205c7a,
+ 0xf4205c7a, 0xf0000000,
+ 0x80788478, 0xf4205eba,
0xf0000000, 0x80788478,
- 0xf4205eba, 0xf0000000,
- 0x80788478, 0xf4205efa,
+ 0xf4205efa, 0xf0000000,
+ 0x80788478, 0xf4205e7a,
0xf0000000, 0x80788478,
- 0xf4205e7a, 0xf0000000,
- 0x80788478, 0xf4205cfa,
+ 0xf4205cfa, 0xf0000000,
+ 0x80788478, 0xf4205bba,
0xf0000000, 0x80788478,
+ 0xbf890000, 0xb96ef814,
0xf4205bba, 0xf0000000,
- 0x80788478, 0xbf89fc07,
- 0xb96ef814, 0xf4205bba,
- 0xf0000000, 0x80788478,
- 0xbf89fc07, 0xb96ef815,
- 0xbefd006f, 0xbefe0070,
- 0xbeff0071, 0x8b6f7bff,
- 0x000003ff, 0xb96f4803,
- 0x8b6f7bff, 0xfffff800,
- 0x856f8b6f, 0xb96fa2c3,
- 0xb973f801, 0xb8ee3b05,
- 0x806e816e, 0xbf0d9972,
- 0xbfa20002, 0x846e896e,
- 0xbfa00001, 0x846e8a6e,
- 0xb8ef1e06, 0x846f8a6f,
- 0x806e6f6e, 0x806eff6e,
- 0x00000200, 0x806e746e,
- 0x826f8075, 0x8b6fff6f,
- 0x0000ffff, 0xf4085c37,
- 0xf8000050, 0xf4085d37,
- 0xf8000060, 0xf4005e77,
- 0xf8000074, 0xbf89fc07,
- 0x8b6dff6d, 0x0000ffff,
- 0x8bfe7e7e, 0x8bea6a6a,
- 0xb8eef802, 0xbf0d866e,
- 0xbfa20002, 0xb97af802,
- 0xbe80486c, 0xb97af802,
- 0xbe804a6c, 0xbfb10000,
+ 0x80788478, 0xbf890000,
+ 0xb96ef815, 0xbefd006f,
+ 0xbefe0070, 0xbeff0071,
+ 0xb97b4803, 0x857b8b7b,
+ 0xb97b22c3, 0x857b867b,
+ 0xb97b7443, 0xb973f801,
+ 0xb8ee3b05, 0x806e816e,
+ 0xbf0d9972, 0xbfa20002,
+ 0x846e896e, 0xbfa00001,
+ 0x846e8a6e, 0xb8ef1e06,
+ 0x846f8a6f, 0x806e6f6e,
+ 0x806eff6e, 0x00000200,
+ 0x806e746e, 0x826f8075,
+ 0x8b6fff6f, 0x0000ffff,
+ 0xf4085c37, 0xf8000050,
+ 0xf4085d37, 0xf8000060,
+ 0xf4005e77, 0xf8000074,
+ 0xbf890000, 0x8b6dff6d,
+ 0x0000ffff, 0x8bfe7e7e,
+ 0x8bea6a6a, 0xb8eef802,
+ 0xbf0d866e, 0xbfa20002,
+ 0xb97af802, 0xbe80486c,
+ 0xb97af802, 0xbe804a6c,
+ 0xbfb10000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
- 0xbf9f0000, 0x00000000,
};
static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
- 0xbf820001, 0xbf8202db,
+ 0xbf820001, 0xbf8202dc,
0xb8f8f802, 0x8978ff78,
0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
@@ -3273,99 +3269,143 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
0xbefe007c, 0xbefc0070,
0xc0611c7a, 0x0000007c,
0xbf8cc07f, 0x80708470,
- 0xbefc007e, 0x867aff7f,
- 0x04000000, 0xbeef0080,
- 0x876f6f7a, 0xb8f02985,
- 0x80708170, 0x8e708a70,
- 0x8e708170, 0xb8fb1605,
- 0x807b817b, 0x8e7b847b,
- 0x8e76827b, 0xbef600ff,
- 0x01000000, 0xbef20174,
- 0x80747074, 0x82758075,
- 0xbefc0080, 0xbf800000,
- 0xbe802b00, 0xbe822b02,
- 0xbe842b04, 0xbe862b06,
- 0xbe882b08, 0xbe8a2b0a,
- 0xbe8c2b0c, 0xbe8e2b0e,
- 0xc06b003a, 0x00000000,
- 0xbf8cc07f, 0xc06b013a,
- 0x00000010, 0xbf8cc07f,
- 0xc06b023a, 0x00000020,
- 0xbf8cc07f, 0xc06b033a,
- 0x00000030, 0xbf8cc07f,
- 0x8074c074, 0x82758075,
- 0x807c907c, 0xbf0a7b7c,
- 0xbf85ffe7, 0xbef40172,
- 0xbef00080, 0xbefe00c1,
- 0xbeff00c1, 0xbee80080,
- 0xbee90080, 0xbef600ff,
- 0x01000000, 0x867aff78,
- 0x00400000, 0xbf850003,
- 0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf85004d,
- 0xbe840080, 0xd2890000,
+ 0xbefc007e, 0xbf108080,
+ 0x867aff7f, 0x04000000,
+ 0xbeef0080, 0x876f6f7a,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fb1605, 0x807b817b,
+ 0x8e7b847b, 0x8e76827b,
+ 0xbef600ff, 0x01000000,
+ 0xbef20174, 0x80747074,
+ 0x82758075, 0xbefc0080,
+ 0xbf800000, 0xbe802b00,
+ 0xbe822b02, 0xbe842b04,
+ 0xbe862b06, 0xbe882b08,
+ 0xbe8a2b0a, 0xbe8c2b0c,
+ 0xbe8e2b0e, 0xc06b003a,
+ 0x00000000, 0xbf8cc07f,
+ 0xc06b013a, 0x00000010,
+ 0xbf8cc07f, 0xc06b023a,
+ 0x00000020, 0xbf8cc07f,
+ 0xc06b033a, 0x00000030,
+ 0xbf8cc07f, 0x8074c074,
+ 0x82758075, 0x807c907c,
+ 0xbf0a7b7c, 0xbf85ffe7,
+ 0xbef40172, 0xbef00080,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xbee80080, 0xbee90080,
+ 0xbef600ff, 0x01000000,
+ 0x867aff78, 0x00400000,
+ 0xbf850003, 0xb8faf803,
+ 0x897a7aff, 0x10000000,
+ 0xbf85004d, 0xbe840080,
+ 0xd2890000, 0x00000900,
+ 0x80048104, 0xd2890001,
0x00000900, 0x80048104,
- 0xd2890001, 0x00000900,
- 0x80048104, 0xd2890002,
+ 0xd2890002, 0x00000900,
+ 0x80048104, 0xd2890003,
0x00000900, 0x80048104,
- 0xd2890003, 0x00000900,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000901, 0x80048104,
+ 0xd2890001, 0x00000901,
+ 0x80048104, 0xd2890002,
+ 0x00000901, 0x80048104,
+ 0xd2890003, 0x00000901,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000901,
+ 0xd2890000, 0x00000902,
0x80048104, 0xd2890001,
- 0x00000901, 0x80048104,
- 0xd2890002, 0x00000901,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
0x80048104, 0xd2890003,
- 0x00000901, 0x80048104,
+ 0x00000902, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
0xbe840080, 0xd2890000,
- 0x00000902, 0x80048104,
- 0xd2890001, 0x00000902,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
0x80048104, 0xd2890002,
- 0x00000902, 0x80048104,
- 0xd2890003, 0x00000902,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbf820008,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8fb4306, 0x867bc17b,
+ 0xbf840064, 0xbf8a0000,
+ 0x867aff6f, 0x04000000,
+ 0xbf840060, 0x8e7b867b,
+ 0x8e7b827b, 0xbef6007b,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x80707a70,
+ 0x8070ff70, 0x00000080,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0080, 0xd28c0002,
+ 0x000100c1, 0xd28d0003,
+ 0x000204c1, 0x867aff78,
+ 0x00400000, 0xbf850003,
+ 0xb8faf803, 0x897a7aff,
+ 0x10000000, 0xbf850030,
+ 0x24040682, 0xd86e4000,
+ 0x00000002, 0xbf8cc07f,
+ 0xbe840080, 0xd2890000,
+ 0x00000900, 0x80048104,
+ 0xd2890001, 0x00000900,
+ 0x80048104, 0xd2890002,
+ 0x00000900, 0x80048104,
+ 0xd2890003, 0x00000900,
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0xbe840080,
- 0xd2890000, 0x00000903,
+ 0xd2890000, 0x00000901,
0x80048104, 0xd2890001,
- 0x00000903, 0x80048104,
- 0xd2890002, 0x00000903,
+ 0x00000901, 0x80048104,
+ 0xd2890002, 0x00000901,
0x80048104, 0xd2890003,
- 0x00000903, 0x80048104,
+ 0x00000901, 0x80048104,
0xc069003a, 0x00000070,
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
- 0xbf820008, 0xe0724000,
- 0x701d0000, 0xe0724100,
- 0x701d0100, 0xe0724200,
- 0x701d0200, 0xe0724300,
- 0x701d0300, 0xbefe00c1,
- 0xbeff00c1, 0xb8fb4306,
- 0x867bc17b, 0xbf840064,
- 0xbf8a0000, 0x867aff6f,
- 0x04000000, 0xbf840060,
- 0x8e7b867b, 0x8e7b827b,
- 0xbef6007b, 0xb8f02985,
- 0x80708170, 0x8e708a70,
- 0x8e708170, 0xb8fa1605,
- 0x807a817a, 0x8e7a867a,
- 0x80707a70, 0x8070ff70,
- 0x00000080, 0xbef600ff,
- 0x01000000, 0xbefc0080,
- 0xd28c0002, 0x000100c1,
- 0xd28d0003, 0x000204c1,
+ 0x680404ff, 0x00000200,
+ 0xd0c9006a, 0x0000f702,
+ 0xbf87ffd2, 0xbf820015,
+ 0xd1060002, 0x00011103,
+ 0x7e0602ff, 0x00000200,
+ 0xbefc00ff, 0x00010000,
+ 0xbe800077, 0x8677ff77,
+ 0xff7fffff, 0x8777ff77,
+ 0x00058000, 0xd8ec0000,
+ 0x00000002, 0xbf8cc07f,
+ 0xe0765000, 0x701d0002,
+ 0x68040702, 0xd0c9006a,
+ 0x0000f702, 0xbf87fff7,
+ 0xbef70000, 0xbef000ff,
+ 0x00000400, 0xbefe00c1,
+ 0xbeff00c1, 0xb8fb2b05,
+ 0x807b817b, 0x8e7b827b,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0084, 0xbf0a7b7c,
+ 0xbf84006d, 0xbf11017c,
+ 0x807bff7b, 0x00001000,
0x867aff78, 0x00400000,
0xbf850003, 0xb8faf803,
0x897a7aff, 0x10000000,
- 0xbf850030, 0x24040682,
- 0xd86e4000, 0x00000002,
- 0xbf8cc07f, 0xbe840080,
+ 0xbf850051, 0xbe840080,
0xd2890000, 0x00000900,
0x80048104, 0xd2890001,
0x00000900, 0x80048104,
@@ -3384,31 +3424,51 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
0x80048104, 0xc069003a,
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
- 0xbf84ffee, 0x680404ff,
- 0x00000200, 0xd0c9006a,
- 0x0000f702, 0xbf87ffd2,
- 0xbf820015, 0xd1060002,
- 0x00011103, 0x7e0602ff,
- 0x00000200, 0xbefc00ff,
- 0x00010000, 0xbe800077,
- 0x8677ff77, 0xff7fffff,
- 0x8777ff77, 0x00058000,
- 0xd8ec0000, 0x00000002,
- 0xbf8cc07f, 0xe0765000,
- 0x701d0002, 0x68040702,
- 0xd0c9006a, 0x0000f702,
- 0xbf87fff7, 0xbef70000,
- 0xbef000ff, 0x00000400,
- 0xbefe00c1, 0xbeff00c1,
- 0xb8fb2b05, 0x807b817b,
- 0x8e7b827b, 0xbef600ff,
- 0x01000000, 0xbefc0084,
- 0xbf0a7b7c, 0xbf84006d,
- 0xbf11017c, 0x807bff7b,
- 0x00001000, 0x867aff78,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000902,
+ 0x80048104, 0xd2890001,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
+ 0x80048104, 0xd2890003,
+ 0x00000902, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
+ 0x80048104, 0xd2890002,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0x807c847c,
+ 0xbf0a7b7c, 0xbf85ffb1,
+ 0xbf9c0000, 0xbf820012,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
+ 0x807c847c, 0x8070ff70,
+ 0x00000400, 0xbf0a7b7c,
+ 0xbf85ffef, 0xbf9c0000,
+ 0xb8fb2985, 0x807b817b,
+ 0x8e7b837b, 0xb8fa2b05,
+ 0x807a817a, 0x8e7a827a,
+ 0x80fb7a7b, 0x867b7b7b,
+ 0xbf84007a, 0x807bff7b,
+ 0x00001000, 0xbefc0080,
+ 0xbf11017c, 0x867aff78,
0x00400000, 0xbf850003,
0xb8faf803, 0x897a7aff,
- 0x10000000, 0xbf850051,
+ 0x10000000, 0xbf850059,
+ 0xd3d84000, 0x18000100,
+ 0xd3d84001, 0x18000101,
+ 0xd3d84002, 0x18000102,
+ 0xd3d84003, 0x18000103,
0xbe840080, 0xd2890000,
0x00000900, 0x80048104,
0xd2890001, 0x00000900,
@@ -3448,31 +3508,913 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
0xbf8cc07f, 0x80709070,
0xbf06c004, 0xbf84ffee,
0x807c847c, 0xbf0a7b7c,
- 0xbf85ffb1, 0xbf9c0000,
- 0xbf820012, 0x7e000300,
- 0x7e020301, 0x7e040302,
- 0x7e060303, 0xe0724000,
+ 0xbf85ffa9, 0xbf9c0000,
+ 0xbf820016, 0xd3d84000,
+ 0x18000100, 0xd3d84001,
+ 0x18000101, 0xd3d84002,
+ 0x18000102, 0xd3d84003,
+ 0x18000103, 0xe0724000,
0x701d0000, 0xe0724100,
0x701d0100, 0xe0724200,
0x701d0200, 0xe0724300,
0x701d0300, 0x807c847c,
0x8070ff70, 0x00000400,
- 0xbf0a7b7c, 0xbf85ffef,
- 0xbf9c0000, 0xb8fb2985,
- 0x807b817b, 0x8e7b837b,
- 0xb8fa2b05, 0x807a817a,
- 0x8e7a827a, 0x80fb7a7b,
- 0x867b7b7b, 0xbf84007a,
+ 0xbf0a7b7c, 0xbf85ffeb,
+ 0xbf9c0000, 0xbf8200ee,
+ 0xbef4007e, 0x8675ff7f,
+ 0x0000ffff, 0x8775ff75,
+ 0x00040000, 0xbef60080,
+ 0xbef700ff, 0x00807fac,
+ 0x866eff7f, 0x04000000,
+ 0xbf84001f, 0xbefe00c1,
+ 0xbeff00c1, 0xb8ef4306,
+ 0x866fc16f, 0xbf84001a,
+ 0x8e6f866f, 0x8e6f826f,
+ 0xbef6006f, 0xb8f82985,
+ 0x80788178, 0x8e788a78,
+ 0x8e788178, 0xb8ee1605,
+ 0x806e816e, 0x8e6e866e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000080, 0xbef600ff,
+ 0x01000000, 0xbefc0080,
+ 0xe0510000, 0x781d0000,
+ 0xe0510100, 0x781d0000,
+ 0x807cff7c, 0x00000200,
+ 0x8078ff78, 0x00000200,
+ 0xbf0a6f7c, 0xbf85fff6,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xbef600ff, 0x01000000,
+ 0xb8ef2b05, 0x806f816f,
+ 0x8e6f826f, 0x806fff6f,
+ 0x00008000, 0xbef80080,
+ 0xbeee0078, 0x8078ff78,
+ 0x00000400, 0xbefc0084,
+ 0xbf11087c, 0xe0524000,
+ 0x781d0000, 0xe0524100,
+ 0x781d0100, 0xe0524200,
+ 0x781d0200, 0xe0524300,
+ 0x781d0300, 0xbf8c0f70,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+ 0x807c847c, 0x8078ff78,
+ 0x00000400, 0xbf0a6f7c,
+ 0xbf85ffee, 0xb8ef2985,
+ 0x806f816f, 0x8e6f836f,
+ 0xb8f92b05, 0x80798179,
+ 0x8e798279, 0x80ef796f,
+ 0x866f6f6f, 0xbf84001a,
+ 0x806fff6f, 0x00008000,
+ 0xbefc0080, 0xbf11087c,
+ 0xe0524000, 0x781d0000,
+ 0xe0524100, 0x781d0100,
+ 0xe0524200, 0x781d0200,
+ 0xe0524300, 0x781d0300,
+ 0xbf8c0f70, 0xd3d94000,
+ 0x18000100, 0xd3d94001,
+ 0x18000101, 0xd3d94002,
+ 0x18000102, 0xd3d94003,
+ 0x18000103, 0x807c847c,
+ 0x8078ff78, 0x00000400,
+ 0xbf0a6f7c, 0xbf85ffea,
+ 0xbf9c0000, 0xe0524000,
+ 0x6e1d0000, 0xe0524100,
+ 0x6e1d0100, 0xe0524200,
+ 0x6e1d0200, 0xe0524300,
+ 0x6e1d0300, 0xbf8c0f70,
+ 0xb8f82985, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0x80f8c078, 0xb8ef1605,
+ 0x806f816f, 0x8e6f846f,
+ 0x8e76826f, 0xbef600ff,
+ 0x01000000, 0xbefc006f,
+ 0xc031003a, 0x00000078,
+ 0x80f8c078, 0xbf8cc07f,
+ 0x80fc907c, 0xbf800000,
+ 0xbe802d00, 0xbe822d02,
+ 0xbe842d04, 0xbe862d06,
+ 0xbe882d08, 0xbe8a2d0a,
+ 0xbe8c2d0c, 0xbe8e2d0e,
+ 0xbf06807c, 0xbf84fff0,
+ 0xb8f82985, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xc0211bfa,
+ 0x00000078, 0x80788478,
+ 0xc0211b3a, 0x00000078,
+ 0x80788478, 0xc0211b7a,
+ 0x00000078, 0x80788478,
+ 0xc0211c3a, 0x00000078,
+ 0x80788478, 0xc0211c7a,
+ 0x00000078, 0x80788478,
+ 0xc0211eba, 0x00000078,
+ 0x80788478, 0xc0211efa,
+ 0x00000078, 0x80788478,
+ 0xc0211a3a, 0x00000078,
+ 0x80788478, 0xc0211a7a,
+ 0x00000078, 0x80788478,
+ 0xc0211cfa, 0x00000078,
+ 0x80788478, 0xbf8cc07f,
+ 0xbefc006f, 0xbefe0070,
+ 0xbeff0071, 0x866f7bff,
+ 0x000003ff, 0xb96f4803,
+ 0x866f7bff, 0xfffff800,
+ 0x8f6f8b6f, 0xb96fa2c3,
+ 0xb973f801, 0xb8ee2985,
+ 0x806e816e, 0x8e6e8a6e,
+ 0x8e6e816e, 0xb8ef1605,
+ 0x806f816f, 0x8e6f866f,
+ 0x806e6f6e, 0x806e746e,
+ 0x826f8075, 0x866fff6f,
+ 0x0000ffff, 0xc00b1c37,
+ 0x00000050, 0xc00b1d37,
+ 0x00000060, 0xc0031e77,
+ 0x00000074, 0xbf8cc07f,
+ 0x8f6e8b79, 0x866eff6e,
+ 0x001f8000, 0xb96ef807,
+ 0x866dff6d, 0x0000ffff,
+ 0x86fe7e7e, 0x86ea6a6a,
+ 0x8f6e837a, 0xb96ee0c2,
+ 0xbf800002, 0xb97a0002,
+ 0xbf8a0000, 0xbe801f6c,
+ 0xbf9b0000, 0x00000000,
+};
+
+static const uint32_t cwsr_trap_gfx12_hex[] = {
+ 0xbfa00001, 0xbfa002a2,
+ 0xb0804009, 0xb8f8f804,
+ 0x9178ff78, 0x00008c00,
+ 0xb8fbf811, 0x8b6eff78,
+ 0x00004000, 0xbfa10008,
+ 0x8b6eff7b, 0x00000080,
+ 0xbfa20018, 0x8b6ea07b,
+ 0xbfa20042, 0xbf830010,
+ 0xb8fbf811, 0xbfa0fffb,
+ 0x8b6eff7b, 0x00000bd0,
+ 0xbfa20010, 0xb8eef812,
+ 0x8b6f8f7b, 0xbfa10002,
+ 0x8c6eff6e, 0x00000080,
+ 0xb8eff813, 0x8b6e6e6f,
+ 0xbfa20008, 0x8b6eff6d,
+ 0xf0000000, 0xbfa20005,
+ 0x8b6fff6f, 0x00000200,
+ 0xbfa20002, 0x8b6ea07b,
+ 0xbfa2002c, 0xbefa4d82,
+ 0xbf8a0000, 0x84fa887a,
+ 0xbf0d8f7b, 0xbfa10002,
+ 0x8c7bff7b, 0xffff0000,
+ 0xf4601bbd, 0xf8000010,
+ 0xbf8a0000, 0x846e976e,
+ 0x9177ff77, 0x00800000,
+ 0x8c776e77, 0xf4603bbd,
+ 0xf8000000, 0xbf8a0000,
+ 0xf4603ebd, 0xf8000008,
+ 0xbf8a0000, 0x8bee6e6e,
+ 0xbfa10001, 0xbe80486e,
+ 0x8b6eff6d, 0xf0000000,
+ 0xbfa20009, 0xb8eef811,
+ 0x8b6eff6e, 0x00000080,
+ 0xbfa20007, 0x8c78ff78,
+ 0x00004000, 0x80ec886c,
+ 0x82ed806d, 0xbfa00002,
+ 0x806c846c, 0x826d806d,
+ 0x8b6dff6d, 0x0000ffff,
+ 0x8bfe7e7e, 0x8bea6a6a,
+ 0x85788978, 0xb9783244,
+ 0xbe804a6c, 0xb8faf802,
+ 0xbf0d987a, 0xbfa10001,
+ 0xbfb00000, 0x8b6dff6d,
+ 0x0000ffff, 0xbefa0080,
+ 0xb97a0151, 0xbeee007e,
+ 0xbeef007f, 0xbefe0180,
+ 0xbefe4d84, 0xbf8a0000,
+ 0x8b7aff7f, 0x04000000,
+ 0x847a857a, 0x8c6d7a6d,
+ 0xbefa007e, 0x8b7bff7f,
+ 0x0000ffff, 0xbefe00c1,
+ 0xbeff00c1, 0xee0a407a,
+ 0x000c0000, 0x00000000,
+ 0x7e000280, 0xbefe007a,
+ 0xbeff007b, 0xb8fb0742,
+ 0x847b997b, 0xb8fa3b05,
+ 0x807a817a, 0xbf0d997b,
+ 0xbfa20002, 0x847a897a,
+ 0xbfa00001, 0x847a8a7a,
+ 0xb8fb1e06, 0x847b8a7b,
+ 0x807a7b7a, 0x8b7bff7f,
+ 0x0000ffff, 0x807aff7a,
+ 0x00000200, 0x807a7e7a,
+ 0x827b807b, 0xd7610000,
+ 0x00010870, 0xd7610000,
+ 0x00010a71, 0xd7610000,
+ 0x00010c72, 0xd7610000,
+ 0x00010e73, 0xd7610000,
+ 0x00011074, 0xd7610000,
+ 0x00011275, 0xd7610000,
+ 0x00011476, 0xd7610000,
+ 0x00011677, 0xd7610000,
+ 0x00011a79, 0xd7610000,
+ 0x00011c7e, 0xd7610000,
+ 0x00011e7f, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xbefe00ff,
+ 0x00003fff, 0xbeff0080,
+ 0xee0a407a, 0x000c0000,
+ 0x00004000, 0xd760007a,
+ 0x00011d00, 0xd760007b,
+ 0x00011f00, 0xbefe007a,
+ 0xbeff007b, 0xbef4007e,
+ 0x8b75ff7f, 0x0000ffff,
+ 0x8c75ff75, 0x00040000,
+ 0xbef60080, 0xbef700ff,
+ 0x10807fac, 0xbef1007d,
+ 0xbef00080, 0xb8f30742,
+ 0x84739973, 0xbefe00c1,
+ 0x857d9973, 0x8b7d817d,
+ 0xbf06817d, 0xbfa20002,
+ 0xbeff0080, 0xbfa00002,
+ 0xbeff00c1, 0xbfa0000c,
+ 0xbef600ff, 0x01000000,
+ 0xc4068070, 0x008ce801,
+ 0x00008000, 0xc4068070,
+ 0x008ce802, 0x00010000,
+ 0xc4068070, 0x008ce803,
+ 0x00018000, 0xbfa0000b,
+ 0xbef600ff, 0x01000000,
+ 0xc4068070, 0x008ce801,
+ 0x00010000, 0xc4068070,
+ 0x008ce802, 0x00020000,
+ 0xc4068070, 0x008ce803,
+ 0x00030000, 0xb8f03b05,
+ 0x80708170, 0xbf0d9973,
+ 0xbfa20002, 0x84708970,
+ 0xbfa00001, 0x84708a70,
+ 0xb8fa1e06, 0x847a8a7a,
+ 0x80707a70, 0x8070ff70,
+ 0x00000200, 0xbef600ff,
+ 0x01000000, 0x7e000280,
+ 0x7e020280, 0x7e040280,
+ 0xbe804ec2, 0xbf94fffe,
+ 0xb8faf804, 0x8b7a847a,
+ 0x91788478, 0x8c787a78,
+ 0x917aff6d, 0x80000000,
+ 0xd7610002, 0x00010071,
+ 0xd7610002, 0x0001026c,
+ 0xd7610002, 0x0001047a,
+ 0xd7610002, 0x0001066e,
+ 0xd7610002, 0x0001086f,
+ 0xd7610002, 0x00010a78,
+ 0xd7610002, 0x00010e7b,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xb8faf811, 0xd7610002,
+ 0x00010c7a, 0xb8faf801,
+ 0xd7610002, 0x0001107a,
+ 0xb8faf814, 0xd7610002,
+ 0x0001127a, 0xb8faf815,
+ 0xd7610002, 0x0001147a,
+ 0xb8faf812, 0xd7610002,
+ 0x0001167a, 0xb8faf813,
+ 0xd7610002, 0x0001187a,
+ 0xb8faf802, 0xd7610002,
+ 0x00011a7a, 0xbefa50c1,
+ 0xbfc70000, 0xd7610002,
+ 0x00011c7a, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xbefe00ff,
+ 0x0000ffff, 0xbeff0080,
+ 0xc4068070, 0x008ce802,
+ 0x00000000, 0xbefe00c1,
+ 0xb8f03b05, 0x80708170,
+ 0xbf0d9973, 0xbfa20002,
+ 0x84708970, 0xbfa00001,
+ 0x84708a70, 0xb8fa1e06,
+ 0x847a8a7a, 0x80707a70,
+ 0xbef600ff, 0x01000000,
+ 0xbef90080, 0xbefd0080,
+ 0xbf800000, 0xbe804100,
+ 0xbe824102, 0xbe844104,
+ 0xbe864106, 0xbe884108,
+ 0xbe8a410a, 0xbe8c410c,
+ 0xbe8e410e, 0xbf068079,
+ 0xbfa10032, 0xd7610002,
+ 0x00010000, 0xd7610002,
+ 0x00010201, 0xd7610002,
+ 0x00010402, 0xd7610002,
+ 0x00010603, 0xd7610002,
+ 0x00010804, 0xd7610002,
+ 0x00010a05, 0xd7610002,
+ 0x00010c06, 0xd7610002,
+ 0x00010e07, 0xd7610002,
+ 0x00011008, 0xd7610002,
+ 0x00011209, 0xd7610002,
+ 0x0001140a, 0xd7610002,
+ 0x0001160b, 0xd7610002,
+ 0x0001180c, 0xd7610002,
+ 0x00011a0d, 0xd7610002,
+ 0x00011c0e, 0xd7610002,
+ 0x00011e0f, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x80799079,
+ 0xbfa00038, 0xd7610002,
+ 0x00012000, 0xd7610002,
+ 0x00012201, 0xd7610002,
+ 0x00012402, 0xd7610002,
+ 0x00012603, 0xd7610002,
+ 0x00012804, 0xd7610002,
+ 0x00012a05, 0xd7610002,
+ 0x00012c06, 0xd7610002,
+ 0x00012e07, 0xd7610002,
+ 0x00013008, 0xd7610002,
+ 0x00013209, 0xd7610002,
+ 0x0001340a, 0xd7610002,
+ 0x0001360b, 0xd7610002,
+ 0x0001380c, 0xd7610002,
+ 0x00013a0d, 0xd7610002,
+ 0x00013c0e, 0xd7610002,
+ 0x00013e0f, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x80799079,
+ 0xc4068070, 0x008ce802,
+ 0x00000000, 0x8070ff70,
+ 0x00000080, 0xbef90080,
+ 0x7e040280, 0x807d907d,
+ 0xbf0aff7d, 0x00000060,
+ 0xbfa2ff88, 0xbe804100,
+ 0xbe824102, 0xbe844104,
+ 0xbe864106, 0xbe884108,
+ 0xbe8a410a, 0xd7610002,
+ 0x00010000, 0xd7610002,
+ 0x00010201, 0xd7610002,
+ 0x00010402, 0xd7610002,
+ 0x00010603, 0xd7610002,
+ 0x00010804, 0xd7610002,
+ 0x00010a05, 0xd7610002,
+ 0x00010c06, 0xd7610002,
+ 0x00010e07, 0xd7610002,
+ 0x00011008, 0xd7610002,
+ 0x00011209, 0xd7610002,
+ 0x0001140a, 0xd7610002,
+ 0x0001160b, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xc4068070,
+ 0x008ce802, 0x00000000,
+ 0xbefe00c1, 0x857d9973,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbfa20002, 0xbeff0080,
+ 0xbfa00001, 0xbeff00c1,
+ 0xb8fb4306, 0x8b7bc17b,
+ 0xbfa10044, 0x8b7aff6d,
+ 0x80000000, 0xbfa10041,
+ 0x847b897b, 0xbef6007b,
+ 0xb8f03b05, 0x80708170,
+ 0xbf0d9973, 0xbfa20002,
+ 0x84708970, 0xbfa00001,
+ 0x84708a70, 0xb8fa1e06,
+ 0x847a8a7a, 0x80707a70,
+ 0x8070ff70, 0x00000200,
+ 0x8070ff70, 0x00000080,
+ 0xbef600ff, 0x01000000,
+ 0xd71f0000, 0x000100c1,
+ 0xd7200000, 0x000200c1,
+ 0x16000084, 0x857d9973,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbefd0080, 0xbfa20013,
+ 0xbe8300ff, 0x00000080,
+ 0xbf800000, 0xbf800000,
+ 0xbf800000, 0xd8d80000,
+ 0x01000000, 0xbf8a0000,
+ 0xc4068070, 0x008ce801,
+ 0x00000000, 0x807d037d,
+ 0x80700370, 0xd5250000,
+ 0x0001ff00, 0x00000080,
+ 0xbf0a7b7d, 0xbfa2fff3,
+ 0xbfa00012, 0xbe8300ff,
+ 0x00000100, 0xbf800000,
+ 0xbf800000, 0xbf800000,
+ 0xd8d80000, 0x01000000,
+ 0xbf8a0000, 0xc4068070,
+ 0x008ce801, 0x00000000,
+ 0x807d037d, 0x80700370,
+ 0xd5250000, 0x0001ff00,
+ 0x00000100, 0xbf0a7b7d,
+ 0xbfa2fff3, 0xbefe00c1,
+ 0x857d9973, 0x8b7d817d,
+ 0xbf06817d, 0xbfa20004,
+ 0xbef000ff, 0x00000200,
+ 0xbeff0080, 0xbfa00003,
+ 0xbef000ff, 0x00000400,
+ 0xbeff00c1, 0xb8fb3b05,
+ 0x807b817b, 0x847b827b,
+ 0x857d9973, 0x8b7d817d,
+ 0xbf06817d, 0xbfa2001b,
+ 0xbef600ff, 0x01000000,
+ 0xbefd0084, 0xbf0a7b7d,
+ 0xbfa10040, 0x7e008700,
+ 0x7e028701, 0x7e048702,
+ 0x7e068703, 0xc4068070,
+ 0x008ce800, 0x00000000,
+ 0xc4068070, 0x008ce801,
+ 0x00008000, 0xc4068070,
+ 0x008ce802, 0x00010000,
+ 0xc4068070, 0x008ce803,
+ 0x00018000, 0x807d847d,
+ 0x8070ff70, 0x00000200,
+ 0xbf0a7b7d, 0xbfa2ffeb,
+ 0xbfa0002a, 0xbef600ff,
+ 0x01000000, 0xbefd0084,
+ 0xbf0a7b7d, 0xbfa10015,
+ 0x7e008700, 0x7e028701,
+ 0x7e048702, 0x7e068703,
+ 0xc4068070, 0x008ce800,
+ 0x00000000, 0xc4068070,
+ 0x008ce801, 0x00010000,
+ 0xc4068070, 0x008ce802,
+ 0x00020000, 0xc4068070,
+ 0x008ce803, 0x00030000,
+ 0x807d847d, 0x8070ff70,
+ 0x00000400, 0xbf0a7b7d,
+ 0xbfa2ffeb, 0xb8fb1e06,
+ 0x8b7bc17b, 0xbfa1000d,
+ 0x847b837b, 0x807b7d7b,
+ 0xbefe00c1, 0xbeff0080,
+ 0x7e008700, 0xc4068070,
+ 0x008ce800, 0x00000000,
+ 0x807d817d, 0x8070ff70,
+ 0x00000080, 0xbf0a7b7d,
+ 0xbfa2fff7, 0xbfa0016e,
+ 0xbef4007e, 0x8b75ff7f,
+ 0x0000ffff, 0x8c75ff75,
+ 0x00040000, 0xbef60080,
+ 0xbef700ff, 0x10807fac,
+ 0xbef1007f, 0xb8f20742,
+ 0x84729972, 0x8b6eff7f,
+ 0x04000000, 0xbfa1003b,
+ 0xbefe00c1, 0x857d9972,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbfa20002, 0xbeff0080,
+ 0xbfa00001, 0xbeff00c1,
+ 0xb8ef4306, 0x8b6fc16f,
+ 0xbfa10030, 0x846f896f,
+ 0xbef6006f, 0xb8f83b05,
+ 0x80788178, 0xbf0d9972,
+ 0xbfa20002, 0x84788978,
+ 0xbfa00001, 0x84788a78,
+ 0xb8ee1e06, 0x846e8a6e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000200, 0x8078ff78,
+ 0x00000080, 0xbef600ff,
+ 0x01000000, 0x857d9972,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbefd0080, 0xbfa2000d,
+ 0xc4050078, 0x0080e800,
+ 0x00000000, 0xbf8a0000,
+ 0xdac00000, 0x00000000,
+ 0x807dff7d, 0x00000080,
+ 0x8078ff78, 0x00000080,
+ 0xbf0a6f7d, 0xbfa2fff4,
+ 0xbfa0000c, 0xc4050078,
+ 0x0080e800, 0x00000000,
+ 0xbf8a0000, 0xdac00000,
+ 0x00000000, 0x807dff7d,
+ 0x00000100, 0x8078ff78,
+ 0x00000100, 0xbf0a6f7d,
+ 0xbfa2fff4, 0xbef80080,
+ 0xbefe00c1, 0x857d9972,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbfa20002, 0xbeff0080,
+ 0xbfa00001, 0xbeff00c1,
+ 0xb8ef3b05, 0x806f816f,
+ 0x846f826f, 0x857d9972,
+ 0x8b7d817d, 0xbf06817d,
+ 0xbfa2002c, 0xbef600ff,
+ 0x01000000, 0xbeee0078,
+ 0x8078ff78, 0x00000200,
+ 0xbefd0084, 0xbf0a6f7d,
+ 0xbfa10061, 0xc4050078,
+ 0x008ce800, 0x00000000,
+ 0xc4050078, 0x008ce801,
+ 0x00008000, 0xc4050078,
+ 0x008ce802, 0x00010000,
+ 0xc4050078, 0x008ce803,
+ 0x00018000, 0xbf8a0000,
+ 0x7e008500, 0x7e028501,
+ 0x7e048502, 0x7e068503,
+ 0x807d847d, 0x8078ff78,
+ 0x00000200, 0xbf0a6f7d,
+ 0xbfa2ffea, 0xc405006e,
+ 0x008ce800, 0x00000000,
+ 0xc405006e, 0x008ce801,
+ 0x00008000, 0xc405006e,
+ 0x008ce802, 0x00010000,
+ 0xc405006e, 0x008ce803,
+ 0x00018000, 0xbf8a0000,
+ 0xbfa0003d, 0xbef600ff,
+ 0x01000000, 0xbeee0078,
+ 0x8078ff78, 0x00000400,
+ 0xbefd0084, 0xbf0a6f7d,
+ 0xbfa10016, 0xc4050078,
+ 0x008ce800, 0x00000000,
+ 0xc4050078, 0x008ce801,
+ 0x00010000, 0xc4050078,
+ 0x008ce802, 0x00020000,
+ 0xc4050078, 0x008ce803,
+ 0x00030000, 0xbf8a0000,
+ 0x7e008500, 0x7e028501,
+ 0x7e048502, 0x7e068503,
+ 0x807d847d, 0x8078ff78,
+ 0x00000400, 0xbf0a6f7d,
+ 0xbfa2ffea, 0xb8ef1e06,
+ 0x8b6fc16f, 0xbfa1000f,
+ 0x846f836f, 0x806f7d6f,
+ 0xbefe00c1, 0xbeff0080,
+ 0xc4050078, 0x008ce800,
+ 0x00000000, 0xbf8a0000,
+ 0x7e008500, 0x807d817d,
+ 0x8078ff78, 0x00000080,
+ 0xbf0a6f7d, 0xbfa2fff6,
+ 0xbeff00c1, 0xc405006e,
+ 0x008ce800, 0x00000000,
+ 0xc405006e, 0x008ce801,
+ 0x00010000, 0xc405006e,
+ 0x008ce802, 0x00020000,
+ 0xc405006e, 0x008ce803,
+ 0x00030000, 0xbf8a0000,
+ 0xb8f83b05, 0x80788178,
+ 0xbf0d9972, 0xbfa20002,
+ 0x84788978, 0xbfa00001,
+ 0x84788a78, 0xb8ee1e06,
+ 0x846e8a6e, 0x80786e78,
+ 0x8078ff78, 0x00000200,
+ 0x80f8ff78, 0x00000050,
+ 0xbef600ff, 0x01000000,
+ 0xbefd00ff, 0x0000006c,
+ 0x80f89078, 0xf462403a,
+ 0xf0000000, 0xbf8a0000,
+ 0x80fd847d, 0xbf800000,
+ 0xbe804300, 0xbe824302,
+ 0x80f8a078, 0xf462603a,
+ 0xf0000000, 0xbf8a0000,
+ 0x80fd887d, 0xbf800000,
+ 0xbe804300, 0xbe824302,
+ 0xbe844304, 0xbe864306,
+ 0x80f8c078, 0xf462803a,
+ 0xf0000000, 0xbf8a0000,
+ 0x80fd907d, 0xbf800000,
+ 0xbe804300, 0xbe824302,
+ 0xbe844304, 0xbe864306,
+ 0xbe884308, 0xbe8a430a,
+ 0xbe8c430c, 0xbe8e430e,
+ 0xbf06807d, 0xbfa1fff0,
+ 0xb980f801, 0x00000000,
+ 0xb8f83b05, 0x80788178,
+ 0xbf0d9972, 0xbfa20002,
+ 0x84788978, 0xbfa00001,
+ 0x84788a78, 0xb8ee1e06,
+ 0x846e8a6e, 0x80786e78,
+ 0x8078ff78, 0x00000200,
+ 0xbef600ff, 0x01000000,
+ 0xbeff0071, 0xf4621bfa,
+ 0xf0000000, 0x80788478,
+ 0xf4621b3a, 0xf0000000,
+ 0x80788478, 0xf4621b7a,
+ 0xf0000000, 0x80788478,
+ 0xf4621c3a, 0xf0000000,
+ 0x80788478, 0xf4621c7a,
+ 0xf0000000, 0x80788478,
+ 0xf4621eba, 0xf0000000,
+ 0x80788478, 0xf4621efa,
+ 0xf0000000, 0x80788478,
+ 0xf4621e7a, 0xf0000000,
+ 0x80788478, 0xf4621cfa,
+ 0xf0000000, 0x80788478,
+ 0xf4621bba, 0xf0000000,
+ 0x80788478, 0xbf8a0000,
+ 0xb96ef814, 0xf4621bba,
+ 0xf0000000, 0x80788478,
+ 0xbf8a0000, 0xb96ef815,
+ 0xf4621bba, 0xf0000000,
+ 0x80788478, 0xbf8a0000,
+ 0xb96ef812, 0xf4621bba,
+ 0xf0000000, 0x80788478,
+ 0xbf8a0000, 0xb96ef813,
+ 0x8b6eff7f, 0x04000000,
+ 0xbfa1000d, 0x80788478,
+ 0xf4621bba, 0xf0000000,
+ 0x80788478, 0xbf8a0000,
+ 0xbf0d806e, 0xbfa10006,
+ 0x856e906e, 0x8b6e6e6e,
+ 0xbfa10003, 0xbe804ec1,
+ 0x816ec16e, 0xbfa0fffb,
+ 0xbefd006f, 0xbefe0070,
+ 0xbeff0071, 0xb97b2011,
+ 0x857b867b, 0xb97b0191,
+ 0x857b827b, 0xb97bba11,
+ 0xb973f801, 0xb8ee3b05,
+ 0x806e816e, 0xbf0d9972,
+ 0xbfa20002, 0x846e896e,
+ 0xbfa00001, 0x846e8a6e,
+ 0xb8ef1e06, 0x846f8a6f,
+ 0x806e6f6e, 0x806eff6e,
+ 0x00000200, 0x806e746e,
+ 0x826f8075, 0x8b6fff6f,
+ 0x0000ffff, 0xf4605c37,
+ 0xf8000050, 0xf4605d37,
+ 0xf8000060, 0xf4601e77,
+ 0xf8000074, 0xbf8a0000,
+ 0x8b6dff6d, 0x0000ffff,
+ 0x8bfe7e7e, 0x8bea6a6a,
+ 0xb97af804, 0xbe804ec2,
+ 0xbf94fffe, 0xbe804a6c,
+ 0xbe804ec2, 0xbf94fffe,
+ 0xbfb10000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+};
+
+static const uint32_t cwsr_trap_gfx9_5_0_hex[] = {
+ 0xbf820001, 0xbf8202ca,
+ 0xb8f8f802, 0x8978ff78,
+ 0x00020006, 0xb8fbf803,
+ 0x866eff78, 0x00002000,
+ 0xbf840009, 0x866eff6d,
+ 0x00ff0000, 0xbf85001a,
+ 0x866eff7b, 0x00000400,
+ 0xbf850051, 0xbf8e0010,
+ 0xb8fbf803, 0xbf82fffa,
+ 0x866eff7b, 0x03c00900,
+ 0xbf850011, 0x866eff7b,
+ 0x000071ff, 0xbf840008,
+ 0x866fff7b, 0x00007080,
+ 0xbf840001, 0xbeee1a87,
+ 0xb8eff801, 0x8e6e8c6e,
+ 0x866e6f6e, 0xbf850006,
+ 0x866eff6d, 0x00ff0000,
+ 0xbf850003, 0x866eff7b,
+ 0x00000400, 0xbf85003a,
+ 0xb8faf807, 0x867aff7a,
+ 0x001f8000, 0x8e7a8b7a,
+ 0x8979ff79, 0xfc000000,
+ 0x87797a79, 0xba7ff807,
+ 0x00000000, 0xb8faf812,
+ 0xb8fbf813, 0x8efa887a,
+ 0xbf0d8f7b, 0xbf840002,
+ 0x877bff7b, 0xffff0000,
+ 0xc0031bbd, 0x00000010,
+ 0xbf8cc07f, 0x8e6e976e,
+ 0x8979ff79, 0x00800000,
+ 0x87796e79, 0xc0071bbd,
+ 0x00000000, 0xbf8cc07f,
+ 0xc0071ebd, 0x00000008,
+ 0xbf8cc07f, 0x86ee6e6e,
+ 0xbf840001, 0xbe801d6e,
+ 0x866eff6d, 0x01ff0000,
+ 0xbf850005, 0x8778ff78,
+ 0x00002000, 0x80ec886c,
+ 0x82ed806d, 0xbf820005,
+ 0x866eff6d, 0x01000000,
+ 0xbf850002, 0x806c846c,
+ 0x826d806d, 0x866dff6d,
+ 0x0000ffff, 0x8f7a8b79,
+ 0x867aff7a, 0x001f8000,
+ 0xb97af807, 0x86fe7e7e,
+ 0x86ea6a6a, 0x8f6e8378,
+ 0xb96ee0c2, 0xbf800002,
+ 0xb9780002, 0xbe801f6c,
+ 0x866dff6d, 0x0000ffff,
+ 0xbefa0080, 0xb97a0283,
+ 0xb8faf807, 0x867aff7a,
+ 0x001f8000, 0x8e7a8b7a,
+ 0x8979ff79, 0xfc000000,
+ 0x87797a79, 0xba7ff807,
+ 0x00000000, 0xbeee007e,
+ 0xbeef007f, 0xbefe0180,
+ 0xbf900004, 0x877a8478,
+ 0xb97af802, 0xbf8e0002,
+ 0xbf88fffe, 0xb8fa2985,
+ 0x807a817a, 0x8e7a8a7a,
+ 0x8e7a817a, 0xb8fb1605,
+ 0x807b817b, 0x8e7b867b,
+ 0x807a7b7a, 0x807a7e7a,
+ 0x827b807f, 0x867bff7b,
+ 0x0000ffff, 0xc04b1c3d,
+ 0x00000050, 0xbf8cc07f,
+ 0xc04b1d3d, 0x00000060,
+ 0xbf8cc07f, 0xc0431e7d,
+ 0x00000074, 0xbf8cc07f,
+ 0xbef4007e, 0x8675ff7f,
+ 0x0000ffff, 0x8775ff75,
+ 0x00040000, 0xbef60080,
+ 0xbef700ff, 0x00807fac,
+ 0xbef1007c, 0xbef00080,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x80707a70,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xbefe007c,
+ 0xbefc0070, 0xc0611c7a,
+ 0x0000007c, 0xbf8cc07f,
+ 0x80708470, 0xbefc007e,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611b3a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc0070, 0xc0611b7a,
+ 0x0000007c, 0xbf8cc07f,
+ 0x80708470, 0xbefc007e,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611bba, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc0070, 0xc0611bfa,
+ 0x0000007c, 0xbf8cc07f,
+ 0x80708470, 0xbefc007e,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611e3a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xb8fbf803,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611efa, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc0070, 0xc0611a3a,
+ 0x0000007c, 0xbf8cc07f,
+ 0x80708470, 0xbefc007e,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611a7a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xb8f1f801,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611c7a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xbf108080,
+ 0x867aff7f, 0x04000000,
+ 0xbeef0080, 0x876f6f7a,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fb1605, 0x807b817b,
+ 0x8e7b847b, 0x8e76827b,
+ 0xbef600ff, 0x01000000,
+ 0xbef20174, 0x80747074,
+ 0x82758075, 0xbefc0080,
+ 0xbf800000, 0xbe802b00,
+ 0xbe822b02, 0xbe842b04,
+ 0xbe862b06, 0xbe882b08,
+ 0xbe8a2b0a, 0xbe8c2b0c,
+ 0xbe8e2b0e, 0xc06b003a,
+ 0x00000000, 0xbf8cc07f,
+ 0xc06b013a, 0x00000010,
+ 0xbf8cc07f, 0xc06b023a,
+ 0x00000020, 0xbf8cc07f,
+ 0xc06b033a, 0x00000030,
+ 0xbf8cc07f, 0x8074c074,
+ 0x82758075, 0x807c907c,
+ 0xbf0a7b7c, 0xbf85ffe7,
+ 0xbef40172, 0xbef00080,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xbee80080, 0xbee90080,
+ 0xbef600ff, 0x01000000,
+ 0x867aff78, 0x00400000,
+ 0xbf850003, 0xb8faf803,
+ 0x897a7aff, 0x10000000,
+ 0xbf85004d, 0xbe840080,
+ 0xd2890000, 0x00000900,
+ 0x80048104, 0xd2890001,
+ 0x00000900, 0x80048104,
+ 0xd2890002, 0x00000900,
+ 0x80048104, 0xd2890003,
+ 0x00000900, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000901, 0x80048104,
+ 0xd2890001, 0x00000901,
+ 0x80048104, 0xd2890002,
+ 0x00000901, 0x80048104,
+ 0xd2890003, 0x00000901,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000902,
+ 0x80048104, 0xd2890001,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
+ 0x80048104, 0xd2890003,
+ 0x00000902, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
+ 0x80048104, 0xd2890002,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbf820008,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8fb5306, 0x867bc17b,
+ 0xbf840052, 0xbf8a0000,
+ 0x867aff6f, 0x04000000,
+ 0xbf84004e, 0x8e7b867b,
+ 0x8e7b827b, 0xbef6007b,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x80707a70,
+ 0x8070ff70, 0x00000080,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0080, 0xd28c0002,
+ 0x000100c1, 0xd28d0003,
+ 0x000204c1, 0x867aff78,
+ 0x00400000, 0xbf850003,
+ 0xb8faf803, 0x897a7aff,
+ 0x10000000, 0xbf85001d,
+ 0x24040682, 0xd86c0000,
+ 0x00000002, 0xbf8cc07f,
+ 0xbe840080, 0xd2890000,
+ 0x00000900, 0x80048104,
+ 0xd2890001, 0x00000900,
+ 0x80048104, 0xd2890002,
+ 0x00000900, 0x80048104,
+ 0xd2890003, 0x00000900,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0x680404ff,
+ 0x00000100, 0xd0c9006a,
+ 0x0000f702, 0xbf87ffe5,
+ 0xbf820016, 0xd1060002,
+ 0x00011103, 0x7e0602ff,
+ 0x00000200, 0xbefc00ff,
+ 0x00010000, 0xbe800077,
+ 0x8677ff77, 0xff7fffff,
+ 0x8777ff77, 0x00058000,
+ 0xd8ec0000, 0x00000002,
+ 0xbf8cc07f, 0xe0765000,
+ 0x701d0002, 0x68040702,
+ 0xd0c9006a, 0x0000f702,
+ 0xbefe016a, 0xbf87fff6,
+ 0xbef70000, 0xbef000ff,
+ 0x00000400, 0xbefe00c1,
+ 0xbeff00c1, 0xb8fb2b05,
+ 0x807b817b, 0x8e7b827b,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0084, 0xbf0a7b7c,
+ 0xbf84006d, 0xbf11017c,
0x807bff7b, 0x00001000,
- 0xbefc0080, 0xbf11017c,
0x867aff78, 0x00400000,
0xbf850003, 0xb8faf803,
0x897a7aff, 0x10000000,
- 0xbf850059, 0xd3d84000,
- 0x18000100, 0xd3d84001,
- 0x18000101, 0xd3d84002,
- 0x18000102, 0xd3d84003,
- 0x18000103, 0xbe840080,
+ 0xbf850051, 0xbe840080,
0xd2890000, 0x00000900,
0x80048104, 0xd2890001,
0x00000900, 0x80048104,
@@ -3511,137 +4453,204 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
0x00000070, 0xbf8cc07f,
0x80709070, 0xbf06c004,
0xbf84ffee, 0x807c847c,
- 0xbf0a7b7c, 0xbf85ffa9,
- 0xbf9c0000, 0xbf820016,
- 0xd3d84000, 0x18000100,
- 0xd3d84001, 0x18000101,
- 0xd3d84002, 0x18000102,
- 0xd3d84003, 0x18000103,
+ 0xbf0a7b7c, 0xbf85ffb1,
+ 0xbf9c0000, 0xbf820012,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
0xe0724000, 0x701d0000,
0xe0724100, 0x701d0100,
0xe0724200, 0x701d0200,
0xe0724300, 0x701d0300,
0x807c847c, 0x8070ff70,
0x00000400, 0xbf0a7b7c,
- 0xbf85ffeb, 0xbf9c0000,
- 0xbf8200ee, 0xbef4007e,
- 0x8675ff7f, 0x0000ffff,
- 0x8775ff75, 0x00040000,
- 0xbef60080, 0xbef700ff,
- 0x00807fac, 0x866eff7f,
- 0x04000000, 0xbf84001f,
+ 0xbf85ffef, 0xbf9c0000,
+ 0xb8fb2985, 0x807b817b,
+ 0x8e7b837b, 0xb8fa2b05,
+ 0x807a817a, 0x8e7a827a,
+ 0x80fb7a7b, 0x867b7b7b,
+ 0xbf84007a, 0x807bff7b,
+ 0x00001000, 0xbefc0080,
+ 0xbf11017c, 0x867aff78,
+ 0x00400000, 0xbf850003,
+ 0xb8faf803, 0x897a7aff,
+ 0x10000000, 0xbf850059,
+ 0xd3d84000, 0x18000100,
+ 0xd3d84001, 0x18000101,
+ 0xd3d84002, 0x18000102,
+ 0xd3d84003, 0x18000103,
+ 0xbe840080, 0xd2890000,
+ 0x00000900, 0x80048104,
+ 0xd2890001, 0x00000900,
+ 0x80048104, 0xd2890002,
+ 0x00000900, 0x80048104,
+ 0xd2890003, 0x00000900,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000901,
+ 0x80048104, 0xd2890001,
+ 0x00000901, 0x80048104,
+ 0xd2890002, 0x00000901,
+ 0x80048104, 0xd2890003,
+ 0x00000901, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000902, 0x80048104,
+ 0xd2890001, 0x00000902,
+ 0x80048104, 0xd2890002,
+ 0x00000902, 0x80048104,
+ 0xd2890003, 0x00000902,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000903,
+ 0x80048104, 0xd2890001,
+ 0x00000903, 0x80048104,
+ 0xd2890002, 0x00000903,
+ 0x80048104, 0xd2890003,
+ 0x00000903, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0x807c847c, 0xbf0a7b7c,
+ 0xbf85ffa9, 0xbf9c0000,
+ 0xbf820016, 0xd3d84000,
+ 0x18000100, 0xd3d84001,
+ 0x18000101, 0xd3d84002,
+ 0x18000102, 0xd3d84003,
+ 0x18000103, 0xe0724000,
+ 0x701d0000, 0xe0724100,
+ 0x701d0100, 0xe0724200,
+ 0x701d0200, 0xe0724300,
+ 0x701d0300, 0x807c847c,
+ 0x8070ff70, 0x00000400,
+ 0xbf0a7b7c, 0xbf85ffeb,
+ 0xbf9c0000, 0xbf8200f4,
+ 0xbef4007e, 0x8675ff7f,
+ 0x0000ffff, 0x8775ff75,
+ 0x00040000, 0xbef60080,
+ 0xbef700ff, 0x00807fac,
+ 0x866eff7f, 0x04000000,
+ 0xbf840025, 0xbefe00c1,
+ 0xbeff00c1, 0xb8ef5306,
+ 0x866fc16f, 0xbf840020,
+ 0x8e6f866f, 0x8e6f826f,
+ 0xbef6006f, 0xb8f82985,
+ 0x80788178, 0x8e788a78,
+ 0x8e788178, 0xb8ee1605,
+ 0x806e816e, 0x8e6e866e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000080, 0xbef600ff,
+ 0x01000000, 0xbefc0080,
+ 0xe0510000, 0x781d0000,
+ 0xe0510100, 0x781d0000,
+ 0xe0510200, 0x781d0000,
+ 0xe0510300, 0x781d0000,
+ 0xe0510400, 0x781d0000,
+ 0x807cff7c, 0x00000500,
+ 0x8078ff78, 0x00000500,
+ 0xbf0a6f7c, 0xbf85fff0,
0xbefe00c1, 0xbeff00c1,
- 0xb8ef4306, 0x866fc16f,
- 0xbf84001a, 0x8e6f866f,
- 0x8e6f826f, 0xbef6006f,
- 0xb8f82985, 0x80788178,
- 0x8e788a78, 0x8e788178,
- 0xb8ee1605, 0x806e816e,
- 0x8e6e866e, 0x80786e78,
- 0x8078ff78, 0x00000080,
0xbef600ff, 0x01000000,
- 0xbefc0080, 0xe0510000,
- 0x781d0000, 0xe0510100,
- 0x781d0000, 0x807cff7c,
- 0x00000200, 0x8078ff78,
- 0x00000200, 0xbf0a6f7c,
- 0xbf85fff6, 0xbefe00c1,
- 0xbeff00c1, 0xbef600ff,
- 0x01000000, 0xb8ef2b05,
- 0x806f816f, 0x8e6f826f,
- 0x806fff6f, 0x00008000,
- 0xbef80080, 0xbeee0078,
- 0x8078ff78, 0x00000400,
- 0xbefc0084, 0xbf11087c,
- 0xe0524000, 0x781d0000,
- 0xe0524100, 0x781d0100,
- 0xe0524200, 0x781d0200,
- 0xe0524300, 0x781d0300,
- 0xbf8c0f70, 0x7e000300,
- 0x7e020301, 0x7e040302,
- 0x7e060303, 0x807c847c,
- 0x8078ff78, 0x00000400,
- 0xbf0a6f7c, 0xbf85ffee,
- 0xb8ef2985, 0x806f816f,
- 0x8e6f836f, 0xb8f92b05,
- 0x80798179, 0x8e798279,
- 0x80ef796f, 0x866f6f6f,
- 0xbf84001a, 0x806fff6f,
- 0x00008000, 0xbefc0080,
+ 0xb8ef2b05, 0x806f816f,
+ 0x8e6f826f, 0x806fff6f,
+ 0x00008000, 0xbef80080,
+ 0xbeee0078, 0x8078ff78,
+ 0x00000400, 0xbefc0084,
0xbf11087c, 0xe0524000,
0x781d0000, 0xe0524100,
0x781d0100, 0xe0524200,
0x781d0200, 0xe0524300,
0x781d0300, 0xbf8c0f70,
- 0xd3d94000, 0x18000100,
- 0xd3d94001, 0x18000101,
- 0xd3d94002, 0x18000102,
- 0xd3d94003, 0x18000103,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
0x807c847c, 0x8078ff78,
0x00000400, 0xbf0a6f7c,
- 0xbf85ffea, 0xbf9c0000,
- 0xe0524000, 0x6e1d0000,
- 0xe0524100, 0x6e1d0100,
- 0xe0524200, 0x6e1d0200,
- 0xe0524300, 0x6e1d0300,
- 0xbf8c0f70, 0xb8f82985,
- 0x80788178, 0x8e788a78,
- 0x8e788178, 0xb8ee1605,
- 0x806e816e, 0x8e6e866e,
- 0x80786e78, 0x80f8c078,
- 0xb8ef1605, 0x806f816f,
- 0x8e6f846f, 0x8e76826f,
- 0xbef600ff, 0x01000000,
- 0xbefc006f, 0xc031003a,
- 0x00000078, 0x80f8c078,
- 0xbf8cc07f, 0x80fc907c,
- 0xbf800000, 0xbe802d00,
- 0xbe822d02, 0xbe842d04,
- 0xbe862d06, 0xbe882d08,
- 0xbe8a2d0a, 0xbe8c2d0c,
- 0xbe8e2d0e, 0xbf06807c,
- 0xbf84fff0, 0xb8f82985,
- 0x80788178, 0x8e788a78,
- 0x8e788178, 0xb8ee1605,
- 0x806e816e, 0x8e6e866e,
- 0x80786e78, 0xbef60084,
- 0xbef600ff, 0x01000000,
- 0xc0211bfa, 0x00000078,
- 0x80788478, 0xc0211b3a,
+ 0xbf85ffee, 0xb8ef2985,
+ 0x806f816f, 0x8e6f836f,
+ 0xb8f92b05, 0x80798179,
+ 0x8e798279, 0x80ef796f,
+ 0x866f6f6f, 0xbf84001a,
+ 0x806fff6f, 0x00008000,
+ 0xbefc0080, 0xbf11087c,
+ 0xe0524000, 0x781d0000,
+ 0xe0524100, 0x781d0100,
+ 0xe0524200, 0x781d0200,
+ 0xe0524300, 0x781d0300,
+ 0xbf8c0f70, 0xd3d94000,
+ 0x18000100, 0xd3d94001,
+ 0x18000101, 0xd3d94002,
+ 0x18000102, 0xd3d94003,
+ 0x18000103, 0x807c847c,
+ 0x8078ff78, 0x00000400,
+ 0xbf0a6f7c, 0xbf85ffea,
+ 0xbf9c0000, 0xe0524000,
+ 0x6e1d0000, 0xe0524100,
+ 0x6e1d0100, 0xe0524200,
+ 0x6e1d0200, 0xe0524300,
+ 0x6e1d0300, 0xbf8c0f70,
+ 0xb8f82985, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0x80f8c078, 0xb8ef1605,
+ 0x806f816f, 0x8e6f846f,
+ 0x8e76826f, 0xbef600ff,
+ 0x01000000, 0xbefc006f,
+ 0xc031003a, 0x00000078,
+ 0x80f8c078, 0xbf8cc07f,
+ 0x80fc907c, 0xbf800000,
+ 0xbe802d00, 0xbe822d02,
+ 0xbe842d04, 0xbe862d06,
+ 0xbe882d08, 0xbe8a2d0a,
+ 0xbe8c2d0c, 0xbe8e2d0e,
+ 0xbf06807c, 0xbf84fff0,
+ 0xb8f82985, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xc0211bfa,
0x00000078, 0x80788478,
- 0xc0211b7a, 0x00000078,
- 0x80788478, 0xc0211c3a,
+ 0xc0211b3a, 0x00000078,
+ 0x80788478, 0xc0211b7a,
0x00000078, 0x80788478,
- 0xc0211c7a, 0x00000078,
- 0x80788478, 0xc0211eba,
+ 0xc0211c3a, 0x00000078,
+ 0x80788478, 0xc0211c7a,
0x00000078, 0x80788478,
- 0xc0211efa, 0x00000078,
- 0x80788478, 0xc0211a3a,
+ 0xc0211eba, 0x00000078,
+ 0x80788478, 0xc0211efa,
0x00000078, 0x80788478,
- 0xc0211a7a, 0x00000078,
- 0x80788478, 0xc0211cfa,
+ 0xc0211a3a, 0x00000078,
+ 0x80788478, 0xc0211a7a,
0x00000078, 0x80788478,
- 0xbf8cc07f, 0xbefc006f,
- 0xbefe0070, 0xbeff0071,
- 0x866f7bff, 0x000003ff,
- 0xb96f4803, 0x866f7bff,
- 0xfffff800, 0x8f6f8b6f,
- 0xb96fa2c3, 0xb973f801,
- 0xb8ee2985, 0x806e816e,
- 0x8e6e8a6e, 0x8e6e816e,
- 0xb8ef1605, 0x806f816f,
- 0x8e6f866f, 0x806e6f6e,
- 0x806e746e, 0x826f8075,
- 0x866fff6f, 0x0000ffff,
- 0xc00b1c37, 0x00000050,
- 0xc00b1d37, 0x00000060,
- 0xc0031e77, 0x00000074,
- 0xbf8cc07f, 0x8f6e8b79,
- 0x866eff6e, 0x001f8000,
- 0xb96ef807, 0x866dff6d,
- 0x0000ffff, 0x86fe7e7e,
- 0x86ea6a6a, 0x8f6e837a,
- 0xb96ee0c2, 0xbf800002,
- 0xb97a0002, 0xbf8a0000,
- 0xbe801f6c, 0xbf9b0000,
+ 0xc0211cfa, 0x00000078,
+ 0x80788478, 0xbf8cc07f,
+ 0xbefc006f, 0xbefe0070,
+ 0xbeff0071, 0x866f7bff,
+ 0x000003ff, 0xb96f4803,
+ 0x866f7bff, 0xfffff800,
+ 0x8f6f8b6f, 0xb96fa2c3,
+ 0xb973f801, 0xb8ee2985,
+ 0x806e816e, 0x8e6e8a6e,
+ 0x8e6e816e, 0xb8ef1605,
+ 0x806f816f, 0x8e6f866f,
+ 0x806e6f6e, 0x806e746e,
+ 0x826f8075, 0x866fff6f,
+ 0x0000ffff, 0xc00b1c37,
+ 0x00000050, 0xc00b1d37,
+ 0x00000060, 0xc0031e77,
+ 0x00000074, 0xbf8cc07f,
+ 0x8f6e8b79, 0x866eff6e,
+ 0x001f8000, 0xb96ef807,
+ 0x866dff6d, 0x0000ffff,
+ 0x86fe7e7e, 0x86ea6a6a,
+ 0x8f6e837a, 0xb96ee0c2,
+ 0xbf800002, 0xb97a0002,
+ 0xbf8a0000, 0xbe801f6c,
+ 0xbf9b0000, 0x00000000,
};
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
index e1aaa5ce0784..96fbb16ceb21 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
@@ -33,6 +33,7 @@
* gfx11:
* cpp -DASIC_FAMILY=CHIP_PLUM_BONITO cwsr_trap_handler_gfx10.asm -P -o gfx11.sp3
* sp3 gfx11.sp3 -hex gfx11.hex
+ *
*/
#define CHIP_NAVI10 26
@@ -43,23 +44,33 @@
#define HAVE_XNACK (ASIC_FAMILY < CHIP_SIENNA_CICHLID)
#define HAVE_SENDMSG_RTN (ASIC_FAMILY >= CHIP_PLUM_BONITO)
#define HAVE_BUFFER_LDS_LOAD (ASIC_FAMILY < CHIP_PLUM_BONITO)
-#define SW_SA_TRAP (ASIC_FAMILY >= CHIP_PLUM_BONITO)
+#define SW_SA_TRAP (ASIC_FAMILY == CHIP_PLUM_BONITO)
#define SAVE_AFTER_XNACK_ERROR (HAVE_XNACK && !NO_SQC_STORE) // workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger
+#define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
-var SINGLE_STEP_MISSED_WORKAROUND = 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
+#define S_COHERENCE glc:1
+#define V_COHERENCE slc:1 glc:1
+#define S_WAITCNT_0 s_waitcnt 0
var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
var SQ_WAVE_STATUS_HALT_MASK = 0x2000
var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000
var SQ_WAVE_STATUS_TRAP_EN_SHIFT = 6
+var SQ_WAVE_IB_STS2_WAVE64_SHIFT = 11
+var SQ_WAVE_IB_STS2_WAVE64_SIZE = 1
+var SQ_WAVE_LDS_ALLOC_GRANULARITY = 8
+var S_STATUS_HWREG = HW_REG_STATUS
+var S_STATUS_ALWAYS_CLEAR_MASK = SQ_WAVE_STATUS_SPI_PRIO_MASK|SQ_WAVE_STATUS_ECC_ERR_MASK
+var S_STATUS_HALT_MASK = SQ_WAVE_STATUS_HALT_MASK
+var S_SAVE_PC_HI_TRAP_ID_MASK = 0x00FF0000
+var S_SAVE_PC_HI_HT_MASK = 0x01000000
+var SQ_WAVE_STATUS_NO_VGPRS_SHIFT = 24
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 8
var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT = 24
var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE = 4
-var SQ_WAVE_IB_STS2_WAVE64_SHIFT = 11
-var SQ_WAVE_IB_STS2_WAVE64_SIZE = 1
#if ASIC_FAMILY < CHIP_PLUM_BONITO
var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8
@@ -74,16 +85,13 @@ var SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK = 0x80
var SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT = 7
var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100
var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8
-var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF
-var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT = 0x0
-var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE = 10
-var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800
-var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11
-var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21
var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800
+var SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT = 11
var SQ_WAVE_TRAPSTS_EXCP_HI_MASK = 0x7000
#if ASIC_FAMILY >= CHIP_PLUM_BONITO
+var SQ_WAVE_TRAPSTS_HOST_TRAP_SHIFT = 16
var SQ_WAVE_TRAPSTS_WAVE_START_MASK = 0x20000
+var SQ_WAVE_TRAPSTS_WAVE_START_SHIFT = 17
var SQ_WAVE_TRAPSTS_WAVE_END_MASK = 0x40000
var SQ_WAVE_TRAPSTS_TRAP_AFTER_INST_MASK = 0x100000
#endif
@@ -99,15 +107,27 @@ var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x003F8000
var SQ_WAVE_MODE_DEBUG_EN_MASK = 0x800
+var S_TRAPSTS_RESTORE_PART_1_SIZE = SQ_WAVE_TRAPSTS_SAVECTX_SHIFT
+var S_TRAPSTS_RESTORE_PART_2_SHIFT = SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT
+
#if ASIC_FAMILY < CHIP_PLUM_BONITO
var S_TRAPSTS_NON_MASKABLE_EXCP_MASK = SQ_WAVE_TRAPSTS_MEM_VIOL_MASK|SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK
+var S_TRAPSTS_RESTORE_PART_2_SIZE = 32 - S_TRAPSTS_RESTORE_PART_2_SHIFT
+var S_TRAPSTS_RESTORE_PART_3_SHIFT = 0
+var S_TRAPSTS_RESTORE_PART_3_SIZE = 0
#else
var S_TRAPSTS_NON_MASKABLE_EXCP_MASK = SQ_WAVE_TRAPSTS_MEM_VIOL_MASK |\
SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK |\
SQ_WAVE_TRAPSTS_WAVE_START_MASK |\
SQ_WAVE_TRAPSTS_WAVE_END_MASK |\
SQ_WAVE_TRAPSTS_TRAP_AFTER_INST_MASK
+var S_TRAPSTS_RESTORE_PART_2_SIZE = SQ_WAVE_TRAPSTS_HOST_TRAP_SHIFT - SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT
+var S_TRAPSTS_RESTORE_PART_3_SHIFT = SQ_WAVE_TRAPSTS_WAVE_START_SHIFT
+var S_TRAPSTS_RESTORE_PART_3_SIZE = 32 - S_TRAPSTS_RESTORE_PART_3_SHIFT
#endif
+var S_TRAPSTS_HWREG = HW_REG_TRAPSTS
+var S_TRAPSTS_SAVE_CONTEXT_MASK = SQ_WAVE_TRAPSTS_SAVECTX_MASK
+var S_TRAPSTS_SAVE_CONTEXT_SHIFT = SQ_WAVE_TRAPSTS_SAVECTX_SHIFT
// bits [31:24] unused by SPI debug data
var TTMP11_SAVE_REPLAY_W64H_SHIFT = 31
@@ -121,8 +141,6 @@ var TTMP11_DEBUG_TRAP_ENABLED_MASK = 0x800000
// when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE
var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000
var S_SAVE_BUF_RSRC_WORD3_MISC = 0x10807FAC
-var S_SAVE_PC_HI_TRAP_ID_MASK = 0x00FF0000
-var S_SAVE_PC_HI_HT_MASK = 0x01000000
var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000
var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26
@@ -182,6 +200,7 @@ var s_restore_buf_rsrc3 = ttmp11
var s_restore_size = ttmp6
var s_restore_ttmps_lo = s_restore_tmp
var s_restore_ttmps_hi = s_restore_alloc_size
+var s_restore_spi_init_hi_save = s_restore_exec_hi
shader main
asic(DEFAULT)
@@ -194,13 +213,13 @@ L_JUMP_TO_RESTORE:
s_branch L_RESTORE
L_SKIP_RESTORE:
- s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC
+ s_getreg_b32 s_save_status, hwreg(S_STATUS_HWREG) //save STATUS since we will change SCC
// Clear SPI_PRIO: do not save with elevated priority.
// Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd.
- s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK|SQ_WAVE_STATUS_ECC_ERR_MASK
+ s_andn2_b32 s_save_status, s_save_status, S_STATUS_ALWAYS_CLEAR_MASK
- s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+ s_getreg_b32 s_save_trapsts, hwreg(S_TRAPSTS_HWREG)
#if SW_SA_TRAP
// If ttmp1[30] is set then issue s_barrier to unblock dependent waves.
@@ -215,7 +234,7 @@ L_TRAP_NO_BARRIER:
s_cbranch_scc1 L_CHECK_SAVE
#endif
- s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+ s_and_b32 ttmp2, s_save_status, S_STATUS_HALT_MASK
s_cbranch_scc0 L_NOT_HALTED
L_HALTED:
@@ -224,14 +243,14 @@ L_HALTED:
s_cbranch_scc1 L_FETCH_2ND_TRAP
L_CHECK_SAVE:
- s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK
+ s_and_b32 ttmp2, s_save_trapsts, S_TRAPSTS_SAVE_CONTEXT_MASK
s_cbranch_scc1 L_SAVE
// Wave is halted but neither host trap nor SAVECTX is raised.
// Caused by instruction fetch memory violation.
// Spin wait until context saved to prevent interrupt storm.
s_sleep 0x10
- s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+ s_getreg_b32 s_save_trapsts, hwreg(S_TRAPSTS_HWREG)
s_branch L_CHECK_SAVE
L_NOT_HALTED:
@@ -265,15 +284,15 @@ L_CHECK_TRAP_ID:
s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
s_cbranch_scc1 L_FETCH_2ND_TRAP
-if SINGLE_STEP_MISSED_WORKAROUND
+#if SINGLE_STEP_MISSED_WORKAROUND
// Prioritize single step exception over context save.
// Second-level trap will halt wave and RFE, re-entering for SAVECTX.
s_getreg_b32 ttmp2, hwreg(HW_REG_MODE)
s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
s_cbranch_scc1 L_FETCH_2ND_TRAP
-end
+#endif
- s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK
+ s_and_b32 ttmp2, s_save_trapsts, S_TRAPSTS_SAVE_CONTEXT_MASK
s_cbranch_scc1 L_SAVE
L_FETCH_2ND_TRAP:
@@ -286,7 +305,7 @@ L_FETCH_2ND_TRAP:
// ttmp12 holds SQ_WAVE_STATUS
#if HAVE_SENDMSG_RTN
s_sendmsg_rtn_b64 [ttmp14, ttmp15], sendmsg(MSG_RTN_GET_TMA)
- s_waitcnt lgkmcnt(0)
+ S_WAITCNT_0
#else
s_getreg_b32 ttmp14, hwreg(HW_REG_SHADER_TMA_LO)
s_getreg_b32 ttmp15, hwreg(HW_REG_SHADER_TMA_HI)
@@ -298,16 +317,16 @@ L_FETCH_2ND_TRAP:
s_or_b32 ttmp15, ttmp15, 0xFFFF0000
L_NO_SIGN_EXTEND_TMA:
- s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag
- s_waitcnt lgkmcnt(0)
+ s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 S_COHERENCE // debug trap enabled flag
+ S_WAITCNT_0
s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT
s_andn2_b32 ttmp11, ttmp11, TTMP11_DEBUG_TRAP_ENABLED_MASK
s_or_b32 ttmp11, ttmp11, ttmp2
- s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA
- s_waitcnt lgkmcnt(0)
- s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA
- s_waitcnt lgkmcnt(0)
+ s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 S_COHERENCE // second-level TBA
+ S_WAITCNT_0
+ s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 S_COHERENCE // second-level TMA
+ S_WAITCNT_0
s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set
@@ -315,9 +334,13 @@ L_NO_SIGN_EXTEND_TMA:
L_NO_NEXT_TRAP:
// If not caused by trap then halt wave to prevent re-entry.
- s_and_b32 ttmp2, s_save_pc_hi, (S_SAVE_PC_HI_TRAP_ID_MASK|S_SAVE_PC_HI_HT_MASK)
+ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
s_cbranch_scc1 L_TRAP_CASE
- s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+
+ // Host trap will not cause trap re-entry.
+ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_HT_MASK
+ s_cbranch_scc1 L_EXIT_TRAP
+ s_or_b32 s_save_status, s_save_status, S_STATUS_HALT_MASK
// If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set.
// Rewind the PC to prevent this from occurring.
@@ -327,10 +350,6 @@ L_NO_NEXT_TRAP:
s_branch L_EXIT_TRAP
L_TRAP_CASE:
- // Host trap will not cause trap re-entry.
- s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_HT_MASK
- s_cbranch_scc1 L_EXIT_TRAP
-
// Advance past trap instruction to prevent re-entry.
s_add_u32 ttmp0, ttmp0, 0x4
s_addc_u32 ttmp1, ttmp1, 0x0
@@ -345,14 +364,22 @@ L_EXIT_TRAP:
// Restore SQ_WAVE_STATUS.
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
- s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status
+ s_setreg_b32 hwreg(S_STATUS_HWREG), s_save_status
s_rfe_b64 [ttmp0, ttmp1]
L_SAVE:
+ // If VGPRs have been deallocated then terminate the wavefront.
+ // It has no remaining program to run and cannot save without VGPRs.
+#if ASIC_FAMILY == CHIP_PLUM_BONITO
+ s_bitcmp1_b32 s_save_status, SQ_WAVE_STATUS_NO_VGPRS_SHIFT
+ s_cbranch_scc0 L_HAVE_VGPRS
+ s_endpgm
+L_HAVE_VGPRS:
+#endif
s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
s_mov_b32 s_save_tmp, 0
- s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit
+ s_setreg_b32 hwreg(S_TRAPSTS_HWREG, S_TRAPSTS_SAVE_CONTEXT_SHIFT, 1), s_save_tmp //clear saveCtx bit
#if HAVE_XNACK
save_and_clear_ib_sts(s_save_tmp, s_save_trapsts)
@@ -377,7 +404,7 @@ L_SLEEP:
s_sleep 0x2
s_cbranch_execz L_SLEEP
#else
- s_waitcnt lgkmcnt(0)
+ S_WAITCNT_0
#endif
// Save first_wave flag so we can clear high bits of save address.
@@ -399,7 +426,7 @@ L_SLEEP:
s_and_b32 s_save_ttmps_hi, exec_hi, 0xFFFF
s_mov_b32 exec_lo, 0xFFFFFFFF
s_mov_b32 exec_hi, 0xFFFFFFFF
- global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] slc:1 glc:1
+ global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] V_COHERENCE
v_mov_b32 v0, 0x0
s_mov_b32 exec_lo, s_save_ttmps_lo
s_mov_b32 exec_hi, s_save_ttmps_hi
@@ -407,7 +434,7 @@ L_SLEEP:
// Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
// ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40
- get_wave_size(s_save_ttmps_hi)
+ get_wave_size2(s_save_ttmps_hi)
get_vgpr_size_bytes(s_save_ttmps_lo, s_save_ttmps_hi)
get_svgpr_size_bytes(s_save_ttmps_hi)
s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi
@@ -431,15 +458,15 @@ L_SLEEP:
s_mov_b32 exec_lo, 0x3FFF
s_mov_b32 exec_hi, 0x0
- global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] inst_offset:0x40 slc:1 glc:1
+ global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] inst_offset:0x40 V_COHERENCE
v_readlane_b32 ttmp14, v0, 0xE
v_readlane_b32 ttmp15, v0, 0xF
s_mov_b32 exec_lo, ttmp14
s_mov_b32 exec_hi, ttmp15
#else
- s_store_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x50 glc:1
- s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x60 glc:1
- s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x74 glc:1
+ s_store_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x50 S_COHERENCE
+ s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x60 S_COHERENCE
+ s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x74 S_COHERENCE
#endif
/* setup Resource Contants */
@@ -453,7 +480,7 @@ L_SLEEP:
/* global mem offset */
s_mov_b32 s_save_mem_offset, 0x0
- get_wave_size(s_wave_size)
+ get_wave_size2(s_wave_size)
#if HAVE_XNACK
// Save and clear vector XNACK state late to free up SGPRs.
@@ -488,11 +515,11 @@ L_SAVE_FIRST_VGPRS32_WITH_TCP:
#endif
#if !NO_SQC_STORE
- buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
#endif
- buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128
- buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*2
- buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*3
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:128
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:128*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:128*3
s_branch L_SAVE_HWREG
L_SAVE_4VGPR_WAVE64:
@@ -511,11 +538,11 @@ L_SAVE_FIRST_VGPRS64_WITH_TCP:
#endif
#if !NO_SQC_STORE
- buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
#endif
- buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
- buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
- buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:256
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:256*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:256*3
/* save HW registers */
@@ -543,7 +570,7 @@ L_SAVE_HWREG:
write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset)
- s_getreg_b32 s_save_tmp, hwreg(HW_REG_TRAPSTS)
+ s_getreg_b32 s_save_tmp, hwreg(S_TRAPSTS_HWREG)
write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset)
// Not used on Sienna_Cichlid but keep layout same for debugger.
@@ -562,7 +589,7 @@ L_SAVE_HWREG:
// Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this.
s_mov_b32 exec_lo, 0xFFFF
s_mov_b32 exec_hi, 0x0
- buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
// Write SGPRs with 32 VGPR lanes. This works in wave32 and wave64 mode.
s_mov_b32 exec_lo, 0xFFFFFFFF
@@ -605,7 +632,7 @@ L_SAVE_SGPR_LOOP:
s_cmp_eq_u32 ttmp13, 0x20 //have 32 VGPR lanes filled?
s_cbranch_scc0 L_SAVE_SGPR_SKIP_TCP_STORE
- buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
s_add_u32 s_save_mem_offset, s_save_mem_offset, 0x80
s_mov_b32 ttmp13, 0x0
v_mov_b32 v2, 0x0
@@ -626,7 +653,7 @@ L_SAVE_SGPR_SKIP_TCP_STORE:
write_12sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset)
#if NO_SQC_STORE
- buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
#else
// restore s_save_buf_rsrc0,1
s_mov_b32 s_save_buf_rsrc0, s_save_xnack_mask
@@ -656,8 +683,7 @@ L_SAVE_LDS_NORMAL:
// first wave do LDS save;
- s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 6 //LDS size in dwords = lds_size * 64dw
- s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //LDS size in bytes
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, SQ_WAVE_LDS_ALLOC_GRANULARITY
s_mov_b32 s_save_buf_rsrc2, s_save_alloc_size //NUM_RECORDS in bytes
// LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG)
@@ -688,7 +714,7 @@ L_SAVE_LDS_W32:
L_SAVE_LDS_LOOP_SQC_W32:
ds_read_b32 v1, v0
- s_waitcnt 0
+ S_WAITCNT_0
write_vgprs_to_mem_with_sqc_w32(v1, 1, s_save_buf_rsrc0, s_save_mem_offset)
@@ -708,8 +734,8 @@ L_SAVE_LDS_WITH_TCP_W32:
s_nop 0
L_SAVE_LDS_LOOP_W32:
ds_read_b32 v1, v0
- s_waitcnt 0
- buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ S_WAITCNT_0
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
s_add_u32 m0, m0, s3 //every buffer_store_lds does 128 bytes
s_add_u32 s_save_mem_offset, s_save_mem_offset, s3
@@ -726,7 +752,7 @@ L_SAVE_LDS_W64:
L_SAVE_LDS_LOOP_SQC_W64:
ds_read_b32 v1, v0
- s_waitcnt 0
+ S_WAITCNT_0
write_vgprs_to_mem_with_sqc_w64(v1, 1, s_save_buf_rsrc0, s_save_mem_offset)
@@ -746,8 +772,8 @@ L_SAVE_LDS_WITH_TCP_W64:
s_nop 0
L_SAVE_LDS_LOOP_W64:
ds_read_b32 v1, v0
- s_waitcnt 0
- buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ S_WAITCNT_0
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes
s_add_u32 s_save_mem_offset, s_save_mem_offset, s3
@@ -814,10 +840,10 @@ L_SAVE_VGPR_W32_LOOP:
v_movrels_b32 v2, v2 //v2 = v[2+m0]
v_movrels_b32 v3, v3 //v3 = v[3+m0]
- buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
- buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128
- buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*2
- buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*3
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:128
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:128*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:128*3
s_add_u32 m0, m0, 4 //next vgpr index
s_add_u32 s_save_mem_offset, s_save_mem_offset, 128*4 //every buffer_store_dword does 128 bytes
@@ -859,10 +885,10 @@ L_SAVE_VGPR_W64_LOOP:
v_movrels_b32 v2, v2 //v2 = v[2+m0]
v_movrels_b32 v3, v3 //v3 = v[3+m0]
- buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
- buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
- buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
- buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:256
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:256*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE offset:256*3
s_add_u32 m0, m0, 4 //next vgpr index
s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 //every buffer_store_dword does 256 bytes
@@ -899,7 +925,7 @@ L_SAVE_SHARED_VGPR_WAVE64_LOOP_SQC:
L_SAVE_SHARED_VGPR_WAVE64_LOOP:
v_movrels_b32 v0, v0 //v0 = v[0+m0]
- buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset V_COHERENCE
s_add_u32 m0, m0, 1 //next vgpr index
s_add_u32 s_save_mem_offset, s_save_mem_offset, 128
s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
@@ -917,7 +943,7 @@ L_RESTORE:
s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
//determine it is wave32 or wave64
- get_wave_size(s_restore_size)
+ get_wave_size2(s_restore_size)
s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
s_cbranch_scc0 L_RESTORE_VGPR
@@ -937,8 +963,7 @@ L_RESTORE_LDS_NORMAL:
s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)
s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //lds_size is zero?
s_cbranch_scc0 L_RESTORE_VGPR //no lds used? jump to L_RESTORE_VGPR
- s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 6 //LDS size in dwords = lds_size * 64dw
- s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //LDS size in bytes
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, SQ_WAVE_LDS_ALLOC_GRANULARITY
s_mov_b32 s_restore_buf_rsrc2, s_restore_alloc_size //NUM_RECORDS in bytes
// LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG)
@@ -962,7 +987,7 @@ L_RESTORE_LDS_LOOP_W32:
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW
#else
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
- s_waitcnt vmcnt(0)
+ S_WAITCNT_0
ds_store_addtid_b32 v0
#endif
s_add_u32 m0, m0, 128 // 128 DW
@@ -976,7 +1001,7 @@ L_RESTORE_LDS_LOOP_W64:
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW
#else
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
- s_waitcnt vmcnt(0)
+ S_WAITCNT_0
ds_store_addtid_b32 v0
#endif
s_add_u32 m0, m0, 256 // 256 DW
@@ -1017,11 +1042,11 @@ L_RESTORE_VGPR_NORMAL:
s_cbranch_scc0 L_RESTORE_SGPR
L_RESTORE_VGPR_WAVE32_LOOP:
- buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
- buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:128
- buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:128*2
- buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:128*3
- s_waitcnt vmcnt(0)
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE offset:128
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE offset:128*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE offset:128*3
+ S_WAITCNT_0
v_movreld_b32 v0, v0 //v[0+m0] = v0
v_movreld_b32 v1, v1
v_movreld_b32 v2, v2
@@ -1032,11 +1057,11 @@ L_RESTORE_VGPR_WAVE32_LOOP:
s_cbranch_scc1 L_RESTORE_VGPR_WAVE32_LOOP //VGPR restore (except v0) is complete?
/* VGPR restore on v0 */
- buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1
- buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128
- buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128*2
- buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128*3
- s_waitcnt vmcnt(0)
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE offset:128
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE offset:128*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE offset:128*3
+ S_WAITCNT_0
s_branch L_RESTORE_SGPR
@@ -1051,11 +1076,11 @@ L_RESTORE_VGPR_WAVE64:
s_cbranch_scc0 L_RESTORE_SHARED_VGPR
L_RESTORE_VGPR_WAVE64_LOOP:
- buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
- buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256
- buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*2
- buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*3
- s_waitcnt vmcnt(0)
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE offset:256
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE offset:256*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE offset:256*3
+ S_WAITCNT_0
v_movreld_b32 v0, v0 //v[0+m0] = v0
v_movreld_b32 v1, v1
v_movreld_b32 v2, v2
@@ -1077,8 +1102,8 @@ L_RESTORE_SHARED_VGPR:
s_mov_b32 exec_lo, 0xFFFFFFFF
s_mov_b32 exec_hi, 0x00000000
L_RESTORE_SHARED_VGPR_WAVE64_LOOP:
- buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
- s_waitcnt vmcnt(0)
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset V_COHERENCE
+ S_WAITCNT_0
v_movreld_b32 v0, v0 //v[0+m0] = v0
s_add_u32 m0, m0, 1 //next vgpr index
s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128
@@ -1089,11 +1114,11 @@ L_RESTORE_SHARED_VGPR_WAVE64_LOOP:
/* VGPR restore on v0 */
L_RESTORE_V0:
- buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1
- buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256
- buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*2
- buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*3
- s_waitcnt vmcnt(0)
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE offset:256
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE offset:256*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save V_COHERENCE offset:256*3
+ S_WAITCNT_0
/* restore SGPRs */
//will be 2+8+16*6
@@ -1110,7 +1135,7 @@ L_RESTORE_SGPR:
s_mov_b32 m0, s_sgpr_save_num
read_4sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
- s_waitcnt lgkmcnt(0)
+ S_WAITCNT_0
s_sub_u32 m0, m0, 4 // Restore from S[0] to S[104]
s_nop 0 // hazard SALU M0=> S_MOVREL
@@ -1119,7 +1144,7 @@ L_RESTORE_SGPR:
s_movreld_b64 s2, s2
read_8sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
- s_waitcnt lgkmcnt(0)
+ S_WAITCNT_0
s_sub_u32 m0, m0, 8 // Restore from S[0] to S[96]
s_nop 0 // hazard SALU M0=> S_MOVREL
@@ -1131,7 +1156,7 @@ L_RESTORE_SGPR:
L_RESTORE_SGPR_LOOP:
read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
- s_waitcnt lgkmcnt(0)
+ S_WAITCNT_0
s_sub_u32 m0, m0, 16 // Restore from S[n] to S[0]
s_nop 0 // hazard SALU M0=> S_MOVREL
@@ -1173,12 +1198,12 @@ L_RESTORE_HWREG:
read_hwreg_from_mem(s_restore_xnack_mask, s_restore_buf_rsrc0, s_restore_mem_offset)
read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset)
read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset)
- s_waitcnt lgkmcnt(0)
+ S_WAITCNT_0
s_setreg_b32 hwreg(HW_REG_SHADER_FLAT_SCRATCH_LO), s_restore_flat_scratch
read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset)
- s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS
+ S_WAITCNT_0
s_setreg_b32 hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI), s_restore_flat_scratch
@@ -1186,16 +1211,21 @@ L_RESTORE_HWREG:
s_mov_b32 exec_lo, s_restore_exec_lo
s_mov_b32 exec_hi, s_restore_exec_hi
- s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts
- s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0
-
#if HAVE_XNACK
s_setreg_b32 hwreg(HW_REG_SHADER_XNACK_MASK), s_restore_xnack_mask
#endif
- s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK, s_restore_trapsts
- s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT
- s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE), s_restore_m0
+ // {TRAPSTS/EXCP_FLAG_PRIV}.SAVE_CONTEXT and HOST_TRAP may have changed.
+ // Only restore the other fields to avoid clobbering them.
+ s_setreg_b32 hwreg(S_TRAPSTS_HWREG, 0, S_TRAPSTS_RESTORE_PART_1_SIZE), s_restore_trapsts
+ s_lshr_b32 s_restore_trapsts, s_restore_trapsts, S_TRAPSTS_RESTORE_PART_2_SHIFT
+ s_setreg_b32 hwreg(S_TRAPSTS_HWREG, S_TRAPSTS_RESTORE_PART_2_SHIFT, S_TRAPSTS_RESTORE_PART_2_SIZE), s_restore_trapsts
+
+if S_TRAPSTS_RESTORE_PART_3_SIZE > 0
+ s_lshr_b32 s_restore_trapsts, s_restore_trapsts, S_TRAPSTS_RESTORE_PART_3_SHIFT - S_TRAPSTS_RESTORE_PART_2_SHIFT
+ s_setreg_b32 hwreg(S_TRAPSTS_HWREG, S_TRAPSTS_RESTORE_PART_3_SHIFT, S_TRAPSTS_RESTORE_PART_3_SIZE), s_restore_trapsts
+end
+
s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode
// Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
@@ -1207,10 +1237,10 @@ L_RESTORE_HWREG:
s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0
s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0
s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF
- s_load_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 glc:1
- s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 glc:1
- s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1
- s_waitcnt lgkmcnt(0)
+ s_load_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 S_COHERENCE
+ s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 S_COHERENCE
+ s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 S_COHERENCE
+ S_WAITCNT_0
#if HAVE_XNACK
restore_ib_sts(s_restore_tmp, s_restore_m0)
@@ -1232,7 +1262,8 @@ L_RESTORE_HWREG:
L_RETURN_WITHOUT_PRIV:
#endif
- s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu
+ s_setreg_b32 hwreg(S_STATUS_HWREG), s_restore_status // SCC is included, which is changed by previous salu
+
s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
L_END_PGM:
@@ -1247,7 +1278,7 @@ function write_hwreg_to_mem(s, s_rsrc, s_mem_offset)
#else
s_mov_b32 exec_lo, m0
s_mov_b32 m0, s_mem_offset
- s_buffer_store_dword s, s_rsrc, m0 glc:1
+ s_buffer_store_dword s, s_rsrc, m0 S_COHERENCE
s_add_u32 s_mem_offset, s_mem_offset, 4
s_mov_b32 m0, exec_lo
#endif
@@ -1262,10 +1293,10 @@ function write_16sgpr_to_mem(s, s_rsrc, s_mem_offset)
s_add_u32 ttmp13, ttmp13, 0x1
end
#else
- s_buffer_store_dwordx4 s[0], s_rsrc, 0 glc:1
- s_buffer_store_dwordx4 s[4], s_rsrc, 16 glc:1
- s_buffer_store_dwordx4 s[8], s_rsrc, 32 glc:1
- s_buffer_store_dwordx4 s[12], s_rsrc, 48 glc:1
+ s_buffer_store_dwordx4 s[0], s_rsrc, 0 S_COHERENCE
+ s_buffer_store_dwordx4 s[4], s_rsrc, 16 S_COHERENCE
+ s_buffer_store_dwordx4 s[8], s_rsrc, 32 S_COHERENCE
+ s_buffer_store_dwordx4 s[12], s_rsrc, 48 S_COHERENCE
s_add_u32 s_rsrc[0], s_rsrc[0], 4*16
s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0
#endif
@@ -1279,32 +1310,32 @@ function write_12sgpr_to_mem(s, s_rsrc, s_mem_offset)
s_add_u32 ttmp13, ttmp13, 0x1
end
#else
- s_buffer_store_dwordx4 s[0], s_rsrc, 0 glc:1
- s_buffer_store_dwordx4 s[4], s_rsrc, 16 glc:1
- s_buffer_store_dwordx4 s[8], s_rsrc, 32 glc:1
+ s_buffer_store_dwordx4 s[0], s_rsrc, 0 S_COHERENCE
+ s_buffer_store_dwordx4 s[4], s_rsrc, 16 S_COHERENCE
+ s_buffer_store_dwordx4 s[8], s_rsrc, 32 S_COHERENCE
s_add_u32 s_rsrc[0], s_rsrc[0], 4*12
s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0
#endif
end
function read_hwreg_from_mem(s, s_rsrc, s_mem_offset)
- s_buffer_load_dword s, s_rsrc, s_mem_offset glc:1
+ s_buffer_load_dword s, s_rsrc, s_mem_offset S_COHERENCE
s_add_u32 s_mem_offset, s_mem_offset, 4
end
function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset)
s_sub_u32 s_mem_offset, s_mem_offset, 4*16
- s_buffer_load_dwordx16 s, s_rsrc, s_mem_offset glc:1
+ s_buffer_load_dwordx16 s, s_rsrc, s_mem_offset S_COHERENCE
end
function read_8sgpr_from_mem(s, s_rsrc, s_mem_offset)
s_sub_u32 s_mem_offset, s_mem_offset, 4*8
- s_buffer_load_dwordx8 s, s_rsrc, s_mem_offset glc:1
+ s_buffer_load_dwordx8 s, s_rsrc, s_mem_offset S_COHERENCE
end
function read_4sgpr_from_mem(s, s_rsrc, s_mem_offset)
s_sub_u32 s_mem_offset, s_mem_offset, 4*4
- s_buffer_load_dwordx4 s, s_rsrc, s_mem_offset glc:1
+ s_buffer_load_dwordx4 s, s_rsrc, s_mem_offset S_COHERENCE
end
#if SAVE_AFTER_XNACK_ERROR
@@ -1345,11 +1376,6 @@ function write_vgprs_to_mem_with_sqc_w64(vgpr0, n_vgprs, s_rsrc, s_mem_offset)
end
#endif
-function get_lds_size_bytes(s_lds_size_byte)
- s_getreg_b32 s_lds_size_byte, hwreg(HW_REG_LDS_ALLOC, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)
- s_lshl_b32 s_lds_size_byte, s_lds_size_byte, 8 //LDS size in dwords = lds_size * 64 *4Bytes // granularity 64DW
-end
-
function get_vgpr_size_bytes(s_vgpr_size_byte, s_size)
s_getreg_b32 s_vgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1
@@ -1375,11 +1401,12 @@ function get_hwreg_size_bytes
return 128
end
-function get_wave_size(s_reg)
+function get_wave_size2(s_reg)
s_getreg_b32 s_reg, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE)
s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE
end
+#if HAVE_XNACK
function save_and_clear_ib_sts(tmp1, tmp2)
// Preserve and clear scalar XNACK state before issuing scalar loads.
// Save IB_STS.REPLAY_W64H[25], RCNT[21:16], FIRST_REPLAY[15] into
@@ -1404,3 +1431,4 @@ function restore_ib_sts(tmp1, tmp2)
s_or_b32 tmp1, tmp1, tmp2
s_setreg_b32 hwreg(HW_REG_IB_STS), tmp1
end
+#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
new file mode 100644
index 000000000000..5a1a1b1f897f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
@@ -0,0 +1,1136 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* To compile this assembly code:
+ *
+ * gfx12:
+ * cpp -DASIC_FAMILY=CHIP_GFX12 cwsr_trap_handler_gfx12.asm -P -o gfx12.sp3
+ * sp3 gfx12.sp3 -hex gfx12.hex
+ */
+
+#define CHIP_GFX12 37
+
+#define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost TRAP_AFTER_INST exception when SAVECTX raised
+#define HAVE_VALU_SGPR_HAZARD (ASIC_FAMILY == CHIP_GFX12)
+
+var SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK = 0x4
+var SQ_WAVE_STATE_PRIV_SCC_SHIFT = 9
+var SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK = 0xC00
+var SQ_WAVE_STATE_PRIV_HALT_MASK = 0x4000
+var SQ_WAVE_STATE_PRIV_POISON_ERR_MASK = 0x8000
+var SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT = 15
+var SQ_WAVE_STATUS_WAVE64_SHIFT = 29
+var SQ_WAVE_STATUS_WAVE64_SIZE = 1
+var SQ_WAVE_STATUS_NO_VGPRS_SHIFT = 24
+var SQ_WAVE_STATE_PRIV_ALWAYS_CLEAR_MASK = SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK|SQ_WAVE_STATE_PRIV_POISON_ERR_MASK
+var S_SAVE_PC_HI_TRAP_ID_MASK = 0xF0000000
+
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 8
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 12
+var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT = 24
+var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE = 4
+var SQ_WAVE_LDS_ALLOC_GRANULARITY = 9
+
+var SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK = 0xF
+var SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK = 0x10
+var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT = 5
+var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK = 0x20
+var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK = 0x40
+var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT = 6
+var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK = 0x80
+var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT = 7
+var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK = 0x100
+var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT = 8
+var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK = 0x200
+var SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK = 0x800
+var SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK = 0x80
+var SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK = 0x200
+
+var SQ_WAVE_EXCP_FLAG_PRIV_NON_MASKABLE_EXCP_MASK= SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK |\
+ SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK |\
+ SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK |\
+ SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK |\
+ SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK |\
+ SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK
+var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_1_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT
+var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
+var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
+var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT
+var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SIZE = 32 - SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT
+var BARRIER_STATE_SIGNAL_OFFSET = 16
+var BARRIER_STATE_VALID_OFFSET = 0
+
+var TTMP11_DEBUG_TRAP_ENABLED_SHIFT = 23
+var TTMP11_DEBUG_TRAP_ENABLED_MASK = 0x800000
+
+// SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14]
+// when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE
+var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000
+var S_SAVE_BUF_RSRC_WORD3_MISC = 0x10807FAC
+var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000
+var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26
+
+var S_SAVE_PC_HI_FIRST_WAVE_MASK = 0x80000000
+var S_SAVE_PC_HI_FIRST_WAVE_SHIFT = 31
+
+var s_sgpr_save_num = 108
+
+var s_save_spi_init_lo = exec_lo
+var s_save_spi_init_hi = exec_hi
+var s_save_pc_lo = ttmp0
+var s_save_pc_hi = ttmp1
+var s_save_exec_lo = ttmp2
+var s_save_exec_hi = ttmp3
+var s_save_state_priv = ttmp12
+var s_save_excp_flag_priv = ttmp15
+var s_save_xnack_mask = s_save_excp_flag_priv
+var s_wave_size = ttmp7
+var s_save_buf_rsrc0 = ttmp8
+var s_save_buf_rsrc1 = ttmp9
+var s_save_buf_rsrc2 = ttmp10
+var s_save_buf_rsrc3 = ttmp11
+var s_save_mem_offset = ttmp4
+var s_save_alloc_size = s_save_excp_flag_priv
+var s_save_tmp = ttmp14
+var s_save_m0 = ttmp5
+var s_save_ttmps_lo = s_save_tmp
+var s_save_ttmps_hi = s_save_excp_flag_priv
+
+var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE
+var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC
+
+var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000
+var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26
+var S_WAVE_SIZE = 25
+
+var s_restore_spi_init_lo = exec_lo
+var s_restore_spi_init_hi = exec_hi
+var s_restore_mem_offset = ttmp12
+var s_restore_alloc_size = ttmp3
+var s_restore_tmp = ttmp2
+var s_restore_mem_offset_save = s_restore_tmp
+var s_restore_m0 = s_restore_alloc_size
+var s_restore_mode = ttmp7
+var s_restore_flat_scratch = s_restore_tmp
+var s_restore_pc_lo = ttmp0
+var s_restore_pc_hi = ttmp1
+var s_restore_exec_lo = ttmp4
+var s_restore_exec_hi = ttmp5
+var s_restore_state_priv = ttmp14
+var s_restore_excp_flag_priv = ttmp15
+var s_restore_xnack_mask = ttmp13
+var s_restore_buf_rsrc0 = ttmp8
+var s_restore_buf_rsrc1 = ttmp9
+var s_restore_buf_rsrc2 = ttmp10
+var s_restore_buf_rsrc3 = ttmp11
+var s_restore_size = ttmp6
+var s_restore_ttmps_lo = s_restore_tmp
+var s_restore_ttmps_hi = s_restore_alloc_size
+var s_restore_spi_init_hi_save = s_restore_exec_hi
+
+shader main
+ asic(DEFAULT)
+ type(CS)
+ wave_size(32)
+
+ s_branch L_SKIP_RESTORE //NOT restore. might be a regular trap or save
+
+L_JUMP_TO_RESTORE:
+ s_branch L_RESTORE
+
+L_SKIP_RESTORE:
+ s_getreg_b32 s_save_state_priv, hwreg(HW_REG_WAVE_STATE_PRIV) //save STATUS since we will change SCC
+
+ // Clear SPI_PRIO: do not save with elevated priority.
+ // Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd.
+ s_andn2_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_ALWAYS_CLEAR_MASK
+
+ s_getreg_b32 s_save_excp_flag_priv, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
+
+ s_and_b32 ttmp2, s_save_state_priv, SQ_WAVE_STATE_PRIV_HALT_MASK
+ s_cbranch_scc0 L_NOT_HALTED
+
+L_HALTED:
+ // Host trap may occur while wave is halted.
+ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+L_CHECK_SAVE:
+ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK
+ s_cbranch_scc1 L_SAVE
+
+ // Wave is halted but neither host trap nor SAVECTX is raised.
+ // Caused by instruction fetch memory violation.
+ // Spin wait until context saved to prevent interrupt storm.
+ s_sleep 0x10
+ s_getreg_b32 s_save_excp_flag_priv, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
+ s_branch L_CHECK_SAVE
+
+L_NOT_HALTED:
+ // Let second-level handle non-SAVECTX exception or trap.
+ // Any concurrent SAVECTX will be handled upon re-entry once halted.
+
+ // Check non-maskable exceptions. memory_violation, illegal_instruction
+ // and xnack_error exceptions always cause the wave to enter the trap
+ // handler.
+ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_NON_MASKABLE_EXCP_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+ // Check for maskable exceptions in trapsts.excp and trapsts.excp_hi.
+ // Maskable exceptions only cause the wave to enter the trap handler if
+ // their respective bit in mode.excp_en is set.
+ s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_USER)
+ s_and_b32 ttmp3, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK
+ s_cbranch_scc0 L_NOT_ADDR_WATCH
+ s_or_b32 ttmp2, ttmp2, SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK
+
+L_NOT_ADDR_WATCH:
+ s_getreg_b32 ttmp3, hwreg(HW_REG_WAVE_TRAP_CTRL)
+ s_and_b32 ttmp2, ttmp3, ttmp2
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+L_CHECK_TRAP_ID:
+ // Check trap_id != 0
+ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+#if SINGLE_STEP_MISSED_WORKAROUND
+ // Prioritize single step exception over context save.
+ // Second-level trap will halt wave and RFE, re-entering for SAVECTX.
+ // WAVE_TRAP_CTRL is already in ttmp3.
+ s_and_b32 ttmp3, ttmp3, SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+#endif
+
+ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK
+ s_cbranch_scc1 L_SAVE
+
+L_FETCH_2ND_TRAP:
+ // Read second-level TBA/TMA from first-level TMA and jump if available.
+ // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
+ // ttmp12 holds SQ_WAVE_STATUS
+ s_sendmsg_rtn_b64 [ttmp14, ttmp15], sendmsg(MSG_RTN_GET_TMA)
+ s_wait_idle
+ s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
+
+ s_bitcmp1_b32 ttmp15, 0xF
+ s_cbranch_scc0 L_NO_SIGN_EXTEND_TMA
+ s_or_b32 ttmp15, ttmp15, 0xFFFF0000
+L_NO_SIGN_EXTEND_TMA:
+
+ s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 scope:SCOPE_SYS // debug trap enabled flag
+ s_wait_idle
+ s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT
+ s_andn2_b32 ttmp11, ttmp11, TTMP11_DEBUG_TRAP_ENABLED_MASK
+ s_or_b32 ttmp11, ttmp11, ttmp2
+
+ s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 scope:SCOPE_SYS // second-level TBA
+ s_wait_idle
+ s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 scope:SCOPE_SYS // second-level TMA
+ s_wait_idle
+
+ s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
+ s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set
+ s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler
+
+L_NO_NEXT_TRAP:
+ // If not caused by trap then halt wave to prevent re-entry.
+ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
+ s_cbranch_scc1 L_TRAP_CASE
+
+ // Host trap will not cause trap re-entry.
+ s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
+ s_and_b32 ttmp2, ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK
+ s_cbranch_scc1 L_EXIT_TRAP
+ s_or_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_HALT_MASK
+
+ // If the PC points to S_ENDPGM then context save will fail if STATE_PRIV.HALT is set.
+ // Rewind the PC to prevent this from occurring.
+ s_sub_u32 ttmp0, ttmp0, 0x8
+ s_subb_u32 ttmp1, ttmp1, 0x0
+
+ s_branch L_EXIT_TRAP
+
+L_TRAP_CASE:
+ // Advance past trap instruction to prevent re-entry.
+ s_add_u32 ttmp0, ttmp0, 0x4
+ s_addc_u32 ttmp1, ttmp1, 0x0
+
+L_EXIT_TRAP:
+ s_and_b32 ttmp1, ttmp1, 0xFFFF
+
+ // Restore SQ_WAVE_STATUS.
+ s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
+ s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
+
+ // STATE_PRIV.BARRIER_COMPLETE may have changed since we read it.
+ // Only restore fields which the trap handler changes.
+ s_lshr_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_SCC_SHIFT
+ s_setreg_b32 hwreg(HW_REG_WAVE_STATE_PRIV, SQ_WAVE_STATE_PRIV_SCC_SHIFT, \
+ SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT - SQ_WAVE_STATE_PRIV_SCC_SHIFT + 1), s_save_state_priv
+
+ s_rfe_b64 [ttmp0, ttmp1]
+
+L_SAVE:
+ // If VGPRs have been deallocated then terminate the wavefront.
+ // It has no remaining program to run and cannot save without VGPRs.
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS)
+ s_bitcmp1_b32 s_save_tmp, SQ_WAVE_STATUS_NO_VGPRS_SHIFT
+ s_cbranch_scc0 L_HAVE_VGPRS
+ s_endpgm
+L_HAVE_VGPRS:
+
+ s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
+ s_mov_b32 s_save_tmp, 0
+ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT, 1), s_save_tmp //clear saveCtx bit
+
+ /* inform SPI the readiness and wait for SPI's go signal */
+ s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI
+ s_mov_b32 s_save_exec_hi, exec_hi
+ s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive
+
+ s_sendmsg_rtn_b64 [exec_lo, exec_hi], sendmsg(MSG_RTN_SAVE_WAVE)
+ s_wait_idle
+
+ // Save first_wave flag so we can clear high bits of save address.
+ s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
+ s_lshl_b32 s_save_tmp, s_save_tmp, (S_SAVE_PC_HI_FIRST_WAVE_SHIFT - S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT)
+ s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
+
+ // Trap temporaries must be saved via VGPR but all VGPRs are in use.
+ // There is no ttmp space to hold the resource constant for VGPR save.
+ // Save v0 by itself since it requires only two SGPRs.
+ s_mov_b32 s_save_ttmps_lo, exec_lo
+ s_and_b32 s_save_ttmps_hi, exec_hi, 0xFFFF
+ s_mov_b32 exec_lo, 0xFFFFFFFF
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+ global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] scope:SCOPE_SYS
+ v_mov_b32 v0, 0x0
+ s_mov_b32 exec_lo, s_save_ttmps_lo
+ s_mov_b32 exec_hi, s_save_ttmps_hi
+
+ // Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
+ // ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40
+ get_wave_size2(s_save_ttmps_hi)
+ get_vgpr_size_bytes(s_save_ttmps_lo, s_save_ttmps_hi)
+ get_svgpr_size_bytes(s_save_ttmps_hi)
+ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi
+ s_and_b32 s_save_ttmps_hi, s_save_spi_init_hi, 0xFFFF
+ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, get_sgpr_size_bytes()
+ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo
+ s_addc_u32 s_save_ttmps_hi, s_save_ttmps_hi, 0x0
+
+ v_writelane_b32 v0, ttmp4, 0x4
+ v_writelane_b32 v0, ttmp5, 0x5
+ v_writelane_b32 v0, ttmp6, 0x6
+ v_writelane_b32 v0, ttmp7, 0x7
+ v_writelane_b32 v0, ttmp8, 0x8
+ v_writelane_b32 v0, ttmp9, 0x9
+ v_writelane_b32 v0, ttmp10, 0xA
+ v_writelane_b32 v0, ttmp11, 0xB
+ v_writelane_b32 v0, ttmp13, 0xD
+ v_writelane_b32 v0, exec_lo, 0xE
+ v_writelane_b32 v0, exec_hi, 0xF
+ valu_sgpr_hazard()
+
+ s_mov_b32 exec_lo, 0x3FFF
+ s_mov_b32 exec_hi, 0x0
+ global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] offset:0x40 scope:SCOPE_SYS
+ v_readlane_b32 ttmp14, v0, 0xE
+ v_readlane_b32 ttmp15, v0, 0xF
+ s_mov_b32 exec_lo, ttmp14
+ s_mov_b32 exec_hi, ttmp15
+
+ /* setup Resource Contants */
+ s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo
+ s_and_b32 s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF //base_addr_hi
+ s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE
+ s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited
+ s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC
+
+ s_mov_b32 s_save_m0, m0
+
+ /* global mem offset */
+ s_mov_b32 s_save_mem_offset, 0x0
+ get_wave_size2(s_wave_size)
+
+ /* save first 4 VGPRs, needed for SGPR save */
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_ENABLE_SAVE_4VGPR_EXEC_HI
+ s_mov_b32 exec_hi, 0x00000000
+ s_branch L_SAVE_4VGPR_WAVE32
+L_ENABLE_SAVE_4VGPR_EXEC_HI:
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+ s_branch L_SAVE_4VGPR_WAVE64
+L_SAVE_4VGPR_WAVE32:
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR Allocated in 4-GPR granularity
+
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*3
+ s_branch L_SAVE_HWREG
+
+L_SAVE_4VGPR_WAVE64:
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR Allocated in 4-GPR granularity
+
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*3
+
+ /* save HW registers */
+
+L_SAVE_HWREG:
+ // HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)
+ get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
+ get_svgpr_size_bytes(s_save_tmp)
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, get_sgpr_size_bytes()
+
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ v_mov_b32 v0, 0x0 //Offset[31:0] from buffer resource
+ v_mov_b32 v1, 0x0 //Offset[63:32] from buffer resource
+ v_mov_b32 v2, 0x0 //Set of SGPRs for TCP store
+
+ // Ensure no further changes to barrier or LDS state.
+ // STATE_PRIV.BARRIER_COMPLETE may change up to this point.
+ s_barrier_signal -2
+ s_barrier_wait -2
+
+ // Re-read final state of BARRIER_COMPLETE field for save.
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATE_PRIV)
+ s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK
+ s_andn2_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK
+ s_or_b32 s_save_state_priv, s_save_state_priv, s_save_tmp
+
+ s_andn2_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
+ v_writelane_b32 v2, s_save_m0, 0x0
+ v_writelane_b32 v2, s_save_pc_lo, 0x1
+ v_writelane_b32 v2, s_save_tmp, 0x2
+ v_writelane_b32 v2, s_save_exec_lo, 0x3
+ v_writelane_b32 v2, s_save_exec_hi, 0x4
+ v_writelane_b32 v2, s_save_state_priv, 0x5
+ v_writelane_b32 v2, s_save_xnack_mask, 0x7
+ valu_sgpr_hazard()
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
+ v_writelane_b32 v2, s_save_tmp, 0x6
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_MODE)
+ v_writelane_b32 v2, s_save_tmp, 0x8
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_SCRATCH_BASE_LO)
+ v_writelane_b32 v2, s_save_tmp, 0x9
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_SCRATCH_BASE_HI)
+ v_writelane_b32 v2, s_save_tmp, 0xA
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_EXCP_FLAG_USER)
+ v_writelane_b32 v2, s_save_tmp, 0xB
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_TRAP_CTRL)
+ v_writelane_b32 v2, s_save_tmp, 0xC
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS)
+ v_writelane_b32 v2, s_save_tmp, 0xD
+
+ s_get_barrier_state s_save_tmp, -1
+ s_wait_kmcnt (0)
+ v_writelane_b32 v2, s_save_tmp, 0xE
+ valu_sgpr_hazard()
+
+ // Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this.
+ s_mov_b32 exec_lo, 0xFFFF
+ s_mov_b32 exec_hi, 0x0
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+
+ // Write SGPRs with 32 VGPR lanes. This works in wave32 and wave64 mode.
+ s_mov_b32 exec_lo, 0xFFFFFFFF
+
+ /* save SGPRs */
+ // Save SGPR before LDS save, then the s0 to s4 can be used during LDS save...
+
+ // SGPR SR memory offset : size(VGPR)+size(SVGPR)
+ get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
+ get_svgpr_size_bytes(s_save_tmp)
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ s_mov_b32 ttmp13, 0x0 //next VGPR lane to copy SGPR into
+
+ s_mov_b32 m0, 0x0 //SGPR initial index value =0
+ s_nop 0x0 //Manually inserted wait states
+L_SAVE_SGPR_LOOP:
+ // SGPR is allocated in 16 SGPR granularity
+ s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0]
+ s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0]
+ s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0]
+ s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0]
+ s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0]
+ s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0]
+ s_movrels_b64 s12, s12 //s12 = s[12+m0], s13 = s[13+m0]
+ s_movrels_b64 s14, s14 //s14 = s[14+m0], s15 = s[15+m0]
+
+ s_cmp_eq_u32 ttmp13, 0x0
+ s_cbranch_scc0 L_WRITE_V2_SECOND_HALF
+ write_16sgpr_to_v2(s0, 0x0)
+ s_branch L_SAVE_SGPR_SKIP_TCP_STORE
+L_WRITE_V2_SECOND_HALF:
+ write_16sgpr_to_v2(s0, 0x10)
+
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 0x80
+ s_mov_b32 ttmp13, 0x0
+ v_mov_b32 v2, 0x0
+L_SAVE_SGPR_SKIP_TCP_STORE:
+
+ s_add_u32 m0, m0, 16 //next sgpr index
+ s_cmp_lt_u32 m0, 96 //scc = (m0 < first 96 SGPR) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_SGPR_LOOP //first 96 SGPR save is complete?
+
+ //save the rest 12 SGPR
+ s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0]
+ s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0]
+ s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0]
+ s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0]
+ s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0]
+ s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0]
+ write_12sgpr_to_v2(s0)
+
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+
+ /* save LDS */
+
+L_SAVE_LDS:
+ // Change EXEC to all threads...
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_ENABLE_SAVE_LDS_EXEC_HI
+ s_mov_b32 exec_hi, 0x00000000
+ s_branch L_SAVE_LDS_NORMAL
+L_ENABLE_SAVE_LDS_EXEC_HI:
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+L_SAVE_LDS_NORMAL:
+ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)
+ s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero?
+ s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE
+
+ s_and_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
+ s_cbranch_scc0 L_SAVE_LDS_DONE
+
+ // first wave do LDS save;
+
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, SQ_WAVE_LDS_ALLOC_GRANULARITY
+ s_mov_b32 s_save_buf_rsrc2, s_save_alloc_size //NUM_RECORDS in bytes
+
+ // LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG)
+ //
+ get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
+ get_svgpr_size_bytes(s_save_tmp)
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, get_sgpr_size_bytes()
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes()
+
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ //load 0~63*4(byte address) to vgpr v0
+ v_mbcnt_lo_u32_b32 v0, -1, 0
+ v_mbcnt_hi_u32_b32 v0, -1, v0
+ v_mul_u32_u24 v0, 4, v0
+
+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_mov_b32 m0, 0x0
+ s_cbranch_scc1 L_SAVE_LDS_W64
+
+L_SAVE_LDS_W32:
+ s_mov_b32 s3, 128
+ s_nop 0
+ s_nop 0
+ s_nop 0
+L_SAVE_LDS_LOOP_W32:
+ ds_read_b32 v1, v0
+ s_wait_idle
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+
+ s_add_u32 m0, m0, s3 //every buffer_store_lds does 128 bytes
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s3
+ v_add_nc_u32 v0, v0, 128 //mem offset increased by 128 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_LDS_LOOP_W32 //LDS save is complete?
+
+ s_branch L_SAVE_LDS_DONE
+
+L_SAVE_LDS_W64:
+ s_mov_b32 s3, 256
+ s_nop 0
+ s_nop 0
+ s_nop 0
+L_SAVE_LDS_LOOP_W64:
+ ds_read_b32 v1, v0
+ s_wait_idle
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+
+ s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s3
+ v_add_nc_u32 v0, v0, 256 //mem offset increased by 256 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_LDS_LOOP_W64 //LDS save is complete?
+
+L_SAVE_LDS_DONE:
+ /* save VGPRs - set the Rest VGPRs */
+L_SAVE_VGPR:
+ // VGPR SR memory offset: 0
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_ENABLE_SAVE_VGPR_EXEC_HI
+ s_mov_b32 s_save_mem_offset, (0+128*4) // for the rest VGPRs
+ s_mov_b32 exec_hi, 0x00000000
+ s_branch L_SAVE_VGPR_NORMAL
+L_ENABLE_SAVE_VGPR_EXEC_HI:
+ s_mov_b32 s_save_mem_offset, (0+256*4) // for the rest VGPRs
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+L_SAVE_VGPR_NORMAL:
+ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
+ s_add_u32 s_save_alloc_size, s_save_alloc_size, 1
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value)
+ //determine it is wave32 or wave64
+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_SAVE_VGPR_WAVE64
+
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR Allocated in 4-GPR granularity
+
+ // VGPR store using dw burst
+ s_mov_b32 m0, 0x4 //VGPR initial index value =4
+ s_cmp_lt_u32 m0, s_save_alloc_size
+ s_cbranch_scc0 L_SAVE_VGPR_END
+
+L_SAVE_VGPR_W32_LOOP:
+ v_movrels_b32 v0, v0 //v0 = v[0+m0]
+ v_movrels_b32 v1, v1 //v1 = v[1+m0]
+ v_movrels_b32 v2, v2 //v2 = v[2+m0]
+ v_movrels_b32 v3, v3 //v3 = v[3+m0]
+
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*3
+
+ s_add_u32 m0, m0, 4 //next vgpr index
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 128*4 //every buffer_store_dword does 128 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_VGPR_W32_LOOP //VGPR save is complete?
+
+ s_branch L_SAVE_VGPR_END
+
+L_SAVE_VGPR_WAVE64:
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR store using dw burst
+ s_mov_b32 m0, 0x4 //VGPR initial index value =4
+ s_cmp_lt_u32 m0, s_save_alloc_size
+ s_cbranch_scc0 L_SAVE_SHARED_VGPR
+
+L_SAVE_VGPR_W64_LOOP:
+ v_movrels_b32 v0, v0 //v0 = v[0+m0]
+ v_movrels_b32 v1, v1 //v1 = v[1+m0]
+ v_movrels_b32 v2, v2 //v2 = v[2+m0]
+ v_movrels_b32 v3, v3 //v3 = v[3+m0]
+
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*3
+
+ s_add_u32 m0, m0, 4 //next vgpr index
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 //every buffer_store_dword does 256 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_VGPR_W64_LOOP //VGPR save is complete?
+
+L_SAVE_SHARED_VGPR:
+ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE)
+ s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero?
+ s_cbranch_scc0 L_SAVE_VGPR_END //no shared_vgpr used? jump to L_SAVE_LDS
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 3 //Number of SHARED_VGPRs = shared_vgpr_size * 8 (non-zero value)
+ //m0 now has the value of normal vgpr count, just add the m0 with shared_vgpr count to get the total count.
+ //save shared_vgpr will start from the index of m0
+ s_add_u32 s_save_alloc_size, s_save_alloc_size, m0
+ s_mov_b32 exec_lo, 0xFFFFFFFF
+ s_mov_b32 exec_hi, 0x00000000
+
+L_SAVE_SHARED_VGPR_WAVE64_LOOP:
+ v_movrels_b32 v0, v0 //v0 = v[0+m0]
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+ s_add_u32 m0, m0, 1 //next vgpr index
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 128
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_SHARED_VGPR_WAVE64_LOOP //SHARED_VGPR save is complete?
+
+L_SAVE_VGPR_END:
+ s_branch L_END_PGM
+
+L_RESTORE:
+ /* Setup Resource Contants */
+ s_mov_b32 s_restore_buf_rsrc0, s_restore_spi_init_lo //base_addr_lo
+ s_and_b32 s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF //base_addr_hi
+ s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE
+ s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes)
+ s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
+
+ // Save s_restore_spi_init_hi for later use.
+ s_mov_b32 s_restore_spi_init_hi_save, s_restore_spi_init_hi
+
+ //determine it is wave32 or wave64
+ get_wave_size2(s_restore_size)
+
+ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
+ s_cbranch_scc0 L_RESTORE_VGPR
+
+ /* restore LDS */
+L_RESTORE_LDS:
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_ENABLE_RESTORE_LDS_EXEC_HI
+ s_mov_b32 exec_hi, 0x00000000
+ s_branch L_RESTORE_LDS_NORMAL
+L_ENABLE_RESTORE_LDS_EXEC_HI:
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+L_RESTORE_LDS_NORMAL:
+ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)
+ s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //lds_size is zero?
+ s_cbranch_scc0 L_RESTORE_VGPR //no lds used? jump to L_RESTORE_VGPR
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, SQ_WAVE_LDS_ALLOC_GRANULARITY
+ s_mov_b32 s_restore_buf_rsrc2, s_restore_alloc_size //NUM_RECORDS in bytes
+
+ // LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG)
+ //
+ get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size)
+ get_svgpr_size_bytes(s_restore_tmp)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes()
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes()
+
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_mov_b32 m0, 0x0
+ s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64
+
+L_RESTORE_LDS_LOOP_W32:
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
+ s_wait_idle
+ ds_store_addtid_b32 v0
+ s_add_u32 m0, m0, 128 // 128 DW
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128 //mem offset increased by 128DW
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_LDS_LOOP_W32 //LDS restore is complete?
+ s_branch L_RESTORE_VGPR
+
+L_RESTORE_LDS_LOOP_W64:
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
+ s_wait_idle
+ ds_store_addtid_b32 v0
+ s_add_u32 m0, m0, 256 // 256 DW
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 //mem offset increased by 256DW
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64 //LDS restore is complete?
+
+ /* restore VGPRs */
+L_RESTORE_VGPR:
+ // VGPR SR memory offset : 0
+ s_mov_b32 s_restore_mem_offset, 0x0
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_ENABLE_RESTORE_VGPR_EXEC_HI
+ s_mov_b32 exec_hi, 0x00000000
+ s_branch L_RESTORE_VGPR_NORMAL
+L_ENABLE_RESTORE_VGPR_EXEC_HI:
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+L_RESTORE_VGPR_NORMAL:
+ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
+ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value)
+ //determine it is wave32 or wave64
+ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_RESTORE_VGPR_WAVE64
+
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR load using dw burst
+ s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128*4
+ s_mov_b32 m0, 4 //VGPR initial index value = 4
+ s_cmp_lt_u32 m0, s_restore_alloc_size
+ s_cbranch_scc0 L_RESTORE_SGPR
+
+L_RESTORE_VGPR_WAVE32_LOOP:
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128*3
+ s_wait_idle
+ v_movreld_b32 v0, v0 //v[0+m0] = v0
+ v_movreld_b32 v1, v1
+ v_movreld_b32 v2, v2
+ v_movreld_b32 v3, v3
+ s_add_u32 m0, m0, 4 //next vgpr index
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128*4 //every buffer_load_dword does 128 bytes
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_VGPR_WAVE32_LOOP //VGPR restore (except v0) is complete?
+
+ /* VGPR restore on v0 */
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128*3
+ s_wait_idle
+
+ s_branch L_RESTORE_SGPR
+
+L_RESTORE_VGPR_WAVE64:
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR load using dw burst
+ s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v4, v0 will be the last
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4
+ s_mov_b32 m0, 4 //VGPR initial index value = 4
+ s_cmp_lt_u32 m0, s_restore_alloc_size
+ s_cbranch_scc0 L_RESTORE_SHARED_VGPR
+
+L_RESTORE_VGPR_WAVE64_LOOP:
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256*3
+ s_wait_idle
+ v_movreld_b32 v0, v0 //v[0+m0] = v0
+ v_movreld_b32 v1, v1
+ v_movreld_b32 v2, v2
+ v_movreld_b32 v3, v3
+ s_add_u32 m0, m0, 4 //next vgpr index
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 //every buffer_load_dword does 256 bytes
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete?
+
+L_RESTORE_SHARED_VGPR:
+ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) //shared_vgpr_size
+ s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero?
+ s_cbranch_scc0 L_RESTORE_V0 //no shared_vgpr used?
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 3 //Number of SHARED_VGPRs = shared_vgpr_size * 8 (non-zero value)
+ //m0 now has the value of normal vgpr count, just add the m0 with shared_vgpr count to get the total count.
+ //restore shared_vgpr will start from the index of m0
+ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, m0
+ s_mov_b32 exec_lo, 0xFFFFFFFF
+ s_mov_b32 exec_hi, 0x00000000
+L_RESTORE_SHARED_VGPR_WAVE64_LOOP:
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS
+ s_wait_idle
+ v_movreld_b32 v0, v0 //v[0+m0] = v0
+ s_add_u32 m0, m0, 1 //next vgpr index
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_SHARED_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete?
+
+ s_mov_b32 exec_hi, 0xFFFFFFFF //restore back exec_hi before restoring V0!!
+
+ /* VGPR restore on v0 */
+L_RESTORE_V0:
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256*3
+ s_wait_idle
+
+ /* restore SGPRs */
+ //will be 2+8+16*6
+ // SGPR SR memory offset : size(VGPR)+size(SVGPR)
+L_RESTORE_SGPR:
+ get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size)
+ get_svgpr_size_bytes(s_restore_tmp)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes()
+ s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 20*4 //s108~s127 is not saved
+
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ s_mov_b32 m0, s_sgpr_save_num
+
+ read_4sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+
+ s_sub_u32 m0, m0, 4 // Restore from S[0] to S[104]
+ s_nop 0 // hazard SALU M0=> S_MOVREL
+
+ s_movreld_b64 s0, s0 //s[0+m0] = s0
+ s_movreld_b64 s2, s2
+
+ read_8sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+
+ s_sub_u32 m0, m0, 8 // Restore from S[0] to S[96]
+ s_nop 0 // hazard SALU M0=> S_MOVREL
+
+ s_movreld_b64 s0, s0 //s[0+m0] = s0
+ s_movreld_b64 s2, s2
+ s_movreld_b64 s4, s4
+ s_movreld_b64 s6, s6
+
+ L_RESTORE_SGPR_LOOP:
+ read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+
+ s_sub_u32 m0, m0, 16 // Restore from S[n] to S[0]
+ s_nop 0 // hazard SALU M0=> S_MOVREL
+
+ s_movreld_b64 s0, s0 //s[0+m0] = s0
+ s_movreld_b64 s2, s2
+ s_movreld_b64 s4, s4
+ s_movreld_b64 s6, s6
+ s_movreld_b64 s8, s8
+ s_movreld_b64 s10, s10
+ s_movreld_b64 s12, s12
+ s_movreld_b64 s14, s14
+
+ s_cmp_eq_u32 m0, 0 //scc = (m0 < s_sgpr_save_num) ? 1 : 0
+ s_cbranch_scc0 L_RESTORE_SGPR_LOOP
+
+ // s_barrier with STATE_PRIV.TRAP_AFTER_INST=1, STATUS.PRIV=1 incorrectly asserts debug exception.
+ // Clear DEBUG_EN before and restore MODE after the barrier.
+ s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE), 0
+
+ /* restore HW registers */
+L_RESTORE_HWREG:
+ // HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)
+ get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size)
+ get_svgpr_size_bytes(s_restore_tmp)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes()
+
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // Restore s_restore_spi_init_hi before the saved value gets clobbered.
+ s_mov_b32 s_restore_spi_init_hi, s_restore_spi_init_hi_save
+
+ read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_state_priv, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_excp_flag_priv, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_xnack_mask, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+
+ s_setreg_b32 hwreg(HW_REG_WAVE_SCRATCH_BASE_LO), s_restore_flat_scratch
+
+ read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+
+ s_setreg_b32 hwreg(HW_REG_WAVE_SCRATCH_BASE_HI), s_restore_flat_scratch
+
+ read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_USER), s_restore_tmp
+
+ read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+ s_setreg_b32 hwreg(HW_REG_WAVE_TRAP_CTRL), s_restore_tmp
+
+ // Only the first wave needs to restore the workgroup barrier.
+ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
+ s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
+
+ // Skip over WAVE_STATUS, since there is no state to restore from it
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 4
+
+ read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+
+ s_bitcmp1_b32 s_restore_tmp, BARRIER_STATE_VALID_OFFSET
+ s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
+
+ // extract the saved signal count from s_restore_tmp
+ s_lshr_b32 s_restore_tmp, s_restore_tmp, BARRIER_STATE_SIGNAL_OFFSET
+
+ // We need to call s_barrier_signal repeatedly to restore the signal
+ // count of the work group barrier. The member count is already
+ // initialized with the number of waves in the work group.
+L_BARRIER_RESTORE_LOOP:
+ s_and_b32 s_restore_tmp, s_restore_tmp, s_restore_tmp
+ s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
+ s_barrier_signal -1
+ s_add_i32 s_restore_tmp, s_restore_tmp, -1
+ s_branch L_BARRIER_RESTORE_LOOP
+
+L_SKIP_BARRIER_RESTORE:
+
+ s_mov_b32 m0, s_restore_m0
+ s_mov_b32 exec_lo, s_restore_exec_lo
+ s_mov_b32 exec_hi, s_restore_exec_hi
+
+ // EXCP_FLAG_PRIV.SAVE_CONTEXT and HOST_TRAP may have changed.
+ // Only restore the other fields to avoid clobbering them.
+ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, 0, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_1_SIZE), s_restore_excp_flag_priv
+ s_lshr_b32 s_restore_excp_flag_priv, s_restore_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT
+ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SIZE), s_restore_excp_flag_priv
+ s_lshr_b32 s_restore_excp_flag_priv, s_restore_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT
+ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SIZE), s_restore_excp_flag_priv
+
+ s_setreg_b32 hwreg(HW_REG_WAVE_MODE), s_restore_mode
+
+ // Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
+ // ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40
+ get_vgpr_size_bytes(s_restore_ttmps_lo, s_restore_size)
+ get_svgpr_size_bytes(s_restore_ttmps_hi)
+ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_ttmps_hi
+ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, get_sgpr_size_bytes()
+ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0
+ s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0
+ s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF
+ s_load_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 scope:SCOPE_SYS
+ s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 scope:SCOPE_SYS
+ s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 scope:SCOPE_SYS
+ s_wait_idle
+
+ s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
+ s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
+ s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
+
+ s_setreg_b32 hwreg(HW_REG_WAVE_STATE_PRIV), s_restore_state_priv // SCC is included, which is changed by previous salu
+
+ // Make barrier and LDS state visible to all waves in the group.
+ // STATE_PRIV.BARRIER_COMPLETE may change after this point.
+ s_barrier_signal -2
+ s_barrier_wait -2
+
+ s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
+
+L_END_PGM:
+ // Make sure that no wave of the workgroup can exit the trap handler
+ // before the workgroup barrier state is saved.
+ s_barrier_signal -2
+ s_barrier_wait -2
+ s_endpgm_saved
+end
+
+function write_16sgpr_to_v2(s, lane_offset)
+ // Copy into VGPR for later TCP store.
+ for var sgpr_idx = 0; sgpr_idx < 16; sgpr_idx ++
+ v_writelane_b32 v2, s[sgpr_idx], sgpr_idx + lane_offset
+ end
+ valu_sgpr_hazard()
+ s_add_u32 ttmp13, ttmp13, 0x10
+end
+
+function write_12sgpr_to_v2(s)
+ // Copy into VGPR for later TCP store.
+ for var sgpr_idx = 0; sgpr_idx < 12; sgpr_idx ++
+ v_writelane_b32 v2, s[sgpr_idx], sgpr_idx
+ end
+ valu_sgpr_hazard()
+end
+
+function read_hwreg_from_mem(s, s_rsrc, s_mem_offset)
+ s_buffer_load_dword s, s_rsrc, s_mem_offset scope:SCOPE_SYS
+ s_add_u32 s_mem_offset, s_mem_offset, 4
+end
+
+function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset)
+ s_sub_u32 s_mem_offset, s_mem_offset, 4*16
+ s_buffer_load_dwordx16 s, s_rsrc, s_mem_offset scope:SCOPE_SYS
+end
+
+function read_8sgpr_from_mem(s, s_rsrc, s_mem_offset)
+ s_sub_u32 s_mem_offset, s_mem_offset, 4*8
+ s_buffer_load_dwordx8 s, s_rsrc, s_mem_offset scope:SCOPE_SYS
+end
+
+function read_4sgpr_from_mem(s, s_rsrc, s_mem_offset)
+ s_sub_u32 s_mem_offset, s_mem_offset, 4*4
+ s_buffer_load_dwordx4 s, s_rsrc, s_mem_offset scope:SCOPE_SYS
+end
+
+function get_vgpr_size_bytes(s_vgpr_size_byte, s_size)
+ s_getreg_b32 s_vgpr_size_byte, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
+ s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1
+ s_bitcmp1_b32 s_size, S_WAVE_SIZE
+ s_cbranch_scc1 L_ENABLE_SHIFT_W64
+ s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+7) //Number of VGPRs = (vgpr_size + 1) * 4 * 32 * 4 (non-zero value)
+ s_branch L_SHIFT_DONE
+L_ENABLE_SHIFT_W64:
+ s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+8) //Number of VGPRs = (vgpr_size + 1) * 4 * 64 * 4 (non-zero value)
+L_SHIFT_DONE:
+end
+
+function get_svgpr_size_bytes(s_svgpr_size_byte)
+ s_getreg_b32 s_svgpr_size_byte, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE)
+ s_lshl_b32 s_svgpr_size_byte, s_svgpr_size_byte, (3+7)
+end
+
+function get_sgpr_size_bytes
+ return 512
+end
+
+function get_hwreg_size_bytes
+ return 128
+end
+
+function get_wave_size2(s_reg)
+ s_getreg_b32 s_reg, hwreg(HW_REG_WAVE_STATUS,SQ_WAVE_STATUS_WAVE64_SHIFT,SQ_WAVE_STATUS_WAVE64_SIZE)
+ s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE
+end
+
+function valu_sgpr_hazard
+#if HAVE_VALU_SGPR_HAZARD
+ for var rep = 0; rep < 8; rep ++
+ ds_nop
+ end
+#endif
+end
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
index bb26338204f4..6869e07a2fff 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
@@ -37,17 +37,28 @@
* gc_9_4_3:
* cpp -DASIC_FAMILY=GC_9_4_3 cwsr_trap_handler_gfx9.asm -P -o gc_9_4_3.sp3
* sp3 gc_9_4_3.sp3 -hex gc_9_4_3.hex
+ *
+ * gc_9_5_0:
+ * cpp -DASIC_FAMILY=GC_9_5_0 cwsr_trap_handler_gfx9.asm -P -o gc_9_5_0.sp3
+ * sp3 gc_9_5_0.sp3 -hex gc_9_5_0.hex
*/
#define CHIP_VEGAM 18
#define CHIP_ARCTURUS 23
#define CHIP_ALDEBARAN 25
#define CHIP_GC_9_4_3 26
+#define CHIP_GC_9_5_0 27
var ACK_SQC_STORE = 1 //workaround for suspected SQC store bug causing incorrect stores under concurrency
var SAVE_AFTER_XNACK_ERROR = 1 //workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger
var SINGLE_STEP_MISSED_WORKAROUND = (ASIC_FAMILY <= CHIP_ALDEBARAN) //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
+#if ASIC_FAMILY < CHIP_GC_9_4_3
+#define VMEM_MODIFIERS slc:1 glc:1
+#else
+#define VMEM_MODIFIERS sc0:1 nt:1
+#endif
+
/**************************************************************************/
/* variables */
/**************************************************************************/
@@ -62,7 +73,13 @@ var SQ_WAVE_STATUS_ALLOW_REPLAY_MASK = 0x400000
var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
+#if ASIC_FAMILY >= CHIP_GC_9_5_0
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 11
+var LDS_RESTORE_GRANULARITY_BYTES = 1280
+#else
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
+var LDS_RESTORE_GRANULARITY_BYTES = 512
+#endif
var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6
var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 3 //FIXME sq.blk still has 4 bits at this time while SQ programming guide has 3 bits
var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24
@@ -430,7 +447,9 @@ L_SAVE:
s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE) //MODE
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
-
+ // Clear VSKIP state now that MODE.VSKIP has been saved.
+ // If user shader set it then vector instructions would be skipped.
+ s_setvskip 0,0
/* the first wave in the threadgroup */
s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK // extract fisrt wave bit
@@ -557,12 +576,21 @@ if SAVE_AFTER_XNACK_ERROR
v_lshlrev_b32 v2, 2, v3
L_SAVE_LDS_LOOP_SQC:
+#if ASIC_FAMILY < CHIP_GC_9_5_0
ds_read2_b32 v[0:1], v2 offset0:0 offset1:0x40
s_waitcnt lgkmcnt(0)
-
write_vgprs_to_mem_with_sqc(v0, 2, s_save_buf_rsrc0, s_save_mem_offset)
v_add_u32 v2, 0x200, v2
+#else
+ // gfx950 needs to save in multiple of 256 bytes.
+ ds_read_b32 v0, v2
+ s_waitcnt lgkmcnt(0)
+ write_vgprs_to_mem_with_sqc(v0, 1, s_save_buf_rsrc0, s_save_mem_offset)
+
+ v_add_u32 v2, 0x100, v2
+#endif
+
v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size
s_cbranch_vccnz L_SAVE_LDS_LOOP_SQC
@@ -581,11 +609,14 @@ end
L_SAVE_LDS_LOOP_VECTOR:
ds_read_b64 v[0:1], v2 //x =LDS[a], byte address
s_waitcnt lgkmcnt(0)
- buffer_store_dwordx2 v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset offen:1 glc:1 slc:1
+ buffer_store_dwordx2 v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset VMEM_MODIFIERS offen:1
// s_waitcnt vmcnt(0)
// v_add_u32 v2, vcc[0:1], v2, v3
v_add_u32 v2, v2, v3
v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size
+#if ASIC_FAMILY >= CHIP_GC_9_5_0
+ s_mov_b64 exec, vcc
+#endif
s_cbranch_vccnz L_SAVE_LDS_LOOP_VECTOR
// restore rsrc3
@@ -748,8 +779,13 @@ L_RESTORE:
L_RESTORE_LDS_LOOP:
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:256 // second 64DW
- s_add_u32 m0, m0, 256*2 // 128 DW
- s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*2 //mem offset increased by 128DW
+#if ASIC_FAMILY >= CHIP_GC_9_5_0
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:512 // third 64DW
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:768 // forth 64DW
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:1024 // fifth 64DW
+#endif
+ s_add_u32 m0, m0, LDS_RESTORE_GRANULARITY_BYTES // 128/320 DW
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, LDS_RESTORE_GRANULARITY_BYTES //mem offset increased by 128/320 DW
s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
s_cbranch_scc1 L_RESTORE_LDS_LOOP //LDS restore is complete?
@@ -979,17 +1015,17 @@ L_TCP_STORE_CHECK_DONE:
end
function write_4vgprs_to_mem(s_rsrc, s_mem_offset)
- buffer_store_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1
- buffer_store_dword v1, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256
- buffer_store_dword v2, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*2
- buffer_store_dword v3, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*3
+ buffer_store_dword v0, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS
+ buffer_store_dword v1, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256
+ buffer_store_dword v2, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*2
+ buffer_store_dword v3, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*3
end
function read_4vgprs_from_mem(s_rsrc, s_mem_offset)
- buffer_load_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1
- buffer_load_dword v1, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256
- buffer_load_dword v2, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*2
- buffer_load_dword v3, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*3
+ buffer_load_dword v0, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS
+ buffer_load_dword v1, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256
+ buffer_load_dword v2, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*2
+ buffer_load_dword v3, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*3
s_waitcnt vmcnt(0)
end
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index dfa8c69532d4..a2149afa5803 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -36,7 +36,6 @@
#include <linux/mman.h>
#include <linux/ptrace.h>
#include <linux/dma-buf.h>
-#include <linux/fdtable.h>
#include <linux/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
@@ -156,8 +155,8 @@ static int kfd_open(struct inode *inode, struct file *filep)
/* filep now owns the reference returned by kfd_create_process */
filep->private_data = process;
- dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
- process->pasid, process->is_32bit_user_mode);
+ dev_dbg(kfd_device, "process pid %d opened kfd node, compat mode (32 bit) - %d\n",
+ process->lead_thread->pid, process->is_32bit_user_mode);
return 0;
}
@@ -213,6 +212,11 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
return -EINVAL;
}
+ if (args->ring_size < KFD_MIN_QUEUE_RING_SIZE) {
+ args->ring_size = KFD_MIN_QUEUE_RING_SIZE;
+ pr_debug("Size lower. clamped to KFD_MIN_QUEUE_RING_SIZE");
+ }
+
if (!access_ok((const void __user *) args->read_pointer_address,
sizeof(uint32_t))) {
pr_err("Can't access read pointer\n");
@@ -247,14 +251,15 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
q_properties->priority = args->queue_priority;
q_properties->queue_address = args->ring_base_address;
q_properties->queue_size = args->ring_size;
- q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
- q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
+ q_properties->read_ptr = (void __user *)args->read_pointer_address;
+ q_properties->write_ptr = (void __user *)args->write_pointer_address;
q_properties->eop_ring_buffer_address = args->eop_buffer_address;
q_properties->eop_ring_buffer_size = args->eop_buffer_size;
q_properties->ctx_save_restore_area_address =
args->ctx_save_restore_address;
q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
q_properties->ctl_stack_size = args->ctl_stack_size;
+ q_properties->sdma_engine_id = args->sdma_engine_id;
if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
@@ -262,6 +267,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
q_properties->type = KFD_QUEUE_TYPE_SDMA;
else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
+ else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID)
+ q_properties->type = KFD_QUEUE_TYPE_SDMA_BY_ENG_ID;
else
return -ENOTSUPP;
@@ -306,7 +313,6 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
struct kfd_process_device *pdd;
struct queue_properties q_properties;
uint32_t doorbell_offset_in_process = 0;
- struct amdgpu_bo *wptr_bo = NULL;
memset(&q_properties, 0, sizeof(struct queue_properties));
@@ -334,6 +340,18 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
goto err_bind_process;
}
+ if (q_properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) {
+ int max_sdma_eng_id = kfd_get_num_sdma_engines(dev) +
+ kfd_get_num_xgmi_sdma_engines(dev) - 1;
+
+ if (q_properties.sdma_engine_id > max_sdma_eng_id) {
+ err = -EINVAL;
+ pr_err("sdma_engine_id %i exceeds maximum id of %i\n",
+ q_properties.sdma_engine_id, max_sdma_eng_id);
+ goto err_sdma_engine_id;
+ }
+ }
+
if (!pdd->qpd.proc_doorbells) {
err = kfd_alloc_process_doorbells(dev->kfd, pdd);
if (err) {
@@ -342,48 +360,17 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
}
}
- /* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work
- * on unmapped queues for usermode queue oversubscription (no aggregated doorbell)
- */
- if (dev->kfd->shared_resources.enable_mes &&
- ((dev->adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK)
- >> AMDGPU_MES_API_VERSION_SHIFT) >= 2) {
- struct amdgpu_bo_va_mapping *wptr_mapping;
- struct amdgpu_vm *wptr_vm;
-
- wptr_vm = drm_priv_to_vm(pdd->drm_priv);
- err = amdgpu_bo_reserve(wptr_vm->root.bo, false);
- if (err)
- goto err_wptr_map_gart;
-
- wptr_mapping = amdgpu_vm_bo_lookup_mapping(
- wptr_vm, args->write_pointer_address >> PAGE_SHIFT);
- amdgpu_bo_unreserve(wptr_vm->root.bo);
- if (!wptr_mapping) {
- pr_err("Failed to lookup wptr bo\n");
- err = -EINVAL;
- goto err_wptr_map_gart;
- }
-
- wptr_bo = wptr_mapping->bo_va->base.bo;
- if (wptr_bo->tbo.base.size > PAGE_SIZE) {
- pr_err("Requested GART mapping for wptr bo larger than one page\n");
- err = -EINVAL;
- goto err_wptr_map_gart;
- }
-
- err = amdgpu_amdkfd_map_gtt_bo_to_gart(wptr_bo);
- if (err) {
- pr_err("Failed to map wptr bo to GART\n");
- goto err_wptr_map_gart;
- }
+ err = kfd_queue_acquire_buffers(pdd, &q_properties);
+ if (err) {
+ pr_debug("failed to acquire user queue buffers\n");
+ goto err_acquire_queue_buf;
}
- pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
- p->pasid,
+ pr_debug("Creating queue for process pid %d on gpu 0x%x\n",
+ p->lead_thread->pid,
dev->id);
- err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, wptr_bo,
+ err = pqm_create_queue(&p->pqm, dev, &q_properties, &queue_id,
NULL, NULL, NULL, &doorbell_offset_in_process);
if (err != 0)
goto err_create_queue;
@@ -417,9 +404,10 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
return 0;
err_create_queue:
- if (wptr_bo)
- amdgpu_amdkfd_free_gtt_mem(dev->adev, wptr_bo);
-err_wptr_map_gart:
+ kfd_queue_unref_bo_vas(pdd, &q_properties);
+ kfd_queue_release_buffers(pdd, &q_properties);
+err_acquire_queue_buf:
+err_sdma_engine_id:
err_bind_process:
err_pdd:
mutex_unlock(&p->mutex);
@@ -432,9 +420,9 @@ static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
int retval;
struct kfd_ioctl_destroy_queue_args *args = data;
- pr_debug("Destroying queue id %d for pasid 0x%x\n",
+ pr_debug("Destroying queue id %d for process pid %d\n",
args->queue_id,
- p->pasid);
+ p->lead_thread->pid);
mutex_lock(&p->mutex);
@@ -478,6 +466,11 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
return -EINVAL;
}
+ if (args->ring_size < KFD_MIN_QUEUE_RING_SIZE) {
+ args->ring_size = KFD_MIN_QUEUE_RING_SIZE;
+ pr_debug("Size lower. clamped to KFD_MIN_QUEUE_RING_SIZE");
+ }
+
properties.queue_address = args->ring_base_address;
properties.queue_size = args->ring_size;
properties.queue_percent = args->queue_percentage & 0xFF;
@@ -485,8 +478,8 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
properties.pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF;
properties.priority = args->queue_priority;
- pr_debug("Updating queue id %d for pasid 0x%x\n",
- args->queue_id, p->pasid);
+ pr_debug("Updating queue id %d for process pid %d\n",
+ args->queue_id, p->lead_thread->pid);
mutex_lock(&p->mutex);
@@ -613,7 +606,8 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
default_policy,
alternate_policy,
(void __user *)args->alternate_aperture_base,
- args->alternate_aperture_size))
+ args->alternate_aperture_size,
+ args->misc_process_flag))
err = -EINVAL;
out:
@@ -712,7 +706,7 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
struct kfd_process_device_apertures *pAperture;
int i;
- dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
+ dev_dbg(kfd_device, "get apertures for process pid %d", p->lead_thread->pid);
args->num_of_nodes = 0;
@@ -764,7 +758,8 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp,
int ret;
int i;
- dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
+ dev_dbg(kfd_device, "get apertures for process pid %d",
+ p->lead_thread->pid);
if (args->num_of_nodes == 0) {
/* Return number of nodes, so that user space can alloacate
@@ -779,8 +774,8 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp,
* nodes, but not more than args->num_of_nodes as that is
* the amount of memory allocated by user
*/
- pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
- args->num_of_nodes), GFP_KERNEL);
+ pa = kcalloc(args->num_of_nodes, sizeof(struct kfd_process_device_apertures),
+ GFP_KERNEL);
if (!pa)
return -ENOMEM;
@@ -1139,7 +1134,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
goto err_unlock;
}
offset = dev->adev->rmmio_remap.bus_addr;
- if (!offset) {
+ if (!offset || (PAGE_SIZE > 4096)) {
err = -ENOMEM;
goto err_unlock;
}
@@ -1165,7 +1160,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
size >>= 1;
- WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + PAGE_ALIGN(size));
+ atomic64_add(PAGE_ALIGN(size), &pdd->vram_usage);
}
mutex_unlock(&p->mutex);
@@ -1236,7 +1231,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
kfd_process_device_remove_obj_handle(
pdd, GET_IDR_HANDLE(args->handle));
- WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
+ atomic64_sub(size, &pdd->vram_usage);
err_unlock:
err_pdd:
@@ -1417,8 +1412,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
peer_pdd->dev->adev, (struct kgd_mem *)mem, peer_pdd->drm_priv);
if (err) {
- pr_err("Failed to unmap from gpu %d/%d\n",
- i, args->n_devices);
+ pr_debug("Failed to unmap from gpu %d/%d\n", i, args->n_devices);
goto unmap_memory_from_gpu_failed;
}
args->n_success = i+1;
@@ -1523,7 +1517,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep,
/* Find a KFD GPU device that supports the get_dmabuf_info query */
for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)
- if (dev)
+ if (dev && !kfd_devcgroup_check_permission(dev))
break;
if (!dev)
return -EINVAL;
@@ -1545,7 +1539,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep,
if (xcp_id >= 0)
args->gpu_id = dmabuf_adev->kfd.dev->nodes[xcp_id]->id;
else
- args->gpu_id = dmabuf_adev->kfd.dev->nodes[0]->id;
+ args->gpu_id = dev->id;
args->flags = flags;
/* Copy metadata buffer to user mode */
@@ -1852,7 +1846,8 @@ static uint32_t get_process_num_bos(struct kfd_process *p)
}
static int criu_get_prime_handle(struct kgd_mem *mem,
- int flags, u32 *shared_fd)
+ int flags, u32 *shared_fd,
+ struct file **file)
{
struct dma_buf *dmabuf;
int ret;
@@ -1863,13 +1858,14 @@ static int criu_get_prime_handle(struct kgd_mem *mem,
return ret;
}
- ret = dma_buf_fd(dmabuf, flags);
+ ret = get_unused_fd_flags(flags);
if (ret < 0) {
pr_err("dmabuf create fd failed, ret:%d\n", ret);
goto out_free_dmabuf;
}
*shared_fd = ret;
+ *file = dmabuf->file;
return 0;
out_free_dmabuf:
@@ -1877,6 +1873,25 @@ out_free_dmabuf:
return ret;
}
+static void commit_files(struct file **files,
+ struct kfd_criu_bo_bucket *bo_buckets,
+ unsigned int count,
+ int err)
+{
+ while (count--) {
+ struct file *file = files[count];
+
+ if (!file)
+ continue;
+ if (err) {
+ fput(file);
+ put_unused_fd(bo_buckets[count].dmabuf_fd);
+ } else {
+ fd_install(bo_buckets[count].dmabuf_fd, file);
+ }
+ }
+}
+
static int criu_checkpoint_bos(struct kfd_process *p,
uint32_t num_bos,
uint8_t __user *user_bos,
@@ -1885,6 +1900,7 @@ static int criu_checkpoint_bos(struct kfd_process *p,
{
struct kfd_criu_bo_bucket *bo_buckets;
struct kfd_criu_bo_priv_data *bo_privs;
+ struct file **files = NULL;
int ret = 0, pdd_index, bo_index = 0, id;
void *mem;
@@ -1898,6 +1914,12 @@ static int criu_checkpoint_bos(struct kfd_process *p,
goto exit;
}
+ files = kvzalloc(num_bos * sizeof(struct file *), GFP_KERNEL);
+ if (!files) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
struct kfd_process_device *pdd = p->pdds[pdd_index];
struct amdgpu_bo *dumper_bo;
@@ -1908,11 +1930,6 @@ static int criu_checkpoint_bos(struct kfd_process *p,
struct kfd_criu_bo_priv_data *bo_priv;
int i, dev_idx = 0;
- if (!mem) {
- ret = -ENOMEM;
- goto exit;
- }
-
kgd_mem = (struct kgd_mem *)mem;
dumper_bo = kgd_mem->bo;
@@ -1945,7 +1962,7 @@ static int criu_checkpoint_bos(struct kfd_process *p,
ret = criu_get_prime_handle(kgd_mem,
bo_bucket->alloc_flags &
KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0,
- &bo_bucket->dmabuf_fd);
+ &bo_bucket->dmabuf_fd, &files[bo_index]);
if (ret)
goto exit;
} else {
@@ -1963,7 +1980,7 @@ static int criu_checkpoint_bos(struct kfd_process *p,
bo_bucket->offset = amdgpu_bo_mmap_offset(dumper_bo);
for (i = 0; i < p->n_pdds; i++) {
- if (amdgpu_amdkfd_bo_mapped_to_dev(p->pdds[i]->dev->adev, kgd_mem))
+ if (amdgpu_amdkfd_bo_mapped_to_dev(p->pdds[i]->drm_priv, kgd_mem))
bo_priv->mapped_gpuids[dev_idx++] = p->pdds[i]->user_gpu_id;
}
@@ -1996,12 +2013,8 @@ static int criu_checkpoint_bos(struct kfd_process *p,
*priv_offset += num_bos * sizeof(*bo_privs);
exit:
- while (ret && bo_index--) {
- if (bo_buckets[bo_index].alloc_flags
- & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
- close_fd(bo_buckets[bo_index].dmabuf_fd);
- }
-
+ commit_files(files, bo_buckets, bo_index, ret);
+ kvfree(files);
kvfree(bo_buckets);
kvfree(bo_privs);
return ret;
@@ -2026,9 +2039,7 @@ static int criu_get_process_object_info(struct kfd_process *p,
num_events = kfd_get_num_events(p);
- ret = svm_range_get_info(p, &num_svm_ranges, &svm_priv_data_size);
- if (ret)
- return ret;
+ svm_range_get_info(p, &num_svm_ranges, &svm_priv_data_size);
*num_objects = num_queues + num_events + num_svm_ranges;
@@ -2307,7 +2318,7 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
return -EINVAL;
}
offset = pdd->dev->adev->rmmio_remap.bus_addr;
- if (!offset) {
+ if (!offset || (PAGE_SIZE > 4096)) {
pr_err("amdgpu_amdkfd_get_mmio_remap_phys_addr failed\n");
return -ENOMEM;
}
@@ -2346,14 +2357,15 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
bo_bucket->restored_offset = offset;
/* Update the VRAM usage count */
- WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size);
+ atomic64_add(bo_bucket->size, &pdd->vram_usage);
}
return 0;
}
static int criu_restore_bo(struct kfd_process *p,
struct kfd_criu_bo_bucket *bo_bucket,
- struct kfd_criu_bo_priv_data *bo_priv)
+ struct kfd_criu_bo_priv_data *bo_priv,
+ struct file **file)
{
struct kfd_process_device *pdd;
struct kgd_mem *kgd_mem;
@@ -2405,7 +2417,7 @@ static int criu_restore_bo(struct kfd_process *p,
if (bo_bucket->alloc_flags
& (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
ret = criu_get_prime_handle(kgd_mem, DRM_RDWR,
- &bo_bucket->dmabuf_fd);
+ &bo_bucket->dmabuf_fd, file);
if (ret)
return ret;
} else {
@@ -2422,6 +2434,7 @@ static int criu_restore_bos(struct kfd_process *p,
{
struct kfd_criu_bo_bucket *bo_buckets = NULL;
struct kfd_criu_bo_priv_data *bo_privs = NULL;
+ struct file **files = NULL;
int ret = 0;
uint32_t i = 0;
@@ -2435,6 +2448,12 @@ static int criu_restore_bos(struct kfd_process *p,
if (!bo_buckets)
return -ENOMEM;
+ files = kvzalloc(args->num_bos * sizeof(struct file *), GFP_KERNEL);
+ if (!files) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
ret = copy_from_user(bo_buckets, (void __user *)args->bos,
args->num_bos * sizeof(*bo_buckets));
if (ret) {
@@ -2460,7 +2479,7 @@ static int criu_restore_bos(struct kfd_process *p,
/* Create and map new BOs */
for (; i < args->num_bos; i++) {
- ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i]);
+ ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i], &files[i]);
if (ret) {
pr_debug("Failed to restore BO[%d] ret%d\n", i, ret);
goto exit;
@@ -2475,11 +2494,8 @@ static int criu_restore_bos(struct kfd_process *p,
ret = -EFAULT;
exit:
- while (ret && i--) {
- if (bo_buckets[i].alloc_flags
- & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
- close_fd(bo_buckets[i].dmabuf_fd);
- }
+ commit_files(files, bo_buckets, i, ret);
+ kvfree(files);
kvfree(bo_buckets);
kvfree(bo_privs);
return ret;
@@ -3349,6 +3365,9 @@ static int kfd_mmio_mmap(struct kfd_node *dev, struct kfd_process *process,
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
return -EINVAL;
+ if (PAGE_SIZE > 4096)
+ return -EINVAL;
+
address = dev->adev->rmmio_remap.bus_addr;
vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
@@ -3356,12 +3375,12 @@ static int kfd_mmio_mmap(struct kfd_node *dev, struct kfd_process *process,
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- pr_debug("pasid 0x%x mapping mmio page\n"
+ pr_debug("process pid %d mapping mmio page\n"
" target user address == 0x%08llX\n"
" physical address == 0x%08llX\n"
" vm_flags == 0x%04lX\n"
" size == 0x%04lX\n",
- process->pasid, (unsigned long long) vma->vm_start,
+ process->lead_thread->pid, (unsigned long long) vma->vm_start,
address, vma->vm_flags, PAGE_SIZE);
return io_remap_pfn_range(vma,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 7f2ae0d15d4a..4a7180b46b71 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -28,6 +28,7 @@
#include "kfd_topology.h"
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_xgmi.h"
/* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
* GPU processor ID are expressed with Bit[31]=1.
@@ -1422,6 +1423,7 @@ err:
static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
+ bool cache_line_size_missing,
struct kfd_gpu_cache_info *pcache_info)
{
struct amdgpu_device *adev = kdev->adev;
@@ -1434,7 +1436,10 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
- pcache_info[0].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
+ pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
+ pcache_info[i].cache_line_size = adev->gfx.config.gc_tcp_cache_line_size;
+ if (cache_line_size_missing && !pcache_info[i].cache_line_size)
+ pcache_info[i].cache_line_size = 128;
i++;
}
/* Scalar L1 Instruction Cache per SQC */
@@ -1446,6 +1451,9 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
+ pcache_info[i].cache_line_size = adev->gfx.config.gc_instruction_cache_line_size;
+ if (cache_line_size_missing && !pcache_info[i].cache_line_size)
+ pcache_info[i].cache_line_size = 128;
i++;
}
/* Scalar L1 Data Cache per SQC */
@@ -1456,6 +1464,9 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
+ pcache_info[i].cache_line_size = adev->gfx.config.gc_scalar_data_cache_line_size;
+ if (cache_line_size_missing && !pcache_info[i].cache_line_size)
+ pcache_info[i].cache_line_size = 64;
i++;
}
/* GL1 Data Cache per SA */
@@ -1468,6 +1479,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+ if (cache_line_size_missing)
+ pcache_info[i].cache_line_size = 128;
i++;
}
/* L2 Data Cache per GPU (Total Tex Cache) */
@@ -1478,6 +1491,9 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+ pcache_info[i].cache_line_size = adev->gfx.config.gc_tcc_cache_line_size;
+ if (cache_line_size_missing && !pcache_info[i].cache_line_size)
+ pcache_info[i].cache_line_size = 128;
i++;
}
/* L3 Data Cache per GPU */
@@ -1488,6 +1504,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+ pcache_info[i].cache_line_size = 64;
i++;
}
return i;
@@ -1503,6 +1520,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev,
if (adev->gfx.config.gc_tcp_size_per_cu) {
pcache_info[i].cache_size = adev->gfx.config.gc_tcp_size_per_cu;
pcache_info[i].cache_level = 1;
+ /* Cacheline size not available in IP discovery for gc943,gc944 */
+ pcache_info[i].cache_line_size = 128;
pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
@@ -1514,6 +1533,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev,
pcache_info[i].cache_size =
adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
pcache_info[i].cache_level = 1;
+ pcache_info[i].cache_line_size = 64;
pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
@@ -1524,6 +1544,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev,
if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
pcache_info[i].cache_level = 1;
+ pcache_info[i].cache_line_size = 64;
pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
@@ -1534,6 +1555,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev,
if (adev->gfx.config.gc_tcc_size) {
pcache_info[i].cache_size = adev->gfx.config.gc_tcc_size;
pcache_info[i].cache_level = 2;
+ pcache_info[i].cache_line_size = 128;
pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
@@ -1544,6 +1566,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev,
if (adev->gmc.mall_size) {
pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
pcache_info[i].cache_level = 3;
+ pcache_info[i].cache_line_size = 64;
pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
@@ -1556,6 +1579,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev,
int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info)
{
int num_of_cache_types = 0;
+ bool cache_line_size_missing = false;
switch (kdev->adev->asic_type) {
case CHIP_KAVERI:
@@ -1614,6 +1638,8 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pc
num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
break;
case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
num_of_cache_types =
kfd_fill_gpu_cache_info_from_gfx_config_v2(kdev->kfd,
*pcache_info);
@@ -1677,8 +1703,19 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pc
case IP_VERSION(11, 0, 4):
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ /* Cacheline size not available in IP discovery for gc11.
+ * kfd_fill_gpu_cache_info_from_gfx_config to hard code it
+ */
+ cache_line_size_missing = true;
+ fallthrough;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
num_of_cache_types =
- kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info);
+ kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd,
+ cache_line_size_missing,
+ *pcache_info);
break;
default:
*pcache_info = dummy_cache_info;
@@ -2096,9 +2133,6 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
bool ext_cpu = KFD_GC_VERSION(kdev) != IP_VERSION(9, 4, 3);
int mem_bw = 819200, weight = ext_cpu ? KFD_CRAT_XGMI_WEIGHT :
KFD_CRAT_INTRA_SOCKET_WEIGHT;
- uint32_t bandwidth = ext_cpu ? amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(
- kdev->adev, NULL, true) : mem_bw;
-
/*
* with host gpu xgmi link, host can access gpu memory whether
* or not pcie bar type is large, so always create bidirectional
@@ -2107,8 +2141,16 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
sub_type_hdr->weight_xgmi = weight;
- sub_type_hdr->minimum_bandwidth_mbs = bandwidth;
- sub_type_hdr->maximum_bandwidth_mbs = bandwidth;
+ if (ext_cpu) {
+ amdgpu_xgmi_get_bandwidth(kdev->adev, NULL,
+ AMDGPU_XGMI_BW_MODE_PER_LINK,
+ AMDGPU_XGMI_BW_UNIT_MBYTES,
+ &sub_type_hdr->minimum_bandwidth_mbs,
+ &sub_type_hdr->maximum_bandwidth_mbs);
+ } else {
+ sub_type_hdr->minimum_bandwidth_mbs = mem_bw;
+ sub_type_hdr->maximum_bandwidth_mbs = mem_bw;
+ }
} else {
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
sub_type_hdr->minimum_bandwidth_mbs =
@@ -2161,12 +2203,12 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
if (use_ta_info) {
sub_type_hdr->weight_xgmi = KFD_CRAT_XGMI_WEIGHT *
- amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev);
- sub_type_hdr->maximum_bandwidth_mbs =
- amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev,
- peer_kdev->adev, false);
- sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ?
- amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0;
+ amdgpu_xgmi_get_hops_count(kdev->adev, peer_kdev->adev);
+ amdgpu_xgmi_get_bandwidth(kdev->adev, peer_kdev->adev,
+ AMDGPU_XGMI_BW_MODE_PER_PEER,
+ AMDGPU_XGMI_BW_UNIT_MBYTES,
+ &sub_type_hdr->minimum_bandwidth_mbs,
+ &sub_type_hdr->maximum_bandwidth_mbs);
} else {
bool is_single_hop = kdev->kfd == peer_kdev->kfd;
int weight = is_single_hop ? KFD_CRAT_INTRA_SOCKET_WEIGHT :
@@ -2210,9 +2252,6 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
* Modify length and total_entries as subunits are added.
*/
avail_size -= sizeof(struct crat_header);
- if (avail_size < 0)
- return -ENOMEM;
-
memset(crat_table, 0, sizeof(struct crat_header));
memcpy(&crat_table->signature, CRAT_SIGNATURE,
@@ -2226,9 +2265,6 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
* First fill in the sub type header and then sub type data
*/
avail_size -= sizeof(struct crat_subtype_computeunit);
- if (avail_size < 0)
- return -ENOMEM;
-
sub_type_hdr = (struct crat_subtype_generic *)(crat_table + 1);
memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit));
@@ -2325,6 +2361,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
continue;
if (peer_dev->gpu->kfd->hive_id != kdev->kfd->hive_id)
continue;
+ if (!amdgpu_xgmi_get_is_sharing_enabled(kdev->adev, peer_dev->gpu->adev))
+ continue;
sub_type_hdr = (typeof(sub_type_hdr))(
(char *)sub_type_hdr +
sizeof(struct crat_subtype_iolink));
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
index 300634b9f668..a8ca7ecb6d27 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
@@ -42,8 +42,6 @@
#define CRAT_OEMTABLEID_LENGTH 8
#define CRAT_RESERVED_LENGTH 6
-#define CRAT_OEMID_64BIT_MASK ((1ULL << (CRAT_OEMID_LENGTH * 8)) - 1)
-
/* Compute Unit flags */
#define COMPUTE_UNIT_CPU (1 << 0) /* Create Virtual CRAT for CPU */
#define COMPUTE_UNIT_GPU (1 << 1) /* Create Virtual CRAT for GPU */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index d889e3545120..ba99e0f258ae 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -25,6 +25,7 @@
#include "kfd_topology.h"
#include <linux/file.h>
#include <uapi/linux/kfd_ioctl.h>
+#include <uapi/linux/kfd_sysfs.h>
#define MAX_WATCH_ADDRESSES 4
@@ -103,7 +104,8 @@ void debug_event_write_work_handler(struct work_struct *work)
struct kfd_process,
debug_event_workarea);
- kernel_write(process->dbg_ev_file, &write_data, 1, &pos);
+ if (process->debug_trap_enabled && process->dbg_ev_file)
+ kernel_write(process->dbg_ev_file, &write_data, 1, &pos);
}
/* update process/device/queue exception status, write to descriptor
@@ -202,11 +204,12 @@ bool kfd_set_dbg_ev_from_interrupt(struct kfd_node *dev,
size_t exception_data_size)
{
struct kfd_process *p;
+ struct kfd_process_device *pdd = NULL;
bool signaled_to_debugger_or_runtime = false;
- p = kfd_lookup_process_by_pasid(pasid);
+ p = kfd_lookup_process_by_pasid(pasid, &pdd);
- if (!p)
+ if (!pdd)
return false;
if (!kfd_dbg_ev_raise(trap_mask, p, dev, doorbell_id, true,
@@ -236,9 +239,8 @@ bool kfd_set_dbg_ev_from_interrupt(struct kfd_node *dev,
mutex_unlock(&p->mutex);
} else if (trap_mask & KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION)) {
- kfd_dqm_evict_pasid(dev->dqm, p->pasid);
- kfd_signal_vm_fault_event(dev, p->pasid, NULL,
- exception_data);
+ kfd_evict_process_device(pdd);
+ kfd_signal_vm_fault_event(pdd, NULL, exception_data);
signaled_to_debugger_or_runtime = true;
}
@@ -274,8 +276,8 @@ int kfd_dbg_send_exception_to_runtime(struct kfd_process *p,
data = (struct kfd_hsa_memory_exception_data *)
pdd->vm_fault_exc_data;
- kfd_dqm_evict_pasid(pdd->dev->dqm, p->pasid);
- kfd_signal_vm_fault_event(pdd->dev, p->pasid, NULL, data);
+ kfd_evict_process_device(pdd);
+ kfd_signal_vm_fault_event(pdd, NULL, data);
error_reason &= ~KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION);
}
@@ -348,10 +350,27 @@ int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en)
{
uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode;
uint32_t flags = pdd->process->dbg_flags;
+ struct amdgpu_device *adev = pdd->dev->adev;
+ int r;
if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
return 0;
+ if (!pdd->proc_ctx_cpu_ptr) {
+ r = amdgpu_amdkfd_alloc_gtt_mem(adev,
+ AMDGPU_MES_PROC_CTX_SIZE,
+ &pdd->proc_ctx_bo,
+ &pdd->proc_ctx_gpu_addr,
+ &pdd->proc_ctx_cpu_ptr,
+ false);
+ if (r) {
+ dev_err(adev->dev,
+ "failed to allocate process context bo\n");
+ return r;
+ }
+ memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+ }
+
return amdgpu_mes_set_shader_debugger(pdd->dev->adev, pdd->proc_ctx_gpu_addr, spi_dbg_cntl,
pdd->watch_points, flags, sq_trap_en);
}
@@ -363,47 +382,47 @@ static int kfd_dbg_get_dev_watch_id(struct kfd_process_device *pdd, int *watch_i
*watch_id = KFD_DEBUGGER_INVALID_WATCH_POINT_ID;
- spin_lock(&pdd->dev->kfd->watch_points_lock);
+ spin_lock(&pdd->dev->watch_points_lock);
for (i = 0; i < MAX_WATCH_ADDRESSES; i++) {
/* device watchpoint in use so skip */
- if ((pdd->dev->kfd->alloc_watch_ids >> i) & 0x1)
+ if ((pdd->dev->alloc_watch_ids >> i) & 0x1)
continue;
pdd->alloc_watch_ids |= 0x1 << i;
- pdd->dev->kfd->alloc_watch_ids |= 0x1 << i;
+ pdd->dev->alloc_watch_ids |= 0x1 << i;
*watch_id = i;
- spin_unlock(&pdd->dev->kfd->watch_points_lock);
+ spin_unlock(&pdd->dev->watch_points_lock);
return 0;
}
- spin_unlock(&pdd->dev->kfd->watch_points_lock);
+ spin_unlock(&pdd->dev->watch_points_lock);
return -ENOMEM;
}
static void kfd_dbg_clear_dev_watch_id(struct kfd_process_device *pdd, int watch_id)
{
- spin_lock(&pdd->dev->kfd->watch_points_lock);
+ spin_lock(&pdd->dev->watch_points_lock);
/* process owns device watch point so safe to clear */
if ((pdd->alloc_watch_ids >> watch_id) & 0x1) {
pdd->alloc_watch_ids &= ~(0x1 << watch_id);
- pdd->dev->kfd->alloc_watch_ids &= ~(0x1 << watch_id);
+ pdd->dev->alloc_watch_ids &= ~(0x1 << watch_id);
}
- spin_unlock(&pdd->dev->kfd->watch_points_lock);
+ spin_unlock(&pdd->dev->watch_points_lock);
}
static bool kfd_dbg_owns_dev_watch_id(struct kfd_process_device *pdd, int watch_id)
{
bool owns_watch_id = false;
- spin_lock(&pdd->dev->kfd->watch_points_lock);
+ spin_lock(&pdd->dev->watch_points_lock);
owns_watch_id = watch_id < MAX_WATCH_ADDRESSES &&
((pdd->alloc_watch_ids >> watch_id) & 0x1);
- spin_unlock(&pdd->dev->kfd->watch_points_lock);
+ spin_unlock(&pdd->dev->watch_points_lock);
return owns_watch_id;
}
@@ -497,14 +516,24 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags)
int i, r = 0, rewind_count = 0;
for (i = 0; i < target->n_pdds; i++) {
- if (!kfd_dbg_is_per_vmid_supported(target->pdds[i]->dev) &&
+ struct kfd_topology_device *topo_dev =
+ kfd_topology_device_by_id(target->pdds[i]->dev->id);
+ uint32_t caps = topo_dev->node_props.capability;
+
+ if (!(caps & HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED) &&
(*flags & KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP)) {
*flags = prev_flags;
return -EACCES;
}
+
+ if (!(caps & HSA_CAP_TRAP_DEBUG_PRECISE_ALU_OPERATIONS_SUPPORTED) &&
+ (*flags & KFD_DBG_TRAP_FLAG_SINGLE_ALU_OP)) {
+ *flags = prev_flags;
+ return -EACCES;
+ }
}
- target->dbg_flags = *flags & KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP;
+ target->dbg_flags = *flags;
*flags = prev_flags;
for (i = 0; i < target->n_pdds; i++) {
struct kfd_process_device *pdd = target->pdds[i];
@@ -645,6 +674,7 @@ int kfd_dbg_trap_disable(struct kfd_process *target)
else if (target->runtime_info.runtime_state != DEBUG_RUNTIME_STATE_DISABLED)
target->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;
+ cancel_work_sync(&target->debug_event_workarea);
fput(target->dbg_ev_file);
target->dbg_ev_file = NULL;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
index fd0ff64d4184..27aa1a5b120f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
@@ -78,6 +78,8 @@ static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_node *dev)
{
return (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0) ||
KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0));
}
@@ -134,6 +136,7 @@ static inline bool kfd_dbg_has_ttmps_always_setup(struct kfd_node *dev)
KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 2)) ||
(KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0) &&
KFD_GC_VERSION(dev) < IP_VERSION(12, 0, 0) &&
- (dev->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 70);
+ (dev->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 70) ||
+ (KFD_GC_VERSION(dev) >= IP_VERSION(12, 0, 0));
}
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
index 4a5a0a4e00f2..9bde2c64540f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
@@ -27,6 +27,16 @@
#include "kfd_priv.h"
static struct dentry *debugfs_root;
+static struct dentry *debugfs_proc;
+static struct list_head procs;
+
+struct debugfs_proc_entry {
+ struct list_head list;
+ struct dentry *proc_dentry;
+ pid_t pid;
+};
+
+#define MAX_DEBUGFS_FILENAME_LEN 32
static int kfd_debugfs_open(struct inode *inode, struct file *file)
{
@@ -92,6 +102,8 @@ static const struct file_operations kfd_debugfs_hang_hws_fops = {
void kfd_debugfs_init(void)
{
debugfs_root = debugfs_create_dir("kfd", NULL);
+ debugfs_proc = debugfs_create_dir("proc", debugfs_root);
+ INIT_LIST_HEAD(&procs);
debugfs_create_file("mqds", S_IFREG | 0444, debugfs_root,
kfd_debugfs_mqds_by_process, &kfd_debugfs_fops);
@@ -107,5 +119,69 @@ void kfd_debugfs_init(void)
void kfd_debugfs_fini(void)
{
+ debugfs_remove_recursive(debugfs_proc);
debugfs_remove_recursive(debugfs_root);
}
+
+static ssize_t kfd_debugfs_pasid_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct kfd_process_device *pdd = file_inode(file)->i_private;
+ char tmp[32];
+ int len;
+
+ len = snprintf(tmp, sizeof(tmp), "%u\n", pdd->pasid);
+
+ return simple_read_from_buffer(buf, count, ppos, tmp, len);
+}
+
+static const struct file_operations kfd_debugfs_pasid_fops = {
+ .owner = THIS_MODULE,
+ .read = kfd_debugfs_pasid_read,
+};
+
+void kfd_debugfs_add_process(struct kfd_process *p)
+{
+ int i;
+ char name[MAX_DEBUGFS_FILENAME_LEN];
+ struct debugfs_proc_entry *entry;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return;
+
+ list_add(&entry->list, &procs);
+ entry->pid = p->lead_thread->pid;
+ snprintf(name, MAX_DEBUGFS_FILENAME_LEN, "%d",
+ (int)entry->pid);
+ entry->proc_dentry = debugfs_create_dir(name, debugfs_proc);
+
+ /* Create debugfs files for each GPU:
+ * - proc/<pid>/pasid_<gpuid>
+ */
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+
+ snprintf(name, MAX_DEBUGFS_FILENAME_LEN, "pasid_%u",
+ pdd->dev->id);
+ debugfs_create_file((const char *)name, S_IFREG | 0444,
+ entry->proc_dentry, pdd,
+ &kfd_debugfs_pasid_fops);
+ }
+}
+
+void kfd_debugfs_remove_process(struct kfd_process *p)
+{
+ struct debugfs_proc_entry *entry, *next;
+
+ mutex_lock(&kfd_processes_mutex);
+ list_for_each_entry_safe(entry, next, &procs, list) {
+ if (entry->pid != p->lead_thread->pid)
+ continue;
+
+ debugfs_remove_recursive(entry->proc_dentry);
+ list_del(&entry->list);
+ kfree(entry);
+ }
+ mutex_unlock(&kfd_processes_mutex);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 041ec3de55e7..bf0854bd5555 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -56,6 +56,7 @@ extern const struct kfd2kgd_calls gc_9_4_3_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v11_kfd2kgd;
+extern const struct kfd2kgd_calls gfx_v12_kfd2kgd;
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
unsigned int chunk_size);
@@ -83,6 +84,8 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
case IP_VERSION(4, 2, 2):/* ARCTURUS */
case IP_VERSION(4, 4, 0):/* ALDEBARAN */
case IP_VERSION(4, 4, 2):
+ case IP_VERSION(4, 4, 5):
+ case IP_VERSION(4, 4, 4):
case IP_VERSION(5, 0, 0):/* NAVI10 */
case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */
case IP_VERSION(5, 0, 2):/* NAVI14 */
@@ -97,6 +100,10 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
case IP_VERSION(6, 0, 3):
case IP_VERSION(6, 1, 0):
case IP_VERSION(6, 1, 1):
+ case IP_VERSION(6, 1, 2):
+ case IP_VERSION(6, 1, 3):
+ case IP_VERSION(7, 0, 0):
+ case IP_VERSION(7, 0, 1):
kfd->device_info.num_sdma_queues_per_engine = 8;
break;
default:
@@ -115,6 +122,10 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
case IP_VERSION(6, 0, 3):
case IP_VERSION(6, 1, 0):
case IP_VERSION(6, 1, 1):
+ case IP_VERSION(6, 1, 2):
+ case IP_VERSION(6, 1, 3):
+ case IP_VERSION(7, 0, 0):
+ case IP_VERSION(7, 0, 1):
/* Reserve 1 for paging and 1 for gfx */
kfd->device_info.num_reserved_sdma_queues_per_engine = 2;
/* BIT(0)=engine-0 queue-0; BIT(1)=engine-1 queue-0; BIT(2)=engine-0 queue-1; ... */
@@ -143,6 +154,8 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
break;
case IP_VERSION(9, 4, 3): /* GC 9.4.3 */
+ case IP_VERSION(9, 4, 4): /* GC 9.4.4 */
+ case IP_VERSION(9, 5, 0): /* GC 9.5.0 */
kfd->device_info.event_interrupt_class =
&event_interrupt_class_v9_4_3;
break;
@@ -168,6 +181,13 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
case IP_VERSION(11, 0, 4):
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ kfd->device_info.event_interrupt_class = &event_interrupt_class_v11;
+ break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ /* GFX12_TODO: Change to v12 version. */
kfd->device_info.event_interrupt_class = &event_interrupt_class_v11;
break;
default:
@@ -220,6 +240,11 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
*/
kfd->device_info.needs_pci_atomics = true;
kfd->device_info.no_atomic_fw_version = kfd->adev->gfx.rs64_enable ? 509 : 0;
+ } else if (gc_version < IP_VERSION(13, 0, 0)) {
+ kfd->device_info.needs_pci_atomics = true;
+ kfd->device_info.no_atomic_fw_version = 2090;
+ } else {
+ kfd->device_info.needs_pci_atomics = true;
}
} else {
kfd->device_info.doorbell_size = 4;
@@ -327,9 +352,12 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
f2g = &aldebaran_kfd2kgd;
break;
case IP_VERSION(9, 4, 3):
- gfx_target_version = adev->rev_id >= 1 ? 90402
- : adev->flags & AMD_IS_APU ? 90400
- : 90401;
+ case IP_VERSION(9, 4, 4):
+ gfx_target_version = 90402;
+ f2g = &gc_9_4_3_kfd2kgd;
+ break;
+ case IP_VERSION(9, 5, 0):
+ gfx_target_version = 90500;
f2g = &gc_9_4_3_kfd2kgd;
break;
/* Navi10 */
@@ -408,15 +436,8 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
f2g = &gfx_v11_kfd2kgd;
break;
case IP_VERSION(11, 0, 3):
- if ((adev->pdev->device == 0x7460 &&
- adev->pdev->revision == 0x00) ||
- (adev->pdev->device == 0x7461 &&
- adev->pdev->revision == 0x00))
- /* Note: Compiler version is 11.0.5 while HW version is 11.0.3 */
- gfx_target_version = 110005;
- else
- /* Note: Compiler version is 11.0.1 while HW version is 11.0.3 */
- gfx_target_version = 110001;
+ /* Note: Compiler version is 11.0.1 while HW version is 11.0.3 */
+ gfx_target_version = 110001;
f2g = &gfx_v11_kfd2kgd;
break;
case IP_VERSION(11, 5, 0):
@@ -427,6 +448,22 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
gfx_target_version = 110501;
f2g = &gfx_v11_kfd2kgd;
break;
+ case IP_VERSION(11, 5, 2):
+ gfx_target_version = 110502;
+ f2g = &gfx_v11_kfd2kgd;
+ break;
+ case IP_VERSION(11, 5, 3):
+ gfx_target_version = 110503;
+ f2g = &gfx_v11_kfd2kgd;
+ break;
+ case IP_VERSION(12, 0, 0):
+ gfx_target_version = 120000;
+ f2g = &gfx_v12_kfd2kgd;
+ break;
+ case IP_VERSION(12, 0, 1):
+ gfx_target_version = 120001;
+ f2g = &gfx_v12_kfd2kgd;
+ break;
default:
break;
}
@@ -435,12 +472,12 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
if (!f2g) {
if (amdgpu_ip_version(adev, GC_HWIP, 0))
- dev_err(kfd_device,
+ dev_info(kfd_device,
"GC IP %06x %s not supported in kfd\n",
amdgpu_ip_version(adev, GC_HWIP, 0),
vf ? "VF" : "");
else
- dev_err(kfd_device, "%s %s not supported in kfd\n",
+ dev_info(kfd_device, "%s %s not supported in kfd\n",
amdgpu_asic_name[adev->asic_type], vf ? "VF" : "");
return NULL;
}
@@ -480,11 +517,16 @@ static void kfd_cwsr_init(struct kfd_dev *kfd)
> KFD_CWSR_TMA_OFFSET);
kfd->cwsr_isa = cwsr_trap_aldebaran_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex);
- } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3)) {
+ } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 4)) {
BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_4_3_hex)
> KFD_CWSR_TMA_OFFSET);
kfd->cwsr_isa = cwsr_trap_gfx9_4_3_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_4_3_hex);
+ } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 5, 0)) {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_5_0_hex) > PAGE_SIZE);
+ kfd->cwsr_isa = cwsr_trap_gfx9_5_0_hex;
+ kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_5_0_hex);
} else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) {
BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex)
> KFD_CWSR_TMA_OFFSET);
@@ -500,12 +542,17 @@ static void kfd_cwsr_init(struct kfd_dev *kfd)
> KFD_CWSR_TMA_OFFSET);
kfd->cwsr_isa = cwsr_trap_gfx10_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex);
- } else {
+ } else if (KFD_GC_VERSION(kfd) < IP_VERSION(12, 0, 0)) {
/* The gfx11 cwsr trap handler must fit inside a single
page. */
BUILD_BUG_ON(sizeof(cwsr_trap_gfx11_hex) > PAGE_SIZE);
kfd->cwsr_isa = cwsr_trap_gfx11_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx11_hex);
+ } else {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx12_hex)
+ > KFD_CWSR_TMA_OFFSET);
+ kfd->cwsr_isa = cwsr_trap_gfx12_hex;
+ kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx12_hex);
}
kfd->cwsr_enabled = true;
@@ -530,15 +577,21 @@ static int kfd_gws_init(struct kfd_node *node)
&& kfd->mec2_fw_version >= 0x30) ||
(KFD_GC_VERSION(node) == IP_VERSION(9, 4, 2)
&& kfd->mec2_fw_version >= 0x28) ||
- (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 3)) ||
+ (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(node) == IP_VERSION(9, 4, 4)) ||
+ (KFD_GC_VERSION(node) == IP_VERSION(9, 5, 0)) ||
(KFD_GC_VERSION(node) >= IP_VERSION(10, 3, 0)
&& KFD_GC_VERSION(node) < IP_VERSION(11, 0, 0)
&& kfd->mec2_fw_version >= 0x6b) ||
(KFD_GC_VERSION(node) >= IP_VERSION(11, 0, 0)
&& KFD_GC_VERSION(node) < IP_VERSION(12, 0, 0)
- && mes_rev >= 68))))
+ && mes_rev >= 68) ||
+ (KFD_GC_VERSION(node) >= IP_VERSION(12, 0, 0))))) {
+ if (KFD_GC_VERSION(node) >= IP_VERSION(12, 0, 0))
+ node->adev->gds.gws_size = 64;
ret = amdgpu_amdkfd_alloc_gws(node->adev,
node->adev->gds.gws_size, &node->gws);
+ }
return ret;
}
@@ -602,6 +655,14 @@ static void kfd_cleanup_nodes(struct kfd_dev *kfd, unsigned int num_nodes)
struct kfd_node *knode;
unsigned int i;
+ /*
+ * flush_work ensures that there are no outstanding
+ * work-queue items that will access interrupt_ring. New work items
+ * can't be created because we stopped interrupt handling above.
+ */
+ flush_workqueue(kfd->ih_wq);
+ destroy_workqueue(kfd->ih_wq);
+
for (i = 0; i < num_nodes; i++) {
knode = kfd->nodes[i];
device_queue_manager_uninit(knode->dqm);
@@ -697,14 +758,14 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
vmid_num_kfd = last_vmid_kfd - first_vmid_kfd + 1;
- /* For GFX9.4.3, we need special handling for VMIDs depending on
- * partition mode.
+ /* For multi-partition capable GPUs, we need special handling for VMIDs
+ * depending on partition mode.
* In CPX mode, the VMID range needs to be shared between XCDs.
* Additionally, there are 13 VMIDs (3-15) available for KFD. To
* divide them equally, we change starting VMID to 4 and not use
* VMID 3.
- * If the VMID range changes for GFX9.4.3, then this code MUST be
- * revisited.
+ * If the VMID range changes for multi-partition capable GPUs, then
+ * this code MUST be revisited.
*/
if (kfd->adev->xcp_mgr) {
partition_mode = amdgpu_xcp_query_partition_mode(kfd->adev->xcp_mgr,
@@ -769,11 +830,12 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
kfd->hive_id = kfd->adev->gmc.xgmi.hive_id;
/*
- * For GFX9.4.3, the KFD abstracts all partitions within a socket as
- * xGMI connected in the topology so assign a unique hive id per
- * device based on the pci device location if device is in PCIe mode.
+ * For multi-partition capable GPUs, the KFD abstracts all partitions
+ * within a socket as xGMI connected in the topology so assign a unique
+ * hive id per device based on the pci device location if device is in
+ * PCIe mode.
*/
- if (!kfd->hive_id && (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3)) && kfd->num_nodes > 1)
+ if (!kfd->hive_id && kfd->num_nodes > 1)
kfd->hive_id = pci_dev_id(kfd->adev->pdev);
kfd->noretry = kfd->adev->gmc.noretry;
@@ -811,11 +873,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
KFD_XCP_MEMORY_SIZE(node->adev, node->node_id) >> 20);
}
- if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) &&
- partition_mode == AMDGPU_CPX_PARTITION_MODE &&
+ if (partition_mode == AMDGPU_CPX_PARTITION_MODE &&
kfd->num_nodes != 1) {
- /* For GFX9.4.3 and CPX mode, first XCD gets VMID range
- * 4-9 and second XCD gets VMID range 10-15.
+ /* For multi-partition capable GPUs and CPX mode, first
+ * XCD gets VMID range 4-9 and second XCD gets VMID
+ * range 10-15.
*/
node->vm_info.first_vmid_kfd = (i%2 == 0) ?
@@ -839,7 +901,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
amdgpu_amdkfd_get_local_mem_info(kfd->adev,
&node->local_mem_info, node->xcp);
- if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3))
+ if (kfd->adev->xcp_mgr)
kfd_setup_interrupt_bitmap(node, i);
/* Initialize the KFD node */
@@ -847,13 +909,14 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
dev_err(kfd_device, "Error initializing KFD node\n");
goto node_init_error;
}
+
+ spin_lock_init(&node->watch_points_lock);
+
kfd->nodes[i] = node;
}
svm_range_set_max_pages(kfd->adev);
- spin_lock_init(&kfd->watch_points_lock);
-
kfd->init_complete = true;
dev_info(kfd_device, "added device %x:%x\n", kfd->adev->pdev->vendor,
kfd->adev->pdev->device);
@@ -870,7 +933,7 @@ node_alloc_error:
kfd_doorbell_error:
kfd_gtt_sa_fini(kfd);
kfd_gtt_sa_init_error:
- amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem);
+ amdgpu_amdkfd_free_gtt_mem(kfd->adev, &kfd->gtt_mem);
alloc_gtt_mem_failure:
dev_err(kfd_device,
"device %x:%x NOT added due to errors\n",
@@ -888,13 +951,14 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
kfd_doorbell_fini(kfd);
ida_destroy(&kfd->doorbell_ida);
kfd_gtt_sa_fini(kfd);
- amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem);
+ amdgpu_amdkfd_free_gtt_mem(kfd->adev, &kfd->gtt_mem);
}
kfree(kfd);
}
-int kgd2kfd_pre_reset(struct kfd_dev *kfd)
+int kgd2kfd_pre_reset(struct kfd_dev *kfd,
+ struct amdgpu_reset_context *reset_context)
{
struct kfd_node *node;
int i;
@@ -904,8 +968,7 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd)
for (i = 0; i < kfd->num_nodes; i++) {
node = kfd->nodes[i];
- kfd_smi_event_update_gpu_reset(node, false);
- node->dqm->ops.pre_reset(node->dqm);
+ kfd_smi_event_update_gpu_reset(node, false, reset_context);
}
kgd2kfd_suspend(kfd, false);
@@ -944,7 +1007,7 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd)
for (i = 0; i < kfd->num_nodes; i++) {
node = kfd->nodes[i];
atomic_set(&node->sram_ecc_flag, 0);
- kfd_smi_event_update_gpu_reset(node, true);
+ kfd_smi_event_update_gpu_reset(node, true, NULL);
}
return 0;
@@ -960,7 +1023,6 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
{
struct kfd_node *node;
int i;
- int count;
if (!kfd->init_complete)
return;
@@ -968,12 +1030,10 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
/* for runtime suspend, skip locking kfd */
if (!run_pm) {
mutex_lock(&kfd_processes_mutex);
- count = ++kfd_locked;
- mutex_unlock(&kfd_processes_mutex);
-
/* For first KFD device suspend all the KFD processes */
- if (count == 1)
+ if (++kfd_locked == 1)
kfd_suspend_all_processes();
+ mutex_unlock(&kfd_processes_mutex);
}
for (i = 0; i < kfd->num_nodes; i++) {
@@ -984,7 +1044,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
{
- int ret, count, i;
+ int ret, i;
if (!kfd->init_complete)
return 0;
@@ -998,12 +1058,10 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
/* for runtime resume, skip unlocking kfd */
if (!run_pm) {
mutex_lock(&kfd_processes_mutex);
- count = --kfd_locked;
- mutex_unlock(&kfd_processes_mutex);
-
- WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
- if (count == 0)
+ if (--kfd_locked == 0)
ret = kfd_resume_all_processes();
+ WARN_ONCE(kfd_locked < 0, "KFD suspend / resume ref. error");
+ mutex_unlock(&kfd_processes_mutex);
}
return ret;
@@ -1022,21 +1080,6 @@ static int kfd_resume(struct kfd_node *node)
return err;
}
-static inline void kfd_queue_work(struct workqueue_struct *wq,
- struct work_struct *work)
-{
- int cpu, new_cpu;
-
- cpu = new_cpu = smp_processor_id();
- do {
- new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids;
- if (cpu_to_node(new_cpu) == numa_node_id())
- break;
- } while (cpu != new_cpu);
-
- queue_work_on(new_cpu, wq, work);
-}
-
/* This is called directly from KGD at ISR. */
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
{
@@ -1062,7 +1105,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
patched_ihre, &is_patched)
&& enqueue_ih_ring_entry(node,
is_patched ? patched_ihre : ih_ring_entry)) {
- kfd_queue_work(node->ih_wq, &node->interrupt_work);
+ queue_work(node->kfd->ih_wq, &node->interrupt_work);
spin_unlock_irqrestore(&node->interrupt_lock, flags);
return;
}
@@ -1359,6 +1402,13 @@ void kfd_dec_compute_active(struct kfd_node *node)
WARN_ONCE(count < 0, "Compute profile ref. count error");
}
+static bool kfd_compute_active(struct kfd_node *node)
+{
+ if (atomic_read(&node->kfd->compute_profile))
+ return true;
+ return false;
+}
+
void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
{
/*
@@ -1413,6 +1463,130 @@ void kgd2kfd_unlock_kfd(void)
mutex_unlock(&kfd_processes_mutex);
}
+int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id)
+{
+ struct kfd_node *node;
+ int ret;
+
+ if (!kfd->init_complete)
+ return 0;
+
+ if (node_id >= kfd->num_nodes) {
+ dev_warn(kfd->adev->dev, "Invalid node ID: %u exceeds %u\n",
+ node_id, kfd->num_nodes - 1);
+ return -EINVAL;
+ }
+ node = kfd->nodes[node_id];
+
+ ret = node->dqm->ops.unhalt(node->dqm);
+ if (ret)
+ dev_err(kfd_device, "Error in starting scheduler\n");
+
+ return ret;
+}
+
+int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id)
+{
+ struct kfd_node *node;
+
+ if (!kfd->init_complete)
+ return 0;
+
+ if (node_id >= kfd->num_nodes) {
+ dev_warn(kfd->adev->dev, "Invalid node ID: %u exceeds %u\n",
+ node_id, kfd->num_nodes - 1);
+ return -EINVAL;
+ }
+
+ node = kfd->nodes[node_id];
+ return node->dqm->ops.halt(node->dqm);
+}
+
+bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id)
+{
+ struct kfd_node *node;
+
+ if (!kfd->init_complete)
+ return false;
+
+ if (node_id >= kfd->num_nodes) {
+ dev_warn(kfd->adev->dev, "Invalid node ID: %u exceeds %u\n",
+ node_id, kfd->num_nodes - 1);
+ return false;
+ }
+
+ node = kfd->nodes[node_id];
+
+ return kfd_compute_active(node);
+}
+
+/**
+ * kgd2kfd_vmfault_fast_path() - KFD vm page fault interrupt handling fast path for gmc v9
+ * @adev: amdgpu device
+ * @entry: vm fault interrupt vector
+ * @retry_fault: if this is retry fault
+ *
+ * retry fault -
+ * with CAM enabled, adev primary ring
+ * | gmc_v9_0_process_interrupt()
+ * adev soft_ring
+ * | gmc_v9_0_process_interrupt() worker failed to recover page fault
+ * KFD node ih_fifo
+ * | KFD interrupt_wq worker
+ * kfd_signal_vm_fault_event
+ *
+ * without CAM, adev primary ring1
+ * | gmc_v9_0_process_interrupt worker failed to recvoer page fault
+ * KFD node ih_fifo
+ * | KFD interrupt_wq worker
+ * kfd_signal_vm_fault_event
+ *
+ * no-retry fault -
+ * adev primary ring
+ * | gmc_v9_0_process_interrupt()
+ * KFD node ih_fifo
+ * | KFD interrupt_wq worker
+ * kfd_signal_vm_fault_event
+ *
+ * fast path - After kfd_signal_vm_fault_event, gmc_v9_0_process_interrupt drop the page fault
+ * of same process, don't copy interrupt to KFD node ih_fifo.
+ * With gdb debugger enabled, need convert the retry fault to no-retry fault for
+ * debugger, cannot use the fast path.
+ *
+ * Return:
+ * true - use the fast path to handle this fault
+ * false - use normal path to handle it
+ */
+bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry,
+ bool retry_fault)
+{
+ struct kfd_process *p;
+ u32 cam_index;
+
+ if (entry->ih == &adev->irq.ih_soft || entry->ih == &adev->irq.ih1) {
+ p = kfd_lookup_process_by_pasid(entry->pasid, NULL);
+ if (!p)
+ return true;
+
+ if (p->gpu_page_fault && !p->debug_trap_enabled) {
+ if (retry_fault && adev->irq.retry_cam_enabled) {
+ cam_index = entry->src_data[2] & 0x3ff;
+ WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index);
+ }
+
+ kfd_unref_process(p);
+ return true;
+ }
+
+ /*
+ * This is the first page fault, set flag and then signal user space
+ */
+ p->gpu_page_fault = true;
+ kfd_unref_process(p);
+ }
+ return false;
+}
+
#if defined(CONFIG_DEBUG_FS)
/* This function will send a package to HIQ to hang the HWS
@@ -1425,6 +1599,11 @@ int kfd_debugfs_hang_hws(struct kfd_node *dev)
return -EINVAL;
}
+ if (dev->kfd->shared_resources.enable_mes) {
+ dev_err(dev->adev->dev, "Inducing MES hang is not supported\n");
+ return -EINVAL;
+ }
+
return dqm_debugfs_hang_hws(dev->dqm);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index f4d395e38683..76359c6a3f3a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -35,12 +35,16 @@
#include "cik_regs.h"
#include "kfd_kernel_queue.h"
#include "amdgpu_amdkfd.h"
-#include "mes_api_def.h"
+#include "amdgpu_reset.h"
+#include "amdgpu_sdma.h"
+#include "mes_v11_api_def.h"
#include "kfd_debug.h"
/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
+/* See unmap_queues_cpsch() */
+#define USE_DEFAULT_GRACE_PERIOD 0xffffffff
static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
u32 pasid, unsigned int vmid);
@@ -65,7 +69,8 @@ static inline void deallocate_hqd(struct device_queue_manager *dqm,
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
static int allocate_sdma_queue(struct device_queue_manager *dqm,
struct queue *q, const uint32_t *restore_sdma_id);
-static void kfd_process_hw_exception(struct work_struct *work);
+
+static int reset_queues_on_hws_hang(struct device_queue_manager *dqm, bool is_sdma);
static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
@@ -152,17 +157,24 @@ void program_sh_mem_settings(struct device_queue_manager *dqm,
static void kfd_hws_hang(struct device_queue_manager *dqm)
{
+ struct device_process_node *cur;
+ struct qcm_process_device *qpd;
+ struct queue *q;
+
+ /* Mark all device queues as reset. */
+ list_for_each_entry(cur, &dqm->queues, list) {
+ qpd = cur->qpd;
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ struct kfd_process_device *pdd = qpd_to_pdd(qpd);
+
+ pdd->has_reset_queue = true;
+ }
+ }
+
/*
* Issue a GPU reset if HWS is unresponsive
*/
- dqm->is_hws_hang = true;
-
- /* It's possible we're detecting a HWS hang in the
- * middle of a GPU reset. No need to schedule another
- * reset in this case.
- */
- if (!dqm->is_resetting)
- schedule_work(&dqm->hw_exception_work);
+ amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
}
static int convert_to_mes_queue_type(int queue_type)
@@ -194,11 +206,13 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
int r, queue_type;
uint64_t wptr_addr_off;
- if (dqm->is_hws_hang)
+ if (!dqm->sched_running || dqm->sched_halt)
+ return 0;
+ if (!down_read_trylock(&adev->reset_domain->sem))
return -EIO;
memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
- queue_input.process_id = qpd->pqm->process->pasid;
+ queue_input.process_id = pdd->pasid;
queue_input.page_table_base_addr = qpd->page_table_base;
queue_input.process_va_start = 0;
queue_input.process_va_end = adev->vm_manager.max_pfn - 1;
@@ -214,10 +228,8 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
queue_input.mqd_addr = q->gart_mqd_addr;
queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;
- if (q->wptr_bo) {
- wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1);
- queue_input.wptr_mc_addr = amdgpu_bo_gpu_offset(q->wptr_bo) + wptr_addr_off;
- }
+ wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1);
+ queue_input.wptr_mc_addr = amdgpu_bo_gpu_offset(q->properties.wptr_bo) + wptr_addr_off;
queue_input.is_kfd_process = 1;
queue_input.is_aql_queue = (q->properties.format == KFD_QUEUE_FORMAT_AQL);
@@ -236,6 +248,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
if (queue_type < 0) {
dev_err(adev->dev, "Queue type not supported with MES, queue:%d\n",
q->properties.type);
+ up_read(&adev->reset_domain->sem);
return -EINVAL;
}
queue_input.queue_type = (uint32_t)queue_type;
@@ -245,6 +258,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
amdgpu_mes_lock(&adev->mes);
r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
amdgpu_mes_unlock(&adev->mes);
+ up_read(&adev->reset_domain->sem);
if (r) {
dev_err(adev->dev, "failed to add hardware queue to MES, doorbell=0x%x\n",
q->properties.doorbell_off);
@@ -262,7 +276,9 @@ static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
int r;
struct mes_remove_queue_input queue_input;
- if (dqm->is_hws_hang)
+ if (!dqm->sched_running || dqm->sched_halt)
+ return 0;
+ if (!down_read_trylock(&adev->reset_domain->sem))
return -EIO;
memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
@@ -272,6 +288,7 @@ static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
amdgpu_mes_lock(&adev->mes);
r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
amdgpu_mes_unlock(&adev->mes);
+ up_read(&adev->reset_domain->sem);
if (r) {
dev_err(adev->dev, "failed to remove hardware queue from MES, doorbell=0x%x\n",
@@ -283,7 +300,7 @@ static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
return r;
}
-static int remove_all_queues_mes(struct device_queue_manager *dqm)
+static int remove_all_kfd_queues_mes(struct device_queue_manager *dqm)
{
struct device_process_node *cur;
struct device *dev = dqm->dev->adev->dev;
@@ -310,6 +327,73 @@ static int remove_all_queues_mes(struct device_queue_manager *dqm)
return retval;
}
+static int add_all_kfd_queues_mes(struct device_queue_manager *dqm)
+{
+ struct device_process_node *cur;
+ struct device *dev = dqm->dev->adev->dev;
+ struct qcm_process_device *qpd;
+ struct queue *q;
+ int retval = 0;
+
+ list_for_each_entry(cur, &dqm->queues, list) {
+ qpd = cur->qpd;
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if (!q->properties.is_active)
+ continue;
+ retval = add_queue_mes(dqm, q, qpd);
+ if (retval) {
+ dev_err(dev, "%s: Failed to add queue %d for dev %d",
+ __func__,
+ q->properties.queue_id,
+ dqm->dev->id);
+ return retval;
+ }
+ }
+ }
+
+ return retval;
+}
+
+static int suspend_all_queues_mes(struct device_queue_manager *dqm)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
+ int r = 0;
+
+ if (!down_read_trylock(&adev->reset_domain->sem))
+ return -EIO;
+
+ r = amdgpu_mes_suspend(adev);
+ up_read(&adev->reset_domain->sem);
+
+ if (r) {
+ dev_err(adev->dev, "failed to suspend gangs from MES\n");
+ dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n");
+ kfd_hws_hang(dqm);
+ }
+
+ return r;
+}
+
+static int resume_all_queues_mes(struct device_queue_manager *dqm)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
+ int r = 0;
+
+ if (!down_read_trylock(&adev->reset_domain->sem))
+ return -EIO;
+
+ r = amdgpu_mes_resume(adev);
+ up_read(&adev->reset_domain->sem);
+
+ if (r) {
+ dev_err(adev->dev, "failed to resume gangs from MES\n");
+ dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n");
+ kfd_hws_hang(dqm);
+ }
+
+ return r;
+}
+
static void increment_queue_count(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
struct queue *q)
@@ -447,6 +531,7 @@ static int allocate_vmid(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
struct queue *q)
{
+ struct kfd_process_device *pdd = qpd_to_pdd(qpd);
struct device *dev = dqm->dev->adev->dev;
int allocated_vmid = -1, i;
@@ -465,9 +550,9 @@ static int allocate_vmid(struct device_queue_manager *dqm,
pr_debug("vmid allocated: %d\n", allocated_vmid);
- dqm->vmid_pasid[allocated_vmid] = q->process->pasid;
+ dqm->vmid_pasid[allocated_vmid] = pdd->pasid;
- set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);
+ set_pasid_vmid_mapping(dqm, pdd->pasid, allocated_vmid);
qpd->vmid = allocated_vmid;
q->properties.vmid = allocated_vmid;
@@ -719,6 +804,11 @@ static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process
return -EOPNOTSUPP;
}
+ /* taking the VMID for that process on the safe way using PDD */
+ pdd = kfd_get_process_device_data(dev, p);
+ if (!pdd)
+ return -EFAULT;
+
/* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
* ATC_VMID15_PASID_MAPPING
* to check which VMID the current process is mapped to.
@@ -728,23 +818,19 @@ static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process
status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
(dev->adev, vmid, &queried_pasid);
- if (status && queried_pasid == p->pasid) {
- pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
- vmid, p->pasid);
+ if (status && queried_pasid == pdd->pasid) {
+ pr_debug("Killing wave fronts of vmid %d and process pid %d\n",
+ vmid, p->lead_thread->pid);
break;
}
}
if (vmid > last_vmid_to_scan) {
- dev_err(dev->adev->dev, "Didn't find vmid for pasid 0x%x\n", p->pasid);
+ dev_err(dev->adev->dev, "Didn't find vmid for process pid %d\n",
+ p->lead_thread->pid);
return -EFAULT;
}
- /* taking the VMID for that process on the safe way using PDD */
- pdd = kfd_get_process_device_data(dev, p);
- if (!pdd)
- return -EFAULT;
-
reg_gfx_index.bits.sh_broadcast_writes = 1;
reg_gfx_index.bits.se_broadcast_writes = 1;
reg_gfx_index.bits.instance_broadcast_writes = 1;
@@ -883,6 +969,12 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,
else if (prev_active)
retval = remove_queue_mes(dqm, q, &pdd->qpd);
+ /* queue is reset so inaccessable */
+ if (pdd->has_reset_queue) {
+ retval = -EACCES;
+ goto out_unlock;
+ }
+
if (retval) {
dev_err(dev, "unmap queue failed\n");
goto out_unlock;
@@ -974,8 +1066,8 @@ static int suspend_single_queue(struct device_queue_manager *dqm,
if (q->properties.is_suspended)
return 0;
- pr_debug("Suspending PASID %u queue [%i]\n",
- pdd->process->pasid,
+ pr_debug("Suspending process pid %d queue [%i]\n",
+ pdd->process->lead_thread->pid,
q->properties.queue_id);
is_new = q->properties.exception_status & KFD_EC_MASK(EC_QUEUE_NEW);
@@ -1022,8 +1114,8 @@ static int resume_single_queue(struct device_queue_manager *dqm,
pdd = qpd_to_pdd(qpd);
- pr_debug("Restoring from suspend PASID %u queue [%i]\n",
- pdd->process->pasid,
+ pr_debug("Restoring from suspend process pid %d queue [%i]\n",
+ pdd->process->lead_thread->pid,
q->properties.queue_id);
q->properties.is_suspended = false;
@@ -1056,8 +1148,8 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
goto out;
pdd = qpd_to_pdd(qpd);
- pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
- pdd->process->pasid);
+ pr_debug_ratelimited("Evicting process pid %d queues\n",
+ pdd->process->lead_thread->pid);
pdd->last_evict_timestamp = get_jiffies_64();
/* Mark all queues as evicted. Deactivate all active queues on
@@ -1114,8 +1206,8 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
if (!pdd->drm_priv)
goto out;
- pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
- pdd->process->pasid);
+ pr_debug_ratelimited("Evicting process pid %d queues\n",
+ pdd->process->lead_thread->pid);
/* Mark all queues as evicted. Deactivate all active queues on
* the qpd.
@@ -1129,11 +1221,13 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
decrement_queue_count(dqm, qpd, q);
if (dqm->dev->kfd->shared_resources.enable_mes) {
- retval = remove_queue_mes(dqm, q, qpd);
- if (retval) {
+ int err;
+
+ err = remove_queue_mes(dqm, q, qpd);
+ if (err) {
dev_err(dev, "Failed to evict queue %d\n",
q->properties.queue_id);
- goto out;
+ retval = err;
}
}
}
@@ -1173,8 +1267,8 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
goto out;
}
- pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
- pdd->process->pasid);
+ pr_debug_ratelimited("Restoring process pid %d queues\n",
+ pdd->process->lead_thread->pid);
/* Update PD Base in QPD */
qpd->page_table_base = pd_base;
@@ -1257,8 +1351,8 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
if (!pdd->drm_priv)
goto vm_not_acquired;
- pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
- pdd->process->pasid);
+ pr_debug_ratelimited("Restoring process pid %d queues\n",
+ pdd->process->lead_thread->pid);
/* Update PD Base in QPD */
qpd->page_table_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
@@ -1468,20 +1562,13 @@ static int stop_nocpsch(struct device_queue_manager *dqm)
}
if (dqm->dev->adev->asic_type == CHIP_HAWAII)
- pm_uninit(&dqm->packet_mgr, false);
+ pm_uninit(&dqm->packet_mgr);
dqm->sched_running = false;
dqm_unlock(dqm);
return 0;
}
-static void pre_reset(struct device_queue_manager *dqm)
-{
- dqm_lock(dqm);
- dqm->is_resetting = true;
- dqm_unlock(dqm);
-}
-
static int allocate_sdma_queue(struct device_queue_manager *dqm,
struct queue *q, const uint32_t *restore_sdma_id)
{
@@ -1489,8 +1576,9 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
int bit;
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
- if (bitmap_empty(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES)) {
- dev_err(dev, "No more SDMA queue to allocate\n");
+ if (bitmap_empty(dqm->sdma_bitmap, get_num_sdma_queues(dqm))) {
+ dev_warn(dev, "No more SDMA queue to allocate (%d total queues)\n",
+ get_num_sdma_queues(dqm));
return -ENOMEM;
}
@@ -1515,8 +1603,9 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
q->properties.sdma_queue_id = q->sdma_id /
kfd_get_num_sdma_engines(dqm->dev);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
- if (bitmap_empty(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES)) {
- dev_err(dev, "No more XGMI SDMA queue to allocate\n");
+ if (bitmap_empty(dqm->xgmi_sdma_bitmap, get_num_xgmi_sdma_queues(dqm))) {
+ dev_warn(dev, "No more XGMI SDMA queue to allocate (%d total queues)\n",
+ get_num_xgmi_sdma_queues(dqm));
return -ENOMEM;
}
if (restore_sdma_id) {
@@ -1544,6 +1633,41 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev);
q->properties.sdma_queue_id = q->sdma_id /
kfd_get_num_xgmi_sdma_engines(dqm->dev);
+ } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) {
+ int i, num_queues, num_engines, eng_offset = 0, start_engine;
+ bool free_bit_found = false, is_xgmi = false;
+
+ if (q->properties.sdma_engine_id < kfd_get_num_sdma_engines(dqm->dev)) {
+ num_queues = get_num_sdma_queues(dqm);
+ num_engines = kfd_get_num_sdma_engines(dqm->dev);
+ q->properties.type = KFD_QUEUE_TYPE_SDMA;
+ } else {
+ num_queues = get_num_xgmi_sdma_queues(dqm);
+ num_engines = kfd_get_num_xgmi_sdma_engines(dqm->dev);
+ eng_offset = kfd_get_num_sdma_engines(dqm->dev);
+ q->properties.type = KFD_QUEUE_TYPE_SDMA_XGMI;
+ is_xgmi = true;
+ }
+
+ /* Scan available bit based on target engine ID. */
+ start_engine = q->properties.sdma_engine_id - eng_offset;
+ for (i = start_engine; i < num_queues; i += num_engines) {
+
+ if (!test_bit(i, is_xgmi ? dqm->xgmi_sdma_bitmap : dqm->sdma_bitmap))
+ continue;
+
+ clear_bit(i, is_xgmi ? dqm->xgmi_sdma_bitmap : dqm->sdma_bitmap);
+ q->sdma_id = i;
+ q->properties.sdma_queue_id = q->sdma_id / num_engines;
+ free_bit_found = true;
+ break;
+ }
+
+ if (!free_bit_found) {
+ dev_warn(dev, "No more SDMA queue to allocate for target ID %i (%d total queues)\n",
+ q->properties.sdma_engine_id, num_queues);
+ return -ENOMEM;
+ }
}
pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
@@ -1624,22 +1748,75 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
dqm->active_cp_queue_count = 0;
dqm->gws_queue_count = 0;
dqm->active_runlist = false;
- INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
dqm->trap_debug_vmid = 0;
init_sdma_bitmaps(dqm);
- if (dqm->dev->kfd2kgd->get_iq_wait_times)
- dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
- &dqm->wait_times,
- ffs(dqm->dev->xcc_mask) - 1);
+ update_dqm_wait_times(dqm);
return 0;
}
+/* halt_cpsch:
+ * Unmap queues so the schedule doesn't continue remaining jobs in the queue.
+ * Then set dqm->sched_halt so queues don't map to runlist until unhalt_cpsch
+ * is called.
+ */
+static int halt_cpsch(struct device_queue_manager *dqm)
+{
+ int ret = 0;
+
+ dqm_lock(dqm);
+ if (!dqm->sched_running) {
+ dqm_unlock(dqm);
+ return 0;
+ }
+
+ WARN_ONCE(dqm->sched_halt, "Scheduling is already on halt\n");
+
+ if (!dqm->is_hws_hang) {
+ if (!dqm->dev->kfd->shared_resources.enable_mes)
+ ret = unmap_queues_cpsch(dqm,
+ KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
+ USE_DEFAULT_GRACE_PERIOD, false);
+ else
+ ret = remove_all_kfd_queues_mes(dqm);
+ }
+ dqm->sched_halt = true;
+ dqm_unlock(dqm);
+
+ return ret;
+}
+
+/* unhalt_cpsch
+ * Unset dqm->sched_halt and map queues back to runlist
+ */
+static int unhalt_cpsch(struct device_queue_manager *dqm)
+{
+ int ret = 0;
+
+ dqm_lock(dqm);
+ if (!dqm->sched_running || !dqm->sched_halt) {
+ WARN_ONCE(!dqm->sched_halt, "Scheduling is not on halt.\n");
+ dqm_unlock(dqm);
+ return 0;
+ }
+ dqm->sched_halt = false;
+ if (!dqm->dev->kfd->shared_resources.enable_mes)
+ ret = execute_queues_cpsch(dqm,
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES,
+ 0, USE_DEFAULT_GRACE_PERIOD);
+ else
+ ret = add_all_kfd_queues_mes(dqm);
+
+ dqm_unlock(dqm);
+
+ return ret;
+}
+
static int start_cpsch(struct device_queue_manager *dqm)
{
struct device *dev = dqm->dev->adev->dev;
- int retval;
+ int retval, num_hw_queue_slots;
retval = 0;
@@ -1669,38 +1846,37 @@ static int start_cpsch(struct device_queue_manager *dqm)
init_interrupts(dqm);
/* clear hang status when driver try to start the hw scheduler */
- dqm->is_hws_hang = false;
- dqm->is_resetting = false;
dqm->sched_running = true;
- if (!dqm->dev->kfd->shared_resources.enable_mes)
+ if (!dqm->dev->kfd->shared_resources.enable_mes) {
+ if (pm_config_dequeue_wait_counts(&dqm->packet_mgr,
+ KFD_DEQUEUE_WAIT_INIT, 0 /* unused */))
+ dev_err(dev, "Setting optimized dequeue wait failed. Using default values\n");
execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
+ }
- /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
- if (amdgpu_emu_mode == 0 && dqm->dev->adev->gmc.is_app_apu &&
- (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3))) {
- uint32_t reg_offset = 0;
- uint32_t grace_period = 1;
+ /* setup per-queue reset detection buffer */
+ num_hw_queue_slots = dqm->dev->kfd->shared_resources.num_queue_per_pipe *
+ dqm->dev->kfd->shared_resources.num_pipe_per_mec *
+ NUM_XCC(dqm->dev->xcc_mask);
- retval = pm_update_grace_period(&dqm->packet_mgr,
- grace_period);
- if (retval)
- dev_err(dev, "Setting grace timeout failed\n");
- else if (dqm->dev->kfd2kgd->build_grace_period_packet_info)
- /* Update dqm->wait_times maintained in software */
- dqm->dev->kfd2kgd->build_grace_period_packet_info(
- dqm->dev->adev, dqm->wait_times,
- grace_period, &reg_offset,
- &dqm->wait_times);
+ dqm->detect_hang_info_size = num_hw_queue_slots * sizeof(struct dqm_detect_hang_info);
+ dqm->detect_hang_info = kzalloc(dqm->detect_hang_info_size, GFP_KERNEL);
+
+ if (!dqm->detect_hang_info) {
+ retval = -ENOMEM;
+ goto fail_detect_hang_buffer;
}
dqm_unlock(dqm);
return 0;
+fail_detect_hang_buffer:
+ kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
fail_allocate_vidmem:
fail_set_sched_resources:
if (!dqm->dev->kfd->shared_resources.enable_mes)
- pm_uninit(&dqm->packet_mgr, false);
+ pm_uninit(&dqm->packet_mgr);
fail_packet_manager_init:
dqm_unlock(dqm);
return retval;
@@ -1708,22 +1884,17 @@ fail_packet_manager_init:
static int stop_cpsch(struct device_queue_manager *dqm)
{
- bool hanging;
-
dqm_lock(dqm);
if (!dqm->sched_running) {
dqm_unlock(dqm);
return 0;
}
- if (!dqm->is_hws_hang) {
- if (!dqm->dev->kfd->shared_resources.enable_mes)
- unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
- else
- remove_all_queues_mes(dqm);
- }
+ if (!dqm->dev->kfd->shared_resources.enable_mes)
+ unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
+ else
+ remove_all_kfd_queues_mes(dqm);
- hanging = dqm->is_hws_hang || dqm->is_resetting;
dqm->sched_running = false;
if (!dqm->dev->kfd->shared_resources.enable_mes)
@@ -1731,7 +1902,9 @@ static int stop_cpsch(struct device_queue_manager *dqm)
kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
if (!dqm->dev->kfd->shared_resources.enable_mes)
- pm_uninit(&dqm->packet_mgr, hanging);
+ pm_uninit(&dqm->packet_mgr);
+ kfree(dqm->detect_hang_info);
+ dqm->detect_hang_info = NULL;
dqm_unlock(dqm);
return 0;
@@ -1803,7 +1976,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
}
if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
- q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI ||
+ q->properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) {
dqm_lock(dqm);
retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
dqm_unlock(dqm);
@@ -1900,7 +2074,7 @@ int amdkfd_fence_wait_timeout(struct device_queue_manager *dqm,
{
unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;
struct device *dev = dqm->dev->adev->dev;
- uint64_t *fence_addr = dqm->fence_addr;
+ uint64_t *fence_addr = dqm->fence_addr;
while (*fence_addr != fence_value) {
/* Fatal err detected, this response won't come */
@@ -1930,7 +2104,7 @@ static int map_queues_cpsch(struct device_queue_manager *dqm)
struct device *dev = dqm->dev->adev->dev;
int retval;
- if (!dqm->sched_running)
+ if (!dqm->sched_running || dqm->sched_halt)
return 0;
if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0)
return 0;
@@ -1948,7 +2122,240 @@ static int map_queues_cpsch(struct device_queue_manager *dqm)
return retval;
}
-/* dqm->lock mutex has to be locked before calling this function */
+static void set_queue_as_reset(struct device_queue_manager *dqm, struct queue *q,
+ struct qcm_process_device *qpd)
+{
+ struct kfd_process_device *pdd = qpd_to_pdd(qpd);
+
+ dev_err(dqm->dev->adev->dev, "queue id 0x%0x at pasid %d is reset\n",
+ q->properties.queue_id, pdd->process->lead_thread->pid);
+
+ pdd->has_reset_queue = true;
+ if (q->properties.is_active) {
+ q->properties.is_active = false;
+ decrement_queue_count(dqm, qpd, q);
+ }
+}
+
+static int detect_queue_hang(struct device_queue_manager *dqm)
+{
+ int i;
+
+ /* detect should be used only in dqm locked queue reset */
+ if (WARN_ON(dqm->detect_hang_count > 0))
+ return 0;
+
+ memset(dqm->detect_hang_info, 0, dqm->detect_hang_info_size);
+
+ for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) {
+ uint32_t mec, pipe, queue;
+ int xcc_id;
+
+ mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe)
+ / dqm->dev->kfd->shared_resources.num_pipe_per_mec;
+
+ if (mec || !test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap))
+ continue;
+
+ amdgpu_queue_mask_bit_to_mec_queue(dqm->dev->adev, i, &mec, &pipe, &queue);
+
+ for_each_inst(xcc_id, dqm->dev->xcc_mask) {
+ uint64_t queue_addr = dqm->dev->kfd2kgd->hqd_get_pq_addr(
+ dqm->dev->adev, pipe, queue, xcc_id);
+ struct dqm_detect_hang_info hang_info;
+
+ if (!queue_addr)
+ continue;
+
+ hang_info.pipe_id = pipe;
+ hang_info.queue_id = queue;
+ hang_info.xcc_id = xcc_id;
+ hang_info.queue_address = queue_addr;
+
+ dqm->detect_hang_info[dqm->detect_hang_count] = hang_info;
+ dqm->detect_hang_count++;
+ }
+ }
+
+ return dqm->detect_hang_count;
+}
+
+static struct queue *find_queue_by_address(struct device_queue_manager *dqm, uint64_t queue_address)
+{
+ struct device_process_node *cur;
+ struct qcm_process_device *qpd;
+ struct queue *q;
+
+ list_for_each_entry(cur, &dqm->queues, list) {
+ qpd = cur->qpd;
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if (queue_address == q->properties.queue_address)
+ return q;
+ }
+ }
+
+ return NULL;
+}
+
+static int reset_hung_queues(struct device_queue_manager *dqm)
+{
+ int r = 0, reset_count = 0, i;
+
+ if (!dqm->detect_hang_info || dqm->is_hws_hang)
+ return -EIO;
+
+ /* assume dqm locked. */
+ if (!detect_queue_hang(dqm))
+ return -ENOTRECOVERABLE;
+
+ for (i = 0; i < dqm->detect_hang_count; i++) {
+ struct dqm_detect_hang_info hang_info = dqm->detect_hang_info[i];
+ struct queue *q = find_queue_by_address(dqm, hang_info.queue_address);
+ struct kfd_process_device *pdd;
+ uint64_t queue_addr = 0;
+
+ if (!q) {
+ r = -ENOTRECOVERABLE;
+ goto reset_fail;
+ }
+
+ pdd = kfd_get_process_device_data(dqm->dev, q->process);
+ if (!pdd) {
+ r = -ENOTRECOVERABLE;
+ goto reset_fail;
+ }
+
+ queue_addr = dqm->dev->kfd2kgd->hqd_reset(dqm->dev->adev,
+ hang_info.pipe_id, hang_info.queue_id, hang_info.xcc_id,
+ KFD_UNMAP_LATENCY_MS);
+
+ /* either reset failed or we reset an unexpected queue. */
+ if (queue_addr != q->properties.queue_address) {
+ r = -ENOTRECOVERABLE;
+ goto reset_fail;
+ }
+
+ set_queue_as_reset(dqm, q, &pdd->qpd);
+ reset_count++;
+ }
+
+ if (reset_count == dqm->detect_hang_count)
+ kfd_signal_reset_event(dqm->dev);
+ else
+ r = -ENOTRECOVERABLE;
+
+reset_fail:
+ dqm->detect_hang_count = 0;
+
+ return r;
+}
+
+static bool sdma_has_hang(struct device_queue_manager *dqm)
+{
+ int engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm);
+ int engine_end = engine_start + get_num_all_sdma_engines(dqm);
+ int num_queues_per_eng = dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
+ int i, j;
+
+ for (i = engine_start; i < engine_end; i++) {
+ for (j = 0; j < num_queues_per_eng; j++) {
+ if (!dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j))
+ continue;
+
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool set_sdma_queue_as_reset(struct device_queue_manager *dqm,
+ uint32_t doorbell_off)
+{
+ struct device_process_node *cur;
+ struct qcm_process_device *qpd;
+ struct queue *q;
+
+ list_for_each_entry(cur, &dqm->queues, list) {
+ qpd = cur->qpd;
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if ((q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) &&
+ q->properties.doorbell_off == doorbell_off) {
+ set_queue_as_reset(dqm, q, qpd);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+static int reset_hung_queues_sdma(struct device_queue_manager *dqm)
+{
+ int engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm);
+ int engine_end = engine_start + get_num_all_sdma_engines(dqm);
+ int num_queues_per_eng = dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
+ int r = 0, i, j;
+
+ if (dqm->is_hws_hang)
+ return -EIO;
+
+ /* Scan for hung HW queues and reset engine. */
+ dqm->detect_hang_count = 0;
+ for (i = engine_start; i < engine_end; i++) {
+ for (j = 0; j < num_queues_per_eng; j++) {
+ uint32_t doorbell_off =
+ dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j);
+
+ if (!doorbell_off)
+ continue;
+
+ /* Reset engine and check. */
+ if (amdgpu_sdma_reset_engine(dqm->dev->adev, i) ||
+ dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j) ||
+ !set_sdma_queue_as_reset(dqm, doorbell_off)) {
+ r = -ENOTRECOVERABLE;
+ goto reset_fail;
+ }
+
+ /* Should only expect one queue active per engine */
+ dqm->detect_hang_count++;
+ break;
+ }
+ }
+
+ /* Signal process reset */
+ if (dqm->detect_hang_count)
+ kfd_signal_reset_event(dqm->dev);
+ else
+ r = -ENOTRECOVERABLE;
+
+reset_fail:
+ dqm->detect_hang_count = 0;
+
+ return r;
+}
+
+static int reset_queues_on_hws_hang(struct device_queue_manager *dqm, bool is_sdma)
+{
+ while (halt_if_hws_hang)
+ schedule();
+
+ if (!amdgpu_gpu_recovery)
+ return -ENOTRECOVERABLE;
+
+ return is_sdma ? reset_hung_queues_sdma(dqm) : reset_hung_queues(dqm);
+}
+
+/* dqm->lock mutex has to be locked before calling this function
+ *
+ * @grace_period: If USE_DEFAULT_GRACE_PERIOD then default wait time
+ * for context switch latency. Lower values are used by debugger
+ * since context switching are triggered at high frequency.
+ * This is configured by setting CP_IQ_WAIT_TIME2.SCH_WAVE
+ *
+ */
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
enum kfd_unmap_queues_filter filter,
uint32_t filter_param,
@@ -1957,26 +2364,28 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
{
struct device *dev = dqm->dev->adev->dev;
struct mqd_manager *mqd_mgr;
- int retval = 0;
+ int retval;
if (!dqm->sched_running)
return 0;
- if (dqm->is_hws_hang || dqm->is_resetting)
- return -EIO;
if (!dqm->active_runlist)
- return retval;
+ return 0;
+ if (!down_read_trylock(&dqm->dev->adev->reset_domain->sem))
+ return -EIO;
if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
- retval = pm_update_grace_period(&dqm->packet_mgr, grace_period);
+ retval = pm_config_dequeue_wait_counts(&dqm->packet_mgr,
+ KFD_DEQUEUE_WAIT_SET_SCH_WAVE, grace_period);
if (retval)
- return retval;
+ goto out;
}
retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset);
if (retval)
- return retval;
+ goto out;
*dqm->fence_addr = KFD_FENCE_INIT;
+ mb();
pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr,
KFD_FENCE_COMPLETED);
/* should be timed out */
@@ -1985,7 +2394,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
if (retval) {
dev_err(dev, "The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
kfd_hws_hang(dqm);
- return retval;
+ goto out;
}
/* In the current MEC firmware implementation, if compute queue
@@ -1997,29 +2406,36 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
* check those fields
*/
mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
- if (mqd_mgr->read_doorbell_id(dqm->packet_mgr.priv_queue->queue->mqd)) {
- dev_err(dev, "HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n");
- while (halt_if_hws_hang)
- schedule();
- return -ETIME;
- }
+ if (mqd_mgr->check_preemption_failed(mqd_mgr, dqm->packet_mgr.priv_queue->queue->mqd) &&
+ reset_queues_on_hws_hang(dqm, false))
+ goto reset_fail;
+
+ /* Check for SDMA hang and attempt SDMA reset */
+ if (sdma_has_hang(dqm) && reset_queues_on_hws_hang(dqm, true))
+ goto reset_fail;
/* We need to reset the grace period value for this device */
if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
- if (pm_update_grace_period(&dqm->packet_mgr,
- USE_DEFAULT_GRACE_PERIOD))
+ if (pm_config_dequeue_wait_counts(&dqm->packet_mgr,
+ KFD_DEQUEUE_WAIT_RESET, 0 /* unused */))
dev_err(dev, "Failed to reset grace period\n");
}
pm_release_ib(&dqm->packet_mgr);
dqm->active_runlist = false;
-
+out:
+ up_read(&dqm->dev->adev->reset_domain->sem);
return retval;
+
+reset_fail:
+ dqm->is_hws_hang = true;
+ kfd_hws_hang(dqm);
+ up_read(&dqm->dev->adev->reset_domain->sem);
+ return -ETIME;
}
/* only for compute queue */
-static int reset_queues_cpsch(struct device_queue_manager *dqm,
- uint16_t pasid)
+static int reset_queues_cpsch(struct device_queue_manager *dqm, uint16_t pasid)
{
int retval;
@@ -2040,13 +2456,13 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm,
{
int retval;
- if (dqm->is_hws_hang)
+ if (!down_read_trylock(&dqm->dev->adev->reset_domain->sem))
return -EIO;
retval = unmap_queues_cpsch(dqm, filter, filter_param, grace_period, false);
- if (retval)
- return retval;
-
- return map_queues_cpsch(dqm);
+ if (!retval)
+ retval = map_queues_cpsch(dqm);
+ up_read(&dqm->dev->adev->reset_domain->sem);
+ return retval;
}
static int wait_on_destroy_queue(struct device_queue_manager *dqm,
@@ -2056,6 +2472,9 @@ static int wait_on_destroy_queue(struct device_queue_manager *dqm,
q->process);
int ret = 0;
+ if (WARN_ON(!pdd))
+ return ret;
+
if (pdd->qpd.is_debug)
return ret;
@@ -2125,10 +2544,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
pdd->sdma_past_activity_counter += sdma_val;
}
- list_del(&q->list);
- qpd->queue_count--;
if (q->properties.is_active) {
decrement_queue_count(dqm, qpd, q);
+ q->properties.is_active = false;
if (!dqm->dev->kfd->shared_resources.enable_mes) {
retval = execute_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
@@ -2139,6 +2557,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
retval = remove_queue_mes(dqm, q, qpd);
}
}
+ list_del(&q->list);
+ qpd->queue_count--;
/*
* Unconditionally decrement this counter, regardless of the queue's
@@ -2168,20 +2588,13 @@ failed_try_destroy_debugged_queue:
return retval;
}
-/*
- * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
- * stay in user mode.
- */
-#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
-/* APE1 limit is inclusive and 64K aligned. */
-#define APE1_LIMIT_ALIGNMENT 0xFFFF
-
static bool set_cache_memory_policy(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size)
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
{
bool retval = true;
@@ -2190,41 +2603,17 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
dqm_lock(dqm);
- if (alternate_aperture_size == 0) {
- /* base > limit disables APE1 */
- qpd->sh_mem_ape1_base = 1;
- qpd->sh_mem_ape1_limit = 0;
- } else {
- /*
- * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
- * SH_MEM_APE1_BASE[31:0], 0x0000 }
- * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
- * SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
- * Verify that the base and size parameters can be
- * represented in this format and convert them.
- * Additionally restrict APE1 to user-mode addresses.
- */
-
- uint64_t base = (uintptr_t)alternate_aperture_base;
- uint64_t limit = base + alternate_aperture_size - 1;
-
- if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
- (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
- retval = false;
- goto out;
- }
-
- qpd->sh_mem_ape1_base = base >> 16;
- qpd->sh_mem_ape1_limit = limit >> 16;
- }
-
retval = dqm->asic_ops.set_cache_memory_policy(
dqm,
qpd,
default_policy,
alternate_policy,
alternate_aperture_base,
- alternate_aperture_size);
+ alternate_aperture_size,
+ misc_process_properties);
+
+ if (retval)
+ goto out;
if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
program_sh_mem_settings(dqm, qpd);
@@ -2427,10 +2816,12 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
if (!dqm->dev->kfd->shared_resources.enable_mes)
retval = execute_queues_cpsch(dqm, filter, 0, USE_DEFAULT_GRACE_PERIOD);
- if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
+ if ((retval || qpd->reset_wavefronts) &&
+ down_read_trylock(&dqm->dev->adev->reset_domain->sem)) {
pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
qpd->reset_wavefronts = false;
+ up_read(&dqm->dev->adev->reset_domain->sem);
}
/* Lastly, free mqd resources.
@@ -2537,7 +2928,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
dqm->ops.initialize = initialize_cpsch;
dqm->ops.start = start_cpsch;
dqm->ops.stop = stop_cpsch;
- dqm->ops.pre_reset = pre_reset;
+ dqm->ops.halt = halt_cpsch;
+ dqm->ops.unhalt = unhalt_cpsch;
dqm->ops.destroy_queue = destroy_queue_cpsch;
dqm->ops.update_queue = update_queue;
dqm->ops.register_process = register_process;
@@ -2558,7 +2950,6 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
/* initialize dqm for no cp scheduling */
dqm->ops.start = start_nocpsch;
dqm->ops.stop = stop_nocpsch;
- dqm->ops.pre_reset = pre_reset;
dqm->ops.create_queue = create_queue_nocpsch;
dqm->ops.destroy_queue = destroy_queue_nocpsch;
dqm->ops.update_queue = update_queue;
@@ -2597,7 +2988,9 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
break;
default:
- if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0))
+ if (KFD_GC_VERSION(dev) >= IP_VERSION(12, 0, 0))
+ device_queue_manager_init_v12(&dqm->asic_ops);
+ else if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0))
device_queue_manager_init_v11(&dqm->asic_ops);
else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
device_queue_manager_init_v10(&dqm->asic_ops);
@@ -2633,7 +3026,7 @@ static void deallocate_hiq_sdma_mqd(struct kfd_node *dev,
{
WARN(!mqd, "No hiq sdma mqd trunk to free");
- amdgpu_amdkfd_free_gtt_mem(dev->adev, mqd->gtt_mem);
+ amdgpu_amdkfd_free_gtt_mem(dev->adev, &mqd->gtt_mem);
}
void device_queue_manager_uninit(struct device_queue_manager *dqm)
@@ -2645,28 +3038,112 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm)
kfree(dqm);
}
-int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid)
+int kfd_dqm_suspend_bad_queue_mes(struct kfd_node *knode, u32 pasid, u32 doorbell_id)
{
- struct kfd_process_device *pdd;
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ struct kfd_process_device *pdd = NULL;
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, &pdd);
+ struct device_queue_manager *dqm = knode->dqm;
+ struct device *dev = dqm->dev->adev->dev;
+ struct qcm_process_device *qpd;
+ struct queue *q = NULL;
int ret = 0;
- if (!p)
+ if (!pdd)
return -EINVAL;
- WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
- pdd = kfd_get_process_device_data(dqm->dev, p);
- if (pdd)
- ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
+
+ dqm_lock(dqm);
+
+ if (pdd) {
+ qpd = &pdd->qpd;
+
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if (q->doorbell_id == doorbell_id && q->properties.is_active) {
+ ret = suspend_all_queues_mes(dqm);
+ if (ret) {
+ dev_err(dev, "Suspending all queues failed");
+ goto out;
+ }
+
+ q->properties.is_evicted = true;
+ q->properties.is_active = false;
+ decrement_queue_count(dqm, qpd, q);
+
+ ret = remove_queue_mes(dqm, q, qpd);
+ if (ret) {
+ dev_err(dev, "Removing bad queue failed");
+ goto out;
+ }
+
+ ret = resume_all_queues_mes(dqm);
+ if (ret)
+ dev_err(dev, "Resuming all queues failed");
+
+ break;
+ }
+ }
+ }
+
+out:
+ dqm_unlock(dqm);
kfd_unref_process(p);
+ return ret;
+}
+static int kfd_dqm_evict_pasid_mes(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
+ struct device *dev = dqm->dev->adev->dev;
+ int ret = 0;
+
+ /* Check if process is already evicted */
+ dqm_lock(dqm);
+ if (qpd->evicted) {
+ /* Increment the evicted count to make sure the
+ * process stays evicted before its terminated.
+ */
+ qpd->evicted++;
+ dqm_unlock(dqm);
+ goto out;
+ }
+ dqm_unlock(dqm);
+
+ ret = suspend_all_queues_mes(dqm);
+ if (ret) {
+ dev_err(dev, "Suspending all queues failed");
+ goto out;
+ }
+
+ ret = dqm->ops.evict_process_queues(dqm, qpd);
+ if (ret) {
+ dev_err(dev, "Evicting process queues failed");
+ goto out;
+ }
+
+ ret = resume_all_queues_mes(dqm);
+ if (ret)
+ dev_err(dev, "Resuming all queues failed");
+
+out:
return ret;
}
-static void kfd_process_hw_exception(struct work_struct *work)
+int kfd_evict_process_device(struct kfd_process_device *pdd)
{
- struct device_queue_manager *dqm = container_of(work,
- struct device_queue_manager, hw_exception_work);
- amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
+ struct device_queue_manager *dqm;
+ struct kfd_process *p;
+ int ret = 0;
+
+ p = pdd->process;
+ dqm = pdd->dev->dqm;
+
+ WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
+
+ if (dqm->dev->kfd->shared_resources.enable_mes)
+ ret = kfd_dqm_evict_pasid_mes(dqm, &pdd->qpd);
+ else
+ ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
+
+ return ret;
}
int reserve_debug_trap_vmid(struct device_queue_manager *dqm,
@@ -2792,7 +3269,7 @@ struct copy_context_work_handler_workarea {
struct kfd_process *p;
};
-static void copy_context_work_handler (struct work_struct *work)
+static void copy_context_work_handler(struct work_struct *work)
{
struct copy_context_work_handler_workarea *workarea;
struct mqd_manager *mqd_mgr;
@@ -2819,6 +3296,9 @@ static void copy_context_work_handler (struct work_struct *work)
struct qcm_process_device *qpd = &pdd->qpd;
list_for_each_entry(q, &qpd->queues_list, list) {
+ if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE)
+ continue;
+
mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
/* We ignore the return value from get_wave_state
@@ -3159,6 +3639,30 @@ int debug_refresh_runlist(struct device_queue_manager *dqm)
return debug_map_and_unlock(dqm);
}
+bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ int doorbell_off, u32 *queue_format)
+{
+ struct queue *q;
+ bool r = false;
+
+ if (!queue_format)
+ return r;
+
+ dqm_lock(dqm);
+
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if (q->properties.doorbell_off == doorbell_off) {
+ *queue_format = q->properties.format;
+ r = true;
+ goto out;
+ }
+ }
+
+out:
+ dqm_unlock(dqm);
+ return r;
+}
#if defined(CONFIG_DEBUG_FS)
static void seq_reg_dump(struct seq_file *m,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index cf7e182588f8..74a61b5b2f0b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -37,7 +37,6 @@
#define KFD_MES_PROCESS_QUANTUM 100000
#define KFD_MES_GANG_QUANTUM 10000
-#define USE_DEFAULT_GRACE_PERIOD 0xffffffff
struct device_process_node {
struct qcm_process_device *qpd;
@@ -106,6 +105,12 @@ union GRBM_GFX_INDEX_BITS {
* @uninitialize: Destroys all the device queue manager resources allocated in
* initialize routine.
*
+ * @halt: This routine unmaps queues from runlist and set halt status to true
+ * so no more queues will be mapped to runlist until unhalt.
+ *
+ * @unhalt: This routine unset halt status to flase and maps queues back to
+ * runlist.
+ *
* @create_kernel_queue: Creates kernel queue. Used for debug queue.
*
* @destroy_kernel_queue: Destroys kernel queue. Used for debug queue.
@@ -152,8 +157,9 @@ struct device_queue_manager_ops {
int (*initialize)(struct device_queue_manager *dqm);
int (*start)(struct device_queue_manager *dqm);
int (*stop)(struct device_queue_manager *dqm);
- void (*pre_reset)(struct device_queue_manager *dqm);
void (*uninitialize)(struct device_queue_manager *dqm);
+ int (*halt)(struct device_queue_manager *dqm);
+ int (*unhalt)(struct device_queue_manager *dqm);
int (*create_kernel_queue)(struct device_queue_manager *dqm,
struct kernel_queue *kq,
struct qcm_process_device *qpd);
@@ -167,7 +173,8 @@ struct device_queue_manager_ops {
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size);
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
int (*process_termination)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
@@ -203,7 +210,8 @@ struct device_queue_manager_asic_ops {
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size);
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
void (*init_sdma_vm)(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd);
@@ -211,6 +219,13 @@ struct device_queue_manager_asic_ops {
struct kfd_node *dev);
};
+struct dqm_detect_hang_info {
+ int pipe_id;
+ int queue_id;
+ int xcc_id;
+ uint64_t queue_address;
+};
+
/**
* struct device_queue_manager
*
@@ -255,9 +270,9 @@ struct device_queue_manager {
/* hw exception */
bool is_hws_hang;
bool is_resetting;
- struct work_struct hw_exception_work;
struct kfd_mem_obj hiq_sdma_mqd;
bool sched_running;
+ bool sched_halt;
/* used for GFX 9.4.3 only */
uint32_t current_logical_xcc_start;
@@ -265,6 +280,11 @@ struct device_queue_manager {
uint32_t wait_times;
wait_queue_head_t destroy_wait;
+
+ /* for per-queue reset support */
+ struct dqm_detect_hang_info *detect_hang_info;
+ size_t detect_hang_info_size;
+ int detect_hang_count;
};
void device_queue_manager_init_cik(
@@ -277,6 +297,8 @@ void device_queue_manager_init_v10(
struct device_queue_manager_asic_ops *asic_ops);
void device_queue_manager_init_v11(
struct device_queue_manager_asic_ops *asic_ops);
+void device_queue_manager_init_v12(
+ struct device_queue_manager_asic_ops *asic_ops);
void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
unsigned int get_cp_queues_num(struct device_queue_manager *dqm);
@@ -302,6 +324,9 @@ void set_queue_snapshot_entry(struct queue *q,
int debug_lock_and_unmap(struct device_queue_manager *dqm);
int debug_map_and_unlock(struct device_queue_manager *dqm);
int debug_refresh_runlist(struct device_queue_manager *dqm);
+bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ int doorbell_off, u32 *queue_format);
static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
{
@@ -334,4 +359,14 @@ static inline int read_sdma_queue_counter(uint64_t __user *q_rptr, uint64_t *val
/* SDMA activity counter is stored at queue's RPTR + 0x8 location. */
return get_user(*val, q_rptr + 1);
}
+
+static inline void update_dqm_wait_times(struct device_queue_manager *dqm)
+{
+ if (dqm->dev->kfd2kgd->get_iq_wait_times)
+ dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
+ &dqm->wait_times,
+ ffs(dqm->dev->xcc_mask) - 1);
+}
+
+
#endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index d4d95c7f2e5d..0508ef5a41d7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -27,12 +27,21 @@
#include "oss/oss_2_4_sh_mask.h"
#include "gca/gfx_7_2_sh_mask.h"
+/*
+ * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
+ * stay in user mode.
+ */
+#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
+/* APE1 limit is inclusive and 64K aligned. */
+#define APE1_LIMIT_ALIGNMENT 0xFFFF
+
static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size);
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
static int update_qpd_cik(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm(struct device_queue_manager *dqm,
@@ -80,10 +89,41 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size)
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
{
uint32_t default_mtype;
uint32_t ape1_mtype;
+ unsigned int temp;
+ bool retval = true;
+
+ if (alternate_aperture_size == 0) {
+ /* base > limit disables APE1 */
+ qpd->sh_mem_ape1_base = 1;
+ qpd->sh_mem_ape1_limit = 0;
+ } else {
+ /*
+ * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
+ * SH_MEM_APE1_BASE[31:0], 0x0000 }
+ * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
+ * SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
+ * Verify that the base and size parameters can be
+ * represented in this format and convert them.
+ * Additionally restrict APE1 to user-mode addresses.
+ */
+
+ uint64_t base = (uintptr_t)alternate_aperture_base;
+ uint64_t limit = base + alternate_aperture_size - 1;
+
+ if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
+ (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
+ retval = false;
+ goto out;
+ }
+
+ qpd->sh_mem_ape1_base = base >> 16;
+ qpd->sh_mem_ape1_limit = limit >> 16;
+ }
default_mtype = (default_policy == cache_policy_coherent) ?
MTYPE_NONCACHED :
@@ -97,37 +137,22 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
| ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
| DEFAULT_MTYPE(default_mtype)
| APE1_MTYPE(ape1_mtype);
-
- return true;
-}
-
-static int update_qpd_cik(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd)
-{
- struct kfd_process_device *pdd;
- unsigned int temp;
-
- pdd = qpd_to_pdd(qpd);
-
- /* check if sh_mem_config register already configured */
- if (qpd->sh_mem_config == 0) {
- qpd->sh_mem_config =
- ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
- DEFAULT_MTYPE(MTYPE_NONCACHED) |
- APE1_MTYPE(MTYPE_NONCACHED);
- qpd->sh_mem_ape1_limit = 0;
- qpd->sh_mem_ape1_base = 0;
- }
-
/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
* aperture addresses.
*/
- temp = get_sh_mem_bases_nybble_64(pdd);
+ temp = get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd));
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
+out:
+ return retval;
+}
+
+static int update_qpd_cik(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
index 245a90dfc2f6..ba6e3d747ccd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
@@ -31,10 +31,18 @@ static int update_qpd_v10(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm_v10(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
+static bool set_cache_memory_policy_v10(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
void device_queue_manager_init_v10(
struct device_queue_manager_asic_ops *asic_ops)
{
+ asic_ops->set_cache_memory_policy = set_cache_memory_policy_v10;
asic_ops->update_qpd = update_qpd_v10;
asic_ops->init_sdma_vm = init_sdma_vm_v10;
asic_ops->mqd_manager_init = mqd_manager_init_v10;
@@ -49,27 +57,28 @@ static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
private_base;
}
-static int update_qpd_v10(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd)
+static bool set_cache_memory_policy_v10(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
{
- struct kfd_process_device *pdd;
-
- pdd = qpd_to_pdd(qpd);
-
- /* check if sh_mem_config register already configured */
- if (qpd->sh_mem_config == 0) {
- qpd->sh_mem_config =
- (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
- SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
- (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
- qpd->sh_mem_ape1_limit = 0;
- qpd->sh_mem_ape1_base = 0;
- }
-
- qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
+ qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
+ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
+ qpd->sh_mem_ape1_limit = 0;
+ qpd->sh_mem_ape1_base = 0;
+ qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd));
pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
+ return true;
+}
+static int update_qpd_v10(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
index 2e129da7acb4..8b447d04558f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
@@ -30,10 +30,18 @@ static int update_qpd_v11(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm_v11(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
+static bool set_cache_memory_policy_v11(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
void device_queue_manager_init_v11(
struct device_queue_manager_asic_ops *asic_ops)
{
+ asic_ops->set_cache_memory_policy = set_cache_memory_policy_v11;
asic_ops->update_qpd = update_qpd_v11;
asic_ops->init_sdma_vm = init_sdma_vm_v11;
asic_ops->mqd_manager_init = mqd_manager_init_v11;
@@ -48,28 +56,29 @@ static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
private_base;
}
-static int update_qpd_v11(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd)
+static bool set_cache_memory_policy_v11(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
{
- struct kfd_process_device *pdd;
-
- pdd = qpd_to_pdd(qpd);
-
- /* check if sh_mem_config register already configured */
- if (qpd->sh_mem_config == 0) {
- qpd->sh_mem_config =
- (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
- SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
- (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
-
- qpd->sh_mem_ape1_limit = 0;
- qpd->sh_mem_ape1_base = 0;
- }
+ qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
+ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
- qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
+ qpd->sh_mem_ape1_limit = 0;
+ qpd->sh_mem_ape1_base = 0;
+ qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd));
pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
+ return true;
+}
+static int update_qpd_v11(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c
new file mode 100644
index 000000000000..3550da3a46f9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v12.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_device_queue_manager.h"
+#include "gc/gc_12_0_0_sh_mask.h"
+#include "soc24_enum.h"
+
+static int update_qpd_v12(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd);
+static void init_sdma_vm_v12(struct device_queue_manager *dqm, struct queue *q,
+ struct qcm_process_device *qpd);
+static bool set_cache_memory_policy_v12(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
+
+void device_queue_manager_init_v12(
+ struct device_queue_manager_asic_ops *asic_ops)
+{
+ asic_ops->set_cache_memory_policy = set_cache_memory_policy_v12;
+ asic_ops->update_qpd = update_qpd_v12;
+ asic_ops->init_sdma_vm = init_sdma_vm_v12;
+ asic_ops->mqd_manager_init = mqd_manager_init_v12;
+}
+
+static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
+{
+ uint32_t shared_base = pdd->lds_base >> 48;
+ uint32_t private_base = pdd->scratch_base >> 48;
+
+ return (shared_base << SH_MEM_BASES__SHARED_BASE__SHIFT) |
+ private_base;
+}
+
+static bool set_cache_memory_policy_v12(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
+{
+ qpd->sh_mem_config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
+ (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
+
+ qpd->sh_mem_ape1_limit = 0;
+ qpd->sh_mem_ape1_base = 0;
+ qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd));
+
+ pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
+ return true;
+}
+
+static int update_qpd_v12(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
+ return 0;
+}
+
+static void init_sdma_vm_v12(struct device_queue_manager *dqm, struct queue *q,
+ struct qcm_process_device *qpd)
+{
+ /* Not needed on SDMAv4 onwards any more */
+ q->properties.sdma_vm_addr = 0;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
index 54eb1bff903c..9fcc8c6e57b7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
@@ -30,10 +30,18 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
+static bool set_cache_memory_policy_v9(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
void device_queue_manager_init_v9(
struct device_queue_manager_asic_ops *asic_ops)
{
+ asic_ops->set_cache_memory_policy = set_cache_memory_policy_v9;
asic_ops->update_qpd = update_qpd_v9;
asic_ops->init_sdma_vm = init_sdma_vm_v9;
asic_ops->mqd_manager_init = mqd_manager_init_v9;
@@ -48,10 +56,42 @@ static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
private_base;
}
+static bool set_cache_memory_policy_v9(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ enum cache_policy default_policy,
+ enum cache_policy alternate_policy,
+ void __user *alternate_aperture_base,
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
+{
+ qpd->sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
+
+ if (dqm->dev->kfd->noretry)
+ qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
+
+ if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4))
+ qpd->sh_mem_config |= (1 << SH_MEM_CONFIG__F8_MODE__SHIFT);
+
+ if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 5, 0)) {
+ if (misc_process_properties & KFD_PROC_FLAG_MFMA_HIGH_PRECISION)
+ qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__PRECISION_MODE__SHIFT;
+ }
+
+ qpd->sh_mem_ape1_limit = 0;
+ qpd->sh_mem_ape1_base = 0;
+ qpd->sh_mem_bases = compute_sh_mem_bases_64bit(qpd_to_pdd(qpd));
+
+ pr_debug("sh_mem_bases 0x%X sh_mem_config 0x%X\n", qpd->sh_mem_bases,
+ qpd->sh_mem_config);
+ return true;
+}
+
static int update_qpd_v9(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
- struct kfd_process_device *pdd;
+ struct kfd_process_device *pdd = qpd_to_pdd(qpd);
pdd = qpd_to_pdd(qpd);
@@ -63,7 +103,8 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
if (dqm->dev->kfd->noretry)
qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
- if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 3))
+ if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4))
qpd->sh_mem_config |=
(1 << SH_MEM_CONFIG__F8_MODE__SHIFT);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
index b291ee0fab94..dad83356e976 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
@@ -27,12 +27,21 @@
#include "gca/gfx_8_0_sh_mask.h"
#include "oss/oss_3_0_sh_mask.h"
+/*
+ * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
+ * stay in user mode.
+ */
+#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
+/* APE1 limit is inclusive and 64K aligned. */
+#define APE1_LIMIT_ALIGNMENT 0xFFFF
+
static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size);
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties);
static int update_qpd_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void init_sdma_vm(struct device_queue_manager *dqm,
@@ -81,10 +90,41 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
- uint64_t alternate_aperture_size)
+ uint64_t alternate_aperture_size,
+ u32 misc_process_properties)
{
uint32_t default_mtype;
uint32_t ape1_mtype;
+ unsigned int temp;
+ bool retval = true;
+
+ if (alternate_aperture_size == 0) {
+ /* base > limit disables APE1 */
+ qpd->sh_mem_ape1_base = 1;
+ qpd->sh_mem_ape1_limit = 0;
+ } else {
+ /*
+ * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
+ * SH_MEM_APE1_BASE[31:0], 0x0000 }
+ * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
+ * SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
+ * Verify that the base and size parameters can be
+ * represented in this format and convert them.
+ * Additionally restrict APE1 to user-mode addresses.
+ */
+
+ uint64_t base = (uintptr_t)alternate_aperture_base;
+ uint64_t limit = base + alternate_aperture_size - 1;
+
+ if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
+ (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
+ retval = false;
+ goto out;
+ }
+
+ qpd->sh_mem_ape1_base = base >> 16;
+ qpd->sh_mem_ape1_limit = limit >> 16;
+ }
default_mtype = (default_policy == cache_policy_coherent) ?
MTYPE_UC :
@@ -100,40 +140,21 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT;
- return true;
-}
-
-static int update_qpd_vi(struct device_queue_manager *dqm,
- struct qcm_process_device *qpd)
-{
- struct kfd_process_device *pdd;
- unsigned int temp;
-
- pdd = qpd_to_pdd(qpd);
-
- /* check if sh_mem_config register already configured */
- if (qpd->sh_mem_config == 0) {
- qpd->sh_mem_config =
- SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
- SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
- MTYPE_UC <<
- SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
- MTYPE_UC <<
- SH_MEM_CONFIG__APE1_MTYPE__SHIFT;
-
- qpd->sh_mem_ape1_limit = 0;
- qpd->sh_mem_ape1_base = 0;
- }
-
/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
* aperture addresses.
*/
- temp = get_sh_mem_bases_nybble_64(pdd);
+ temp = get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd));
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
pr_debug("sh_mem_bases nybble: 0x%X and register 0x%X\n",
temp, qpd->sh_mem_bases);
+out:
+ return retval;
+}
+static int update_qpd_vi(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 9b33d9d2c9ad..2b294ada3ec0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -31,6 +31,7 @@
#include <linux/memory.h>
#include "kfd_priv.h"
#include "kfd_events.h"
+#include "kfd_device_queue_manager.h"
#include <linux/device.h>
/*
@@ -726,7 +727,7 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
* to process context, kfd_process could attempt to exit while we are
* running so the lookup function increments the process ref count.
*/
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, NULL);
if (!p)
return; /* Presumably process exited. */
@@ -747,6 +748,16 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
uint64_t *slots = page_slots(p->signal_page);
uint32_t id;
+ /*
+ * If id is valid but slot is not signaled, GPU may signal the same event twice
+ * before driver have chance to process the first interrupt, then signal slot is
+ * auto-reset after set_event wakeup the user space, just drop the second event as
+ * the application only need wakeup once.
+ */
+ if ((valid_id_bits > 31 || (1U << valid_id_bits) >= KFD_SIGNAL_EVENT_LIMIT) &&
+ partial_id < KFD_SIGNAL_EVENT_LIMIT && slots[partial_id] == UNSIGNALED_EVENT_SLOT)
+ goto out_unlock;
+
if (valid_id_bits)
pr_debug_ratelimited("Partial ID invalid: %u (%u valid bits)\n",
partial_id, valid_id_bits);
@@ -775,6 +786,7 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
}
}
+out_unlock:
rcu_read_unlock();
kfd_unref_process(p);
}
@@ -1127,8 +1139,8 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
if (type == KFD_EVENT_TYPE_MEMORY) {
dev_warn(kfd_device,
- "Sending SIGSEGV to process %d (pasid 0x%x)",
- p->lead_thread->pid, p->pasid);
+ "Sending SIGSEGV to process pid %d",
+ p->lead_thread->pid);
send_sig(SIGSEGV, p->lead_thread, 0);
}
@@ -1136,13 +1148,13 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
if (send_signal) {
if (send_sigterm) {
dev_warn(kfd_device,
- "Sending SIGTERM to process %d (pasid 0x%x)",
- p->lead_thread->pid, p->pasid);
+ "Sending SIGTERM to process pid %d",
+ p->lead_thread->pid);
send_sig(SIGTERM, p->lead_thread, 0);
} else {
dev_err(kfd_device,
- "Process %d (pasid 0x%x) got unhandled exception",
- p->lead_thread->pid, p->pasid);
+ "Process pid %d got unhandled exception",
+ p->lead_thread->pid);
}
}
@@ -1156,7 +1168,7 @@ void kfd_signal_hw_exception_event(u32 pasid)
* to process context, kfd_process could attempt to exit while we are
* running so the lookup function increments the process ref count.
*/
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, NULL);
if (!p)
return; /* Presumably process exited. */
@@ -1165,22 +1177,39 @@ void kfd_signal_hw_exception_event(u32 pasid)
kfd_unref_process(p);
}
-void kfd_signal_vm_fault_event(struct kfd_node *dev, u32 pasid,
+void kfd_signal_vm_fault_event_with_userptr(struct kfd_process *p, uint64_t gpu_va)
+{
+ struct kfd_process_device *pdd;
+ struct kfd_hsa_memory_exception_data exception_data;
+ int i;
+
+ memset(&exception_data, 0, sizeof(exception_data));
+ exception_data.va = gpu_va;
+ exception_data.failure.NotPresent = 1;
+
+ // Send VM seg fault to all kfd process device
+ for (i = 0; i < p->n_pdds; i++) {
+ pdd = p->pdds[i];
+ exception_data.gpu_id = pdd->user_gpu_id;
+ kfd_evict_process_device(pdd);
+ kfd_signal_vm_fault_event(pdd, NULL, &exception_data);
+ }
+}
+
+void kfd_signal_vm_fault_event(struct kfd_process_device *pdd,
struct kfd_vm_fault_info *info,
struct kfd_hsa_memory_exception_data *data)
{
struct kfd_event *ev;
uint32_t id;
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ struct kfd_process *p = pdd->process;
struct kfd_hsa_memory_exception_data memory_exception_data;
int user_gpu_id;
- if (!p)
- return; /* Presumably process exited. */
-
- user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
+ user_gpu_id = kfd_process_get_user_gpu_id(p, pdd->dev->id);
if (unlikely(user_gpu_id == -EINVAL)) {
- WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
+ WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n",
+ pdd->dev->id);
return;
}
@@ -1217,7 +1246,6 @@ void kfd_signal_vm_fault_event(struct kfd_node *dev, u32 pasid,
}
rcu_read_unlock();
- kfd_unref_process(p);
}
void kfd_signal_reset_event(struct kfd_node *dev)
@@ -1244,12 +1272,41 @@ void kfd_signal_reset_event(struct kfd_node *dev)
idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
int user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
+ struct kfd_process_device *pdd = kfd_get_process_device_data(dev, p);
if (unlikely(user_gpu_id == -EINVAL)) {
WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
continue;
}
+ if (unlikely(!pdd)) {
+ WARN_ONCE(1, "Could not get device data from process pid:%d\n",
+ p->lead_thread->pid);
+ continue;
+ }
+
+ if (dev->dqm->detect_hang_count && !pdd->has_reset_queue)
+ continue;
+
+ if (dev->dqm->detect_hang_count) {
+ struct amdgpu_task_info *ti;
+ struct amdgpu_fpriv *drv_priv;
+
+ if (unlikely(amdgpu_file_to_fpriv(pdd->drm_file, &drv_priv))) {
+ WARN_ONCE(1, "Could not get vm for device %x from pid:%d\n",
+ dev->id, p->lead_thread->pid);
+ continue;
+ }
+
+ ti = amdgpu_vm_get_task_info_vm(&drv_priv->vm);
+ if (ti) {
+ dev_err(dev->adev->dev,
+ "Queues reset on process %s tid %d thread %s pid %d\n",
+ ti->process_name, ti->tgid, ti->task_name, ti->pid);
+ amdgpu_vm_put_task_info(ti);
+ }
+ }
+
rcu_read_lock();
id = KFD_FIRST_NONSIGNAL_EVENT_ID;
@@ -1278,7 +1335,7 @@ void kfd_signal_reset_event(struct kfd_node *dev)
void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid)
{
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, NULL);
struct kfd_hsa_memory_exception_data memory_exception_data;
struct kfd_hsa_hw_exception_data hw_exception_data;
struct kfd_event *ev;
@@ -1293,6 +1350,7 @@ void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid)
user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
if (unlikely(user_gpu_id == -EINVAL)) {
WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
+ kfd_unref_process(p);
return;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index 4a64307bc438..dbcb60eb54b2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -380,7 +380,8 @@ int kfd_init_apertures(struct kfd_process *process)
pdd = kfd_create_process_device_data(dev, process);
if (!pdd) {
- pr_err("Failed to create process device data\n");
+ dev_err(dev->adev->dev,
+ "Failed to create process device data\n");
return -ENOMEM;
}
/*
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
index 9a06c6fb6605..3e1ad8974797 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
@@ -129,60 +129,6 @@ enum SQ_INTERRUPT_ERROR_TYPE {
KFD_DEBUG_CP_BAD_OP_ECODE_MASK) \
>> KFD_DEBUG_CP_BAD_OP_ECODE_SHIFT)
-static void event_interrupt_poison_consumption(struct kfd_node *dev,
- uint16_t pasid, uint16_t client_id)
-{
- enum amdgpu_ras_block block = 0;
- int old_poison, ret = -EINVAL;
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
-
- if (!p)
- return;
-
- /* all queues of a process will be unmapped in one time */
- old_poison = atomic_cmpxchg(&p->poison, 0, 1);
- kfd_unref_process(p);
- if (old_poison)
- return;
-
- switch (client_id) {
- case SOC15_IH_CLIENTID_SE0SH:
- case SOC15_IH_CLIENTID_SE1SH:
- case SOC15_IH_CLIENTID_SE2SH:
- case SOC15_IH_CLIENTID_SE3SH:
- case SOC15_IH_CLIENTID_UTCL2:
- ret = kfd_dqm_evict_pasid(dev->dqm, pasid);
- block = AMDGPU_RAS_BLOCK__GFX;
- break;
- case SOC15_IH_CLIENTID_SDMA0:
- case SOC15_IH_CLIENTID_SDMA1:
- case SOC15_IH_CLIENTID_SDMA2:
- case SOC15_IH_CLIENTID_SDMA3:
- case SOC15_IH_CLIENTID_SDMA4:
- block = AMDGPU_RAS_BLOCK__SDMA;
- break;
- default:
- break;
- }
-
- kfd_signal_poison_consumed_event(dev, pasid);
-
- /* resetting queue passes, do page retirement without gpu reset
- * resetting queue fails, fallback to gpu reset solution
- */
- if (!ret) {
- dev_warn(dev->adev->dev,
- "RAS poison consumption, unmap queue flow succeeded: client id %d\n",
- client_id);
- amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, false);
- } else {
- dev_warn(dev->adev->dev,
- "RAS poison consumption, fall back to gpu reset flow: client id %d\n",
- client_id);
- amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, true);
- }
-}
-
static bool event_interrupt_isr_v10(struct kfd_node *dev,
const uint32_t *ih_ring_entry,
uint32_t *patched_ihre,
@@ -222,14 +168,14 @@ static bool event_interrupt_isr_v10(struct kfd_node *dev,
client_id != SOC15_IH_CLIENTID_SE3SH)
return false;
- pr_debug("client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n",
- client_id, source_id, vmid, pasid);
- pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
- data[0], data[1], data[2], data[3],
- data[4], data[5], data[6], data[7]);
+ dev_dbg(dev->adev->dev,
+ "client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n",
+ client_id, source_id, vmid, pasid);
+ dev_dbg(dev->adev->dev, "%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
+ data[0], data[1], data[2], data[3], data[4], data[5], data[6],
+ data[7]);
- /* If there is no valid PASID, it's likely a bug */
- if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt"))
+ if (pasid == 0)
return 0;
/* Interrupt types we care about: various signals and faults.
@@ -271,37 +217,66 @@ static void event_interrupt_wq_v10(struct kfd_node *dev,
SQ_INTERRUPT_WORD_WAVE_CTXID1, ENCODING);
switch (encoding) {
case SQ_INTERRUPT_WORD_ENCODING_AUTO:
- pr_debug_ratelimited(
+ dev_dbg_ratelimited(
+ dev->adev->dev,
"sq_intr: auto, se %d, ttrace %d, wlt %d, ttrac_buf0_full %d, ttrac_buf1_full %d, ttrace_utc_err %d\n",
- REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_AUTO_CTXID1,
- SE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
- THREAD_TRACE),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
- WLT),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
- THREAD_TRACE_BUF0_FULL),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
- THREAD_TRACE_BUF1_FULL),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
- THREAD_TRACE_UTC_ERROR));
+ REG_GET_FIELD(
+ context_id1,
+ SQ_INTERRUPT_WORD_AUTO_CTXID1,
+ SE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ THREAD_TRACE),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ WLT),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ THREAD_TRACE_BUF0_FULL),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ THREAD_TRACE_BUF1_FULL),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ THREAD_TRACE_UTC_ERROR));
break;
case SQ_INTERRUPT_WORD_ENCODING_INST:
- pr_debug_ratelimited("sq_intr: inst, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n",
- REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1,
- SE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
- DATA),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
- SA_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
- PRIV),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
- WAVE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
- SIMD_ID),
- REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1,
- WGP_ID));
+ dev_dbg_ratelimited(
+ dev->adev->dev,
+ "sq_intr: inst, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n",
+ REG_GET_FIELD(
+ context_id1,
+ SQ_INTERRUPT_WORD_WAVE_CTXID1,
+ SE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ DATA),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ SA_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ PRIV),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ WAVE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ SIMD_ID),
+ REG_GET_FIELD(
+ context_id1,
+ SQ_INTERRUPT_WORD_WAVE_CTXID1,
+ WGP_ID));
if (context_id0 & SQ_INTERRUPT_WORD_WAVE_CTXID0__PRIV_MASK) {
if (kfd_set_dbg_ev_from_interrupt(dev, pasid,
KFD_DEBUG_DOORBELL_ID(context_id0),
@@ -313,33 +288,45 @@ static void event_interrupt_wq_v10(struct kfd_node *dev,
case SQ_INTERRUPT_WORD_ENCODING_ERROR:
sq_intr_err_type = REG_GET_FIELD(context_id0, KFD_CTXID0,
ERR_TYPE);
- pr_warn_ratelimited("sq_intr: error, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d, err_type %d\n",
- REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1,
- SE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
- DATA),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
- SA_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
- PRIV),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
- WAVE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
- SIMD_ID),
- REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1,
- WGP_ID),
+ dev_warn_ratelimited(
+ dev->adev->dev,
+ "sq_intr: error, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d, err_type %d\n",
+ REG_GET_FIELD(
+ context_id1,
+ SQ_INTERRUPT_WORD_WAVE_CTXID1,
+ SE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ DATA),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ SA_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ PRIV),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ WAVE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ SIMD_ID),
+ REG_GET_FIELD(
+ context_id1,
+ SQ_INTERRUPT_WORD_WAVE_CTXID1,
+ WGP_ID),
sq_intr_err_type);
- if (sq_intr_err_type != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST &&
- sq_intr_err_type != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) {
- event_interrupt_poison_consumption(dev, pasid, source_id);
- return;
- }
break;
default:
break;
}
kfd_signal_event_interrupt(pasid, context_id0 & 0x7fffff, 23);
- } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) {
+ } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE &&
+ KFD_DBG_EC_TYPE_IS_PACKET(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0))) {
kfd_set_dbg_ev_from_interrupt(dev, pasid,
KFD_DEBUG_DOORBELL_ID(context_id0),
KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)),
@@ -358,9 +345,6 @@ static void event_interrupt_wq_v10(struct kfd_node *dev,
client_id == SOC15_IH_CLIENTID_SDMA7) {
if (source_id == SOC15_INTSRC_SDMA_TRAP) {
kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28);
- } else if (source_id == SOC15_INTSRC_SDMA_ECC) {
- event_interrupt_poison_consumption(dev, pasid, source_id);
- return;
}
} else if (client_id == SOC15_IH_CLIENTID_VMC ||
client_id == SOC15_IH_CLIENTID_VMC1 ||
@@ -369,12 +353,6 @@ static void event_interrupt_wq_v10(struct kfd_node *dev,
uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry);
struct kfd_hsa_memory_exception_data exception_data;
- if (client_id == SOC15_IH_CLIENTID_UTCL2 &&
- amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev)) {
- event_interrupt_poison_consumption(dev, pasid, client_id);
- return;
- }
-
info.vmid = vmid;
info.mc_id = client_id;
info.page_addr = ih_ring_entry[4] |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
index 7e2859736a55..2788a52714d1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
@@ -148,44 +148,69 @@ enum SQ_INTERRUPT_ERROR_TYPE {
#define KFD_CTXID0_DOORBELL_ID(ctxid0) ((ctxid0) & \
KFD_CTXID0_DOORBELL_ID_MASK)
-static void print_sq_intr_info_auto(uint32_t context_id0, uint32_t context_id1)
+static void print_sq_intr_info_auto(struct kfd_node *dev, uint32_t context_id0,
+ uint32_t context_id1)
{
- pr_debug_ratelimited(
+ dev_dbg_ratelimited(
+ dev->adev->dev,
"sq_intr: auto, ttrace %d, wlt %d, ttrace_buf_full %d, reg_tms %d, cmd_tms %d, host_cmd_ovf %d, host_reg_ovf %d, immed_ovf %d, ttrace_utc_err %d\n",
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ THREAD_TRACE),
REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, WLT),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE_BUF_FULL),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, REG_TIMESTAMP),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, CMD_TIMESTAMP),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, HOST_CMD_OVERFLOW),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, HOST_REG_OVERFLOW),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, IMMED_OVERFLOW),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE_UTC_ERROR));
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ THREAD_TRACE_BUF_FULL),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ REG_TIMESTAMP),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ CMD_TIMESTAMP),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ HOST_CMD_OVERFLOW),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ HOST_REG_OVERFLOW),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ IMMED_OVERFLOW),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+ THREAD_TRACE_UTC_ERROR));
}
-static void print_sq_intr_info_inst(uint32_t context_id0, uint32_t context_id1)
+static void print_sq_intr_info_inst(struct kfd_node *dev, uint32_t context_id0,
+ uint32_t context_id1)
{
- pr_debug_ratelimited(
+ dev_dbg_ratelimited(
+ dev->adev->dev,
"sq_intr: inst, data 0x%08x, sh %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n",
REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, DATA),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, SH_ID),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ SH_ID),
REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, PRIV),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, WAVE_ID),
- REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, SIMD_ID),
- REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, WGP_ID));
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
+ WAVE_ID),
+ REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1,
+ SIMD_ID),
+ REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1,
+ WGP_ID));
}
-static void print_sq_intr_info_error(uint32_t context_id0, uint32_t context_id1)
+static void print_sq_intr_info_error(struct kfd_node *dev, uint32_t context_id0,
+ uint32_t context_id1)
{
- pr_warn_ratelimited(
+ dev_warn_ratelimited(
+ dev->adev->dev,
"sq_intr: error, detail 0x%08x, type %d, sh %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n",
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, DETAIL),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, TYPE),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, SH_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, PRIV),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, WAVE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID1, SIMD_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID1, WGP_ID));
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0,
+ DETAIL),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0,
+ TYPE),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0,
+ SH_ID),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0,
+ PRIV),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0,
+ WAVE_ID),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID1,
+ SIMD_ID),
+ REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID1,
+ WGP_ID));
}
static void event_interrupt_poison_consumption_v11(struct kfd_node *dev,
@@ -193,7 +218,8 @@ static void event_interrupt_poison_consumption_v11(struct kfd_node *dev,
{
enum amdgpu_ras_block block = 0;
int ret = -EINVAL;
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ uint32_t reset = 0;
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, NULL);
if (!p)
return;
@@ -212,10 +238,13 @@ static void event_interrupt_poison_consumption_v11(struct kfd_node *dev,
if (dev->dqm->ops.reset_queues)
ret = dev->dqm->ops.reset_queues(dev->dqm, pasid);
block = AMDGPU_RAS_BLOCK__GFX;
+ if (ret)
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
break;
case SOC21_INTSRC_SDMA_ECC:
default:
block = AMDGPU_RAS_BLOCK__GFX;
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
break;
}
@@ -223,10 +252,7 @@ static void event_interrupt_poison_consumption_v11(struct kfd_node *dev,
/* resetting queue passes, do page retirement without gpu reset
resetting queue fails, fallback to gpu reset solution */
- if (!ret)
- amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, false);
- else
- amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, true);
+ amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, reset);
}
static bool event_interrupt_isr_v11(struct kfd_node *dev,
@@ -254,14 +280,14 @@ static bool event_interrupt_isr_v11(struct kfd_node *dev,
(context_id0 & AMDGPU_FENCE_MES_QUEUE_FLAG))
return false;
- pr_debug("client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n",
- client_id, source_id, vmid, pasid);
- pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
- data[0], data[1], data[2], data[3],
- data[4], data[5], data[6], data[7]);
+ dev_dbg(dev->adev->dev,
+ "client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n",
+ client_id, source_id, vmid, pasid);
+ dev_dbg(dev->adev->dev, "%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
+ data[0], data[1], data[2], data[3], data[4], data[5], data[6],
+ data[7]);
- /* If there is no valid PASID, it's likely a bug */
- if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt"))
+ if (pasid == 0)
return false;
/* Interrupt types we care about: various signals and faults.
@@ -328,11 +354,15 @@ static void event_interrupt_wq_v11(struct kfd_node *dev,
/* CP */
if (source_id == SOC15_INTSRC_CP_END_OF_PIPE)
kfd_signal_event_interrupt(pasid, context_id0, 32);
- else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE)
- kfd_set_dbg_ev_from_interrupt(dev, pasid,
- KFD_CTXID0_DOORBELL_ID(context_id0),
+ else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE &&
+ KFD_DBG_EC_TYPE_IS_PACKET(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0))) {
+ u32 doorbell_id = KFD_CTXID0_DOORBELL_ID(context_id0);
+
+ kfd_set_dbg_ev_from_interrupt(dev, pasid, doorbell_id,
KFD_EC_MASK(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0)),
NULL, 0);
+ kfd_dqm_suspend_bad_queue_mes(dev, pasid, doorbell_id);
+ }
/* SDMA */
else if (source_id == SOC21_INTSRC_SDMA_TRAP)
@@ -348,10 +378,10 @@ static void event_interrupt_wq_v11(struct kfd_node *dev,
SQ_INTERRUPT_WORD_WAVE_CTXID1, ENCODING);
switch (sq_int_enc) {
case SQ_INTERRUPT_WORD_ENCODING_AUTO:
- print_sq_intr_info_auto(context_id0, context_id1);
+ print_sq_intr_info_auto(dev, context_id0, context_id1);
break;
case SQ_INTERRUPT_WORD_ENCODING_INST:
- print_sq_intr_info_inst(context_id0, context_id1);
+ print_sq_intr_info_inst(dev, context_id0, context_id1);
sq_int_priv = REG_GET_FIELD(context_id0,
SQ_INTERRUPT_WORD_WAVE_CTXID0, PRIV);
if (sq_int_priv && (kfd_set_dbg_ev_from_interrupt(dev, pasid,
@@ -361,7 +391,7 @@ static void event_interrupt_wq_v11(struct kfd_node *dev,
return;
break;
case SQ_INTERRUPT_WORD_ENCODING_ERROR:
- print_sq_intr_info_error(context_id0, context_id1);
+ print_sq_intr_info_error(dev, context_id0, context_id1);
sq_int_errtype = REG_GET_FIELD(context_id0,
SQ_INTERRUPT_WORD_ERROR_CTXID0, TYPE);
if (sq_int_errtype != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST &&
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index 91dd5e045b51..4ceb251312a6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -27,6 +27,7 @@
#include "soc15_int.h"
#include "kfd_device_queue_manager.h"
#include "kfd_smi_events.h"
+#include "amdgpu_ras.h"
/*
* GFX9 SQ Interrupts
@@ -144,8 +145,11 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
uint16_t pasid, uint16_t client_id)
{
enum amdgpu_ras_block block = 0;
- int old_poison, ret = -EINVAL;
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ uint32_t reset = 0;
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, NULL);
+ enum ras_event_type type = RAS_EVENT_TYPE_POISON_CONSUMPTION;
+ u64 event_id;
+ int old_poison, ret;
if (!p)
return;
@@ -162,8 +166,30 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
case SOC15_IH_CLIENTID_SE2SH:
case SOC15_IH_CLIENTID_SE3SH:
case SOC15_IH_CLIENTID_UTCL2:
- ret = kfd_dqm_evict_pasid(dev->dqm, pasid);
block = AMDGPU_RAS_BLOCK__GFX;
+ if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) {
+ /* driver mode-2 for gfx poison is only supported by
+ * pmfw 0x00557300 and onwards */
+ if (dev->adev->pm.fw_version < 0x00557300)
+ reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ else
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ } else if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) {
+ /* driver mode-2 for gfx poison is only supported by
+ * pmfw 0x05550C00 and onwards */
+ if (dev->adev->pm.fw_version < 0x05550C00)
+ reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ else
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ } else {
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ }
+ amdgpu_ras_set_err_poison(dev->adev, AMDGPU_RAS_BLOCK__GFX);
+ break;
+ case SOC15_IH_CLIENTID_VMC:
+ case SOC15_IH_CLIENTID_VMC1:
+ block = AMDGPU_RAS_BLOCK__MMHUB;
+ reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
break;
case SOC15_IH_CLIENTID_SDMA0:
case SOC15_IH_CLIENTID_SDMA1:
@@ -171,27 +197,44 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
case SOC15_IH_CLIENTID_SDMA3:
case SOC15_IH_CLIENTID_SDMA4:
block = AMDGPU_RAS_BLOCK__SDMA;
+ if (amdgpu_ip_version(dev->adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2)) {
+ /* driver mode-2 for gfx poison is only supported by
+ * pmfw 0x00557300 and onwards */
+ if (dev->adev->pm.fw_version < 0x00557300)
+ reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ else
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ } else if (amdgpu_ip_version(dev->adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5)) {
+ /* driver mode-2 for gfx poison is only supported by
+ * pmfw 0x05550C00 and onwards */
+ if (dev->adev->pm.fw_version < 0x05550C00)
+ reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ else
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ } else {
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ }
+ amdgpu_ras_set_err_poison(dev->adev, AMDGPU_RAS_BLOCK__SDMA);
break;
default:
- break;
+ dev_warn(dev->adev->dev,
+ "client %d does not support poison consumption\n", client_id);
+ return;
}
+ ret = amdgpu_ras_mark_ras_event(dev->adev, type);
+ if (ret)
+ return;
+
kfd_signal_poison_consumed_event(dev, pasid);
- /* resetting queue passes, do page retirement without gpu reset
- * resetting queue fails, fallback to gpu reset solution
- */
- if (!ret) {
- dev_warn(dev->adev->dev,
- "RAS poison consumption, unmap queue flow succeeded: client id %d\n",
- client_id);
- amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, false);
- } else {
- dev_warn(dev->adev->dev,
- "RAS poison consumption, fall back to gpu reset flow: client id %d\n",
- client_id);
- amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, true);
- }
+ event_id = amdgpu_ras_acquire_event_id(dev->adev, type);
+
+ RAS_EVENT_LOG(dev->adev, event_id,
+ "poison is consumed by client %d, kick off gpu reset flow\n", client_id);
+
+ amdgpu_amdkfd_ras_pasid_poison_consumption_handler(dev->adev,
+ block, pasid, NULL, NULL, reset);
}
static bool context_id_expected(struct kfd_dev *dev)
@@ -271,11 +314,12 @@ static bool event_interrupt_isr_v9(struct kfd_node *dev,
& ~pasid_mask) | pasid);
}
- pr_debug("client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n",
- client_id, source_id, vmid, pasid);
- pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
- data[0], data[1], data[2], data[3],
- data[4], data[5], data[6], data[7]);
+ dev_dbg(dev->adev->dev,
+ "client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n",
+ client_id, source_id, vmid, pasid);
+ dev_dbg(dev->adev->dev, "%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
+ data[0], data[1], data[2], data[3], data[4], data[5], data[6],
+ data[7]);
/* If there is no valid PASID, it's likely a bug */
if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt"))
@@ -336,28 +380,82 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
encoding = REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, ENCODING);
switch (encoding) {
case SQ_INTERRUPT_WORD_ENCODING_AUTO:
- pr_debug_ratelimited(
+ dev_dbg_ratelimited(
+ dev->adev->dev,
"sq_intr: auto, se %d, ttrace %d, wlt %d, ttrac_buf_full %d, reg_tms %d, cmd_tms %d, host_cmd_ovf %d, host_reg_ovf %d, immed_ovf %d, ttrace_utc_err %d\n",
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, SE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, WLT),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE_BUF_FULL),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, REG_TIMESTAMP),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, CMD_TIMESTAMP),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, HOST_CMD_OVERFLOW),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, HOST_REG_OVERFLOW),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, IMMED_OVERFLOW),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE_UTC_ERROR));
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ SE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ THREAD_TRACE),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ WLT),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ THREAD_TRACE_BUF_FULL),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ REG_TIMESTAMP),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ CMD_TIMESTAMP),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ HOST_CMD_OVERFLOW),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ HOST_REG_OVERFLOW),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ IMMED_OVERFLOW),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_AUTO_CTXID,
+ THREAD_TRACE_UTC_ERROR));
break;
case SQ_INTERRUPT_WORD_ENCODING_INST:
- pr_debug_ratelimited("sq_intr: inst, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, intr_data 0x%x\n",
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, DATA),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SH_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, PRIV),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, WAVE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SIMD_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, CU_ID),
+ dev_dbg_ratelimited(
+ dev->adev->dev,
+ "sq_intr: inst, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, intr_data 0x%x\n",
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ SE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ DATA),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ SH_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ PRIV),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ WAVE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ SIMD_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ CU_ID),
sq_int_data);
if (context_id0 & SQ_INTERRUPT_WORD_WAVE_CTXID__PRIV_MASK) {
if (kfd_set_dbg_ev_from_interrupt(dev, pasid,
@@ -369,14 +467,37 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
break;
case SQ_INTERRUPT_WORD_ENCODING_ERROR:
sq_intr_err = REG_GET_FIELD(sq_int_data, KFD_SQ_INT_DATA, ERR_TYPE);
- pr_warn_ratelimited("sq_intr: error, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, err_type %d\n",
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, DATA),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SH_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, PRIV),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, WAVE_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SIMD_ID),
- REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, CU_ID),
+ dev_warn_ratelimited(
+ dev->adev->dev,
+ "sq_intr: error, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, err_type %d\n",
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ SE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ DATA),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ SH_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ PRIV),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ WAVE_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ SIMD_ID),
+ REG_GET_FIELD(
+ context_id0,
+ SQ_INTERRUPT_WORD_WAVE_CTXID,
+ CU_ID),
sq_intr_err);
if (sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST &&
sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) {
@@ -388,7 +509,8 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
break;
}
kfd_signal_event_interrupt(pasid, sq_int_data, 24);
- } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) {
+ } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE &&
+ KFD_DBG_EC_TYPE_IS_PACKET(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0))) {
kfd_set_dbg_ev_from_interrupt(dev, pasid,
KFD_DEBUG_DOORBELL_ID(context_id0),
KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)),
@@ -415,8 +537,7 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry);
struct kfd_hsa_memory_exception_data exception_data;
- if (client_id == SOC15_IH_CLIENTID_UTCL2 &&
- amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev)) {
+ if (source_id == SOC15_INTSRC_VMC_UTCL2_POISON) {
event_interrupt_poison_consumption_v9(dev, pasid, client_id);
return;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
index dd3c43c1ad70..783c2f5a04e4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -46,7 +46,7 @@
#include <linux/kfifo.h>
#include "kfd_priv.h"
-#define KFD_IH_NUM_ENTRIES 8192
+#define KFD_IH_NUM_ENTRIES 16384
static void interrupt_wq(struct work_struct *);
@@ -62,11 +62,14 @@ int kfd_interrupt_init(struct kfd_node *node)
return r;
}
- node->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI, 1);
- if (unlikely(!node->ih_wq)) {
- kfifo_free(&node->ih_fifo);
- dev_err(node->adev->dev, "Failed to allocate KFD IH workqueue\n");
- return -ENOMEM;
+ if (!node->kfd->ih_wq) {
+ node->kfd->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI | WQ_UNBOUND,
+ node->kfd->num_nodes);
+ if (unlikely(!node->kfd->ih_wq)) {
+ kfifo_free(&node->ih_fifo);
+ dev_err(node->adev->dev, "Failed to allocate KFD IH workqueue\n");
+ return -ENOMEM;
+ }
}
spin_lock_init(&node->interrupt_lock);
@@ -96,14 +99,6 @@ void kfd_interrupt_exit(struct kfd_node *node)
spin_lock_irqsave(&node->interrupt_lock, flags);
node->interrupts_active = false;
spin_unlock_irqrestore(&node->interrupt_lock, flags);
-
- /*
- * flush_work ensures that there are no outstanding
- * work-queue items that will access interrupt_ring. New work items
- * can't be created because we stopped interrupt handling above.
- */
- flush_workqueue(node->ih_wq);
-
kfifo_free(&node->ih_fifo);
}
@@ -112,55 +107,48 @@ void kfd_interrupt_exit(struct kfd_node *node)
*/
bool enqueue_ih_ring_entry(struct kfd_node *node, const void *ih_ring_entry)
{
- int count;
-
- count = kfifo_in(&node->ih_fifo, ih_ring_entry,
- node->kfd->device_info.ih_ring_entry_size);
- if (count != node->kfd->device_info.ih_ring_entry_size) {
- dev_dbg_ratelimited(node->adev->dev,
- "Interrupt ring overflow, dropping interrupt %d\n",
- count);
+ if (kfifo_is_full(&node->ih_fifo)) {
+ dev_warn_ratelimited(node->adev->dev, "KFD node %d ih_fifo overflow\n",
+ node->node_id);
return false;
}
+ kfifo_in(&node->ih_fifo, ih_ring_entry, node->kfd->device_info.ih_ring_entry_size);
return true;
}
/*
* Assumption: single reader/writer. This function is not re-entrant
*/
-static bool dequeue_ih_ring_entry(struct kfd_node *node, void *ih_ring_entry)
+static bool dequeue_ih_ring_entry(struct kfd_node *node, u32 **ih_ring_entry)
{
int count;
- count = kfifo_out(&node->ih_fifo, ih_ring_entry,
- node->kfd->device_info.ih_ring_entry_size);
-
- WARN_ON(count && count != node->kfd->device_info.ih_ring_entry_size);
+ if (kfifo_is_empty(&node->ih_fifo))
+ return false;
+ count = kfifo_out_linear_ptr(&node->ih_fifo, ih_ring_entry,
+ node->kfd->device_info.ih_ring_entry_size);
+ WARN_ON(count != node->kfd->device_info.ih_ring_entry_size);
return count == node->kfd->device_info.ih_ring_entry_size;
}
static void interrupt_wq(struct work_struct *work)
{
- struct kfd_node *dev = container_of(work, struct kfd_node,
- interrupt_work);
- uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE];
+ struct kfd_node *dev = container_of(work, struct kfd_node, interrupt_work);
+ uint32_t *ih_ring_entry;
unsigned long start_jiffies = jiffies;
- if (dev->kfd->device_info.ih_ring_entry_size > sizeof(ih_ring_entry)) {
- dev_err_once(dev->adev->dev, "Ring entry too small\n");
- return;
- }
-
- while (dequeue_ih_ring_entry(dev, ih_ring_entry)) {
+ while (dequeue_ih_ring_entry(dev, &ih_ring_entry)) {
dev->kfd->device_info.event_interrupt_class->interrupt_wq(dev,
ih_ring_entry);
+ kfifo_skip_count(&dev->ih_fifo, dev->kfd->device_info.ih_ring_entry_size);
+
if (time_is_before_jiffies(start_jiffies + HZ)) {
/* If we spent more than a second processing signals,
* reschedule the worker to avoid soft-lockup warnings
*/
- queue_work(dev->ih_wq, &dev->interrupt_work);
+ queue_work(dev->kfd->ih_wq, &dev->interrupt_work);
break;
}
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 32c926986dbb..2b0a830f5b29 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -32,6 +32,7 @@
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers.h"
#include "kfd_pm4_opcodes.h"
+#include "amdgpu_reset.h"
#define PM4_COUNT_ZERO (((1 << 15) - 1) << 16)
@@ -68,7 +69,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev,
kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
break;
default:
- pr_err("Invalid queue type %d\n", type);
+ dev_err(dev->adev->dev, "Invalid queue type %d\n", type);
return false;
}
@@ -78,13 +79,14 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev,
prop.doorbell_ptr = kfd_get_kernel_doorbell(dev->kfd, &prop.doorbell_off);
if (!prop.doorbell_ptr) {
- pr_err("Failed to initialize doorbell");
+ dev_err(dev->adev->dev, "Failed to initialize doorbell");
goto err_get_kernel_doorbell;
}
retval = kfd_gtt_sa_allocate(dev, queue_size, &kq->pq);
if (retval != 0) {
- pr_err("Failed to init pq queues size %d\n", queue_size);
+ dev_err(dev->adev->dev, "Failed to init pq queues size %d\n",
+ queue_size);
goto err_pq_allocate_vidmem;
}
@@ -123,7 +125,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev,
memset(kq->pq_kernel_addr, 0, queue_size);
memset(kq->rptr_kernel, 0, sizeof(*kq->rptr_kernel));
- memset(kq->wptr_kernel, 0, sizeof(*kq->wptr_kernel));
+ memset(kq->wptr_kernel, 0, dev->kfd->device_info.doorbell_size);
prop.queue_size = queue_size;
prop.is_interop = false;
@@ -196,15 +198,17 @@ err_get_kernel_doorbell:
}
/* Uninitialize a kernel queue and free all its memory usages. */
-static void kq_uninitialize(struct kernel_queue *kq, bool hanging)
+static void kq_uninitialize(struct kernel_queue *kq)
{
- if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ && !hanging)
+ if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ && down_read_trylock(&kq->dev->adev->reset_domain->sem)) {
kq->mqd_mgr->destroy_mqd(kq->mqd_mgr,
kq->queue->mqd,
KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
KFD_UNMAP_LATENCY_MS,
kq->queue->pipe,
kq->queue->queue);
+ up_read(&kq->dev->adev->reset_domain->sem);
+ }
else if (kq->queue->properties.type == KFD_QUEUE_TYPE_DIQ)
kfd_gtt_sa_free(kq->dev, kq->fence_mem_obj);
@@ -302,12 +306,17 @@ int kq_submit_packet(struct kernel_queue *kq)
if (amdgpu_amdkfd_is_fed(kq->dev->adev))
return -EIO;
+ /* Make sure ring buffer is updated before wptr updated */
+ mb();
+
if (kq->dev->kfd->device_info.doorbell_size == 8) {
*kq->wptr64_kernel = kq->pending_wptr64;
+ mb(); /* Make sure wptr updated before ring doorbell */
write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
kq->pending_wptr64);
} else {
*kq->wptr_kernel = kq->pending_wptr;
+ mb(); /* Make sure wptr updated before ring doorbell */
write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
kq->pending_wptr);
}
@@ -338,15 +347,15 @@ struct kernel_queue *kernel_queue_init(struct kfd_node *dev,
if (kq_initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE))
return kq;
- pr_err("Failed to init kernel queue\n");
+ dev_err(dev->adev->dev, "Failed to init kernel queue\n");
kfree(kq);
return NULL;
}
-void kernel_queue_uninit(struct kernel_queue *kq, bool hanging)
+void kernel_queue_uninit(struct kernel_queue *kq)
{
- kq_uninitialize(kq, hanging);
+ kq_uninitialize(kq);
kfree(kq);
}
@@ -357,26 +366,26 @@ static __attribute__((unused)) void test_kq(struct kfd_node *dev)
uint32_t *buffer, i;
int retval;
- pr_err("Starting kernel queue test\n");
+ dev_err(dev->adev->dev, "Starting kernel queue test\n");
kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ);
if (unlikely(!kq)) {
- pr_err(" Failed to initialize HIQ\n");
- pr_err("Kernel queue test failed\n");
+ dev_err(dev->adev->dev, " Failed to initialize HIQ\n");
+ dev_err(dev->adev->dev, "Kernel queue test failed\n");
return;
}
retval = kq_acquire_packet_buffer(kq, 5, &buffer);
if (unlikely(retval != 0)) {
- pr_err(" Failed to acquire packet buffer\n");
- pr_err("Kernel queue test failed\n");
+ dev_err(dev->adev->dev, " Failed to acquire packet buffer\n");
+ dev_err(dev->adev->dev, "Kernel queue test failed\n");
return;
}
for (i = 0; i < 5; i++)
buffer[i] = kq->nop_packet;
kq_submit_packet(kq);
- pr_err("Ending kernel queue test\n");
+ dev_err(dev->adev->dev, "Ending kernel queue test\n");
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index bdc01ca9609a..79251f22b702 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -77,7 +77,7 @@ svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
- dst_addr, num_bytes, false);
+ dst_addr, num_bytes, 0);
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw);
@@ -153,7 +153,7 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
}
r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
- NULL, &next, false, true, false);
+ NULL, &next, false, true, 0);
if (r) {
dev_err(adev->dev, "fail %d to copy memory\n", r);
goto out_unlock;
@@ -278,10 +278,11 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange,
struct migrate_vma *migrate, struct dma_fence **mfence,
dma_addr_t *scratch, uint64_t ttm_res_offset)
{
- uint64_t npages = migrate->cpages;
+ uint64_t npages = migrate->npages;
struct amdgpu_device *adev = node->adev;
struct device *dev = adev->dev;
struct amdgpu_res_cursor cursor;
+ uint64_t mpages = 0;
dma_addr_t *src;
uint64_t *dst;
uint64_t i, j;
@@ -295,18 +296,20 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange,
amdgpu_res_first(prange->ttm_res, ttm_res_offset,
npages << PAGE_SHIFT, &cursor);
- for (i = j = 0; i < npages; i++) {
+ for (i = j = 0; (i < npages) && (mpages < migrate->cpages); i++) {
struct page *spage;
- dst[i] = cursor.start + (j << PAGE_SHIFT);
- migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
- svm_migrate_get_vram_page(prange, migrate->dst[i]);
- migrate->dst[i] = migrate_pfn(migrate->dst[i]);
-
+ if (migrate->src[i] & MIGRATE_PFN_MIGRATE) {
+ dst[i] = cursor.start + (j << PAGE_SHIFT);
+ migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
+ svm_migrate_get_vram_page(prange, migrate->dst[i]);
+ migrate->dst[i] = migrate_pfn(migrate->dst[i]);
+ mpages++;
+ }
spage = migrate_pfn_to_page(migrate->src[i]);
if (spage && !is_zone_device_page(spage)) {
src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
- DMA_TO_DEVICE);
+ DMA_BIDIRECTIONAL);
r = dma_mapping_error(dev, src[i]);
if (r) {
dev_err(dev, "%s: fail %d dma_map_page\n",
@@ -353,9 +356,12 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange,
out_free_vram_pages:
if (r) {
pr_debug("failed %d to copy memory to vram\n", r);
- while (i--) {
+ for (i = 0; i < npages && mpages; i++) {
+ if (!dst[i])
+ continue;
svm_migrate_put_vram_page(adev, dst[i]);
migrate->dst[i] = 0;
+ mpages--;
}
}
@@ -445,14 +451,13 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
mpages, cpages, migrate.npages);
- kfd_smi_event_migration_end(node, p->lead_thread->pid,
- start >> PAGE_SHIFT, end >> PAGE_SHIFT,
- 0, node->id, trigger);
-
svm_range_dma_unmap_dev(adev->dev, scratch, 0, npages);
out_free:
kvfree(buf);
+ kfd_smi_event_migration_end(node, p->lead_thread->pid,
+ start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+ 0, node->id, trigger, r);
out:
if (!r && mpages) {
pdd = svm_range_get_pdd_by_node(prange, node);
@@ -509,10 +514,19 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
start = start_mgr << PAGE_SHIFT;
end = (last_mgr + 1) << PAGE_SHIFT;
+ r = amdgpu_amdkfd_reserve_mem_limit(node->adev,
+ prange->npages * PAGE_SIZE,
+ KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
+ node->xcp ? node->xcp->id : 0);
+ if (r) {
+ dev_dbg(node->adev->dev, "failed to reserve VRAM, r: %ld\n", r);
+ return -ENOSPC;
+ }
+
r = svm_range_vram_node_new(node, prange, true);
if (r) {
dev_dbg(node->adev->dev, "fail %ld to alloc vram\n", r);
- return r;
+ goto out;
}
ttm_res_offset = (start_mgr - prange->start + prange->offset) << PAGE_SHIFT;
@@ -545,6 +559,11 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
svm_range_vram_node_free(prange);
}
+out:
+ amdgpu_amdkfd_unreserve_mem_limit(node->adev,
+ prange->npages * PAGE_SIZE,
+ KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
+ node->xcp ? node->xcp->id : 0);
return r < 0 ? r : 0;
}
@@ -616,7 +635,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
goto out_oom;
}
- dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+ dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
r = dma_mapping_error(dev, dst[i]);
if (r) {
dev_err(adev->dev, "%s: fail %d dma_map_page\n", __func__, r);
@@ -737,14 +756,13 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
svm_migrate_copy_done(adev, mfence);
migrate_vma_finalize(&migrate);
- kfd_smi_event_migration_end(node, p->lead_thread->pid,
- start >> PAGE_SHIFT, end >> PAGE_SHIFT,
- node->id, 0, trigger);
-
svm_range_dma_unmap_dev(adev->dev, scratch, 0, npages);
out_free:
kvfree(buf);
+ kfd_smi_event_migration_end(node, p->lead_thread->pid,
+ start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+ node->id, 0, trigger, r);
out:
if (!r && cpages) {
mpages = cpages - upages;
@@ -1009,7 +1027,7 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 0, 1))
return -EINVAL;
- if (adev->gmc.is_app_apu)
+ if (adev->apu_prefer_gtt)
return 0;
pgmap = &kfddev->pgmap;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index 050a6936ff84..d9ae854b6908 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -118,18 +118,20 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
* attention grabbing.
*/
if (gfx_info->max_shader_engines > KFD_MAX_NUM_SE) {
- pr_err("Exceeded KFD_MAX_NUM_SE, chip reports %d\n",
- gfx_info->max_shader_engines);
+ dev_err(mm->dev->adev->dev,
+ "Exceeded KFD_MAX_NUM_SE, chip reports %d\n",
+ gfx_info->max_shader_engines);
return;
}
if (gfx_info->max_sh_per_se > KFD_MAX_NUM_SH_PER_SE) {
- pr_err("Exceeded KFD_MAX_NUM_SH, chip reports %d\n",
+ dev_err(mm->dev->adev->dev,
+ "Exceeded KFD_MAX_NUM_SH, chip reports %d\n",
gfx_info->max_sh_per_se * gfx_info->max_shader_engines);
return;
}
cu_bitmap_sh_mul = (KFD_GC_VERSION(mm->dev) >= IP_VERSION(11, 0, 0) &&
- KFD_GC_VERSION(mm->dev) < IP_VERSION(12, 0, 0)) ? 2 : 1;
+ KFD_GC_VERSION(mm->dev) < IP_VERSION(13, 0, 0)) ? 2 : 1;
/* Count active CUs per SH.
*
@@ -223,7 +225,7 @@ void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd,
struct kfd_mem_obj *mqd_mem_obj)
{
if (mqd_mem_obj->gtt_mem) {
- amdgpu_amdkfd_free_gtt_mem(mm->dev->adev, mqd_mem_obj->gtt_mem);
+ amdgpu_amdkfd_free_gtt_mem(mm->dev->adev, &mqd_mem_obj->gtt_mem);
kfree(mqd_mem_obj);
} else {
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
@@ -290,3 +292,21 @@ uint64_t kfd_mqd_stride(struct mqd_manager *mm,
{
return mm->mqd_size;
}
+
+bool kfd_check_hiq_mqd_doorbell_id(struct kfd_node *node, uint32_t doorbell_id,
+ uint32_t inst)
+{
+ if (doorbell_id) {
+ struct device *dev = node->adev->dev;
+
+ if (node->adev->xcp_mgr && node->adev->xcp_mgr->num_xcps > 0)
+ dev_err(dev, "XCC %d: Queue preemption failed for queue with doorbell_id: %x\n",
+ inst, doorbell_id);
+ else
+ dev_err(dev, "Queue preemption failed for queue with doorbell_id: %x\n",
+ doorbell_id);
+ return true;
+ }
+
+ return false;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
index e5cc697a3ca8..17cc1f25c8d0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -119,7 +119,7 @@ struct mqd_manager {
#if defined(CONFIG_DEBUG_FS)
int (*debugfs_show_mqd)(struct seq_file *m, void *data);
#endif
- uint32_t (*read_doorbell_id)(void *mqd);
+ bool (*check_preemption_failed)(struct mqd_manager *mm, void *mqd);
uint64_t (*mqd_stride)(struct mqd_manager *mm,
struct queue_properties *p);
@@ -198,4 +198,6 @@ void kfd_get_hiq_xcc_mqd(struct kfd_node *dev,
uint64_t kfd_hiq_mqd_stride(struct kfd_node *dev);
uint64_t kfd_mqd_stride(struct mqd_manager *mm,
struct queue_properties *q);
+bool kfd_check_hiq_mqd_doorbell_id(struct kfd_node *node, uint32_t doorbell_id,
+ uint32_t inst);
#endif /* KFD_MQD_MANAGER_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index 1a4a69943c71..05f3ac2eaef9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -206,11 +206,11 @@ static void __update_mqd(struct mqd_manager *mm, void *mqd,
q->is_active = QUEUE_IS_ACTIVE(*q);
}
-static uint32_t read_doorbell_id(void *mqd)
+static bool check_preemption_failed(struct mqd_manager *mm, void *mqd)
{
struct cik_mqd *m = (struct cik_mqd *)mqd;
- return m->queue_doorbell_id0;
+ return kfd_check_hiq_mqd_doorbell_id(mm->dev, m->queue_doorbell_id0, 0);
}
static void update_mqd(struct mqd_manager *mm, void *mqd,
@@ -423,7 +423,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
- mqd->read_doorbell_id = read_doorbell_id;
+ mqd->check_preemption_failed = check_preemption_failed;
break;
case KFD_MQD_TYPE_DIQ:
mqd->allocate_mqd = allocate_mqd;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index 22cbfa1bdadd..1695dd78ede8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -107,6 +107,8 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
+ m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+ m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK;
m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;
m->cp_mqd_base_addr_lo = lower_32_bits(addr);
@@ -167,10 +169,10 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
m = get_mqd(mqd);
- m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+ m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK;
m->cp_hqd_pq_control |=
ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
- m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK;
+
pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
@@ -224,11 +226,11 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
q->is_active = QUEUE_IS_ACTIVE(*q);
}
-static uint32_t read_doorbell_id(void *mqd)
+static bool check_preemption_failed(struct mqd_manager *mm, void *mqd)
{
struct v10_compute_mqd *m = (struct v10_compute_mqd *)mqd;
- return m->queue_doorbell_id0;
+ return kfd_check_hiq_mqd_doorbell_id(mm->dev, m->queue_doorbell_id0, 0);
}
static int get_wave_state(struct mqd_manager *mm, void *mqd,
@@ -488,7 +490,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
- mqd->read_doorbell_id = read_doorbell_id;
+ mqd->check_preemption_failed = check_preemption_failed;
pr_debug("%s@%i\n", __func__, __LINE__);
break;
case KFD_MQD_TYPE_DIQ:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
index 826bc4f6c8a7..3c0ae28c5923 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
@@ -154,6 +154,8 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
0x55 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
+ m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+ m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK;
m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;
m->cp_mqd_base_addr_lo = lower_32_bits(addr);
@@ -221,10 +223,9 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
m = get_mqd(mqd);
- m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+ m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK;
m->cp_hqd_pq_control |=
ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
- m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK;
pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
@@ -278,11 +279,11 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
q->is_active = QUEUE_IS_ACTIVE(*q);
}
-static uint32_t read_doorbell_id(void *mqd)
+static bool check_preemption_failed(struct mqd_manager *mm, void *mqd)
{
struct v11_compute_mqd *m = (struct v11_compute_mqd *)mqd;
- return m->queue_doorbell_id0;
+ return kfd_check_hiq_mqd_doorbell_id(mm->dev, m->queue_doorbell_id0, 0);
}
static int get_wave_state(struct mqd_manager *mm, void *mqd,
@@ -517,7 +518,7 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
- mqd->read_doorbell_id = read_doorbell_id;
+ mqd->check_preemption_failed = check_preemption_failed;
pr_debug("%s@%i\n", __func__, __LINE__);
break;
case KFD_MQD_TYPE_DIQ:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c
new file mode 100644
index 000000000000..565858b9044d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c
@@ -0,0 +1,459 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include "kfd_priv.h"
+#include "kfd_mqd_manager.h"
+#include "v12_structs.h"
+#include "gc/gc_12_0_0_sh_mask.h"
+#include "amdgpu_amdkfd.h"
+
+static inline struct v12_compute_mqd *get_mqd(void *mqd)
+{
+ return (struct v12_compute_mqd *)mqd;
+}
+
+static inline struct v12_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+ return (struct v12_sdma_mqd *)mqd;
+}
+
+static void update_cu_mask(struct mqd_manager *mm, void *mqd,
+ struct mqd_update_info *minfo)
+{
+ struct v12_compute_mqd *m;
+ uint32_t se_mask[KFD_MAX_NUM_SE] = {0};
+
+ if (!minfo || !minfo->cu_mask.ptr)
+ return;
+
+ mqd_symmetrically_map_cu_mask(mm,
+ minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0);
+
+ m = get_mqd(mqd);
+ m->compute_static_thread_mgmt_se0 = se_mask[0];
+ m->compute_static_thread_mgmt_se1 = se_mask[1];
+ m->compute_static_thread_mgmt_se2 = se_mask[2];
+ m->compute_static_thread_mgmt_se3 = se_mask[3];
+ m->compute_static_thread_mgmt_se4 = se_mask[4];
+ m->compute_static_thread_mgmt_se5 = se_mask[5];
+ m->compute_static_thread_mgmt_se6 = se_mask[6];
+ m->compute_static_thread_mgmt_se7 = se_mask[7];
+
+ pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n",
+ m->compute_static_thread_mgmt_se0,
+ m->compute_static_thread_mgmt_se1,
+ m->compute_static_thread_mgmt_se2,
+ m->compute_static_thread_mgmt_se3,
+ m->compute_static_thread_mgmt_se4,
+ m->compute_static_thread_mgmt_se5,
+ m->compute_static_thread_mgmt_se6,
+ m->compute_static_thread_mgmt_se7);
+}
+
+static void set_priority(struct v12_compute_mqd *m, struct queue_properties *q)
+{
+ m->cp_hqd_pipe_priority = pipe_priority_map[q->priority];
+ m->cp_hqd_queue_priority = q->priority;
+}
+
+static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node,
+ struct queue_properties *q)
+{
+ struct kfd_mem_obj *mqd_mem_obj;
+
+ /*
+ * Allocate one PAGE_SIZE memory for MQD as MES writes to areas beyond
+ * struct MQD size.
+ */
+ if (kfd_gtt_sa_allocate(node, PAGE_SIZE, &mqd_mem_obj))
+ return NULL;
+
+ return mqd_mem_obj;
+}
+
+static void init_mqd(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *q)
+{
+ uint64_t addr;
+ struct v12_compute_mqd *m;
+
+ m = (struct v12_compute_mqd *) mqd_mem_obj->cpu_ptr;
+ addr = mqd_mem_obj->gpu_addr;
+
+ memset(m, 0, PAGE_SIZE);
+
+ m->header = 0xC0310800;
+ m->compute_pipelinestat_enable = 1;
+ m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se4 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se5 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se6 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se7 = 0xFFFFFFFF;
+
+ m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
+ 0x55 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
+
+ m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+ m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK;
+ m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;
+
+ m->cp_mqd_base_addr_lo = lower_32_bits(addr);
+ m->cp_mqd_base_addr_hi = upper_32_bits(addr);
+
+ m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
+ 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
+ 1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
+
+ /* Set cp_hqd_hq_status0.c_queue_debug_en to 1 to have the CP set up the
+ * DISPATCH_PTR. This is required for the kfd debugger
+ */
+ m->cp_hqd_hq_status0 = 1 << 14;
+
+ if (amdgpu_amdkfd_have_atomics_support(mm->dev->adev))
+ m->cp_hqd_hq_status0 |= 1 << 29;
+
+ if (q->format == KFD_QUEUE_FORMAT_AQL) {
+ m->cp_hqd_aql_control =
+ 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
+ }
+
+ if (mm->dev->kfd->cwsr_enabled) {
+ m->cp_hqd_persistent_state |=
+ (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
+ m->cp_hqd_ctx_save_base_addr_lo =
+ lower_32_bits(q->ctx_save_restore_area_address);
+ m->cp_hqd_ctx_save_base_addr_hi =
+ upper_32_bits(q->ctx_save_restore_area_address);
+ m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
+ m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
+ m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
+ m->cp_hqd_wg_state_offset = q->ctl_stack_size;
+ }
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+ mm->update_mqd(mm, m, q, NULL);
+}
+
+static int load_mqd(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ struct queue_properties *p, struct mm_struct *mms)
+{
+ int r = 0;
+ /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
+ uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
+
+ r = mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
+ (uint32_t __user *)p->write_ptr,
+ wptr_shift, 0, mms, 0);
+ return r;
+}
+
+static void update_mqd(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q,
+ struct mqd_update_info *minfo)
+{
+ struct v12_compute_mqd *m;
+
+ m = get_mqd(mqd);
+
+ m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK;
+ m->cp_hqd_pq_control |=
+ ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
+ pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
+
+ m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
+ m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
+
+ m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+ m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+ m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
+ m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
+
+ m->cp_hqd_pq_doorbell_control =
+ q->doorbell_off <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+ pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+ m->cp_hqd_pq_doorbell_control);
+
+ m->cp_hqd_ib_control = 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT;
+
+ /*
+ * HW does not clamp this field correctly. Maximum EOP queue size
+ * is constrained by per-SE EOP done signal count, which is 8-bit.
+ * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
+ * more than (EOP entry count - 1) so a queue size of 0x800 dwords
+ * is safe, giving a maximum field value of 0xA.
+ */
+ m->cp_hqd_eop_control = min(0xA,
+ ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1);
+ m->cp_hqd_eop_base_addr_lo =
+ lower_32_bits(q->eop_ring_buffer_address >> 8);
+ m->cp_hqd_eop_base_addr_hi =
+ upper_32_bits(q->eop_ring_buffer_address >> 8);
+
+ m->cp_hqd_iq_timer = 0;
+
+ m->cp_hqd_vmid = q->vmid;
+
+ if (q->format == KFD_QUEUE_FORMAT_AQL) {
+ /* GC 10 removed WPP_CLAMP from PQ Control */
+ m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
+ 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT |
+ 1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT;
+ m->cp_hqd_pq_doorbell_control |=
+ 1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT;
+ }
+ if (mm->dev->kfd->cwsr_enabled)
+ m->cp_hqd_ctx_save_control = 0;
+
+ update_cu_mask(mm, mqd, minfo);
+ set_priority(m, q);
+
+ q->is_active = QUEUE_IS_ACTIVE(*q);
+}
+
+static bool check_preemption_failed(struct mqd_manager *mm, void *mqd)
+{
+ struct v12_compute_mqd *m = (struct v12_compute_mqd *)mqd;
+
+ return kfd_check_hiq_mqd_doorbell_id(mm->dev, m->queue_doorbell_id0, 0);
+}
+
+static int get_wave_state(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q,
+ void __user *ctl_stack,
+ u32 *ctl_stack_used_size,
+ u32 *save_area_used_size)
+{
+ struct v12_compute_mqd *m;
+ struct mqd_user_context_save_area_header header;
+
+ m = get_mqd(mqd);
+
+ /* Control stack is written backwards, while workgroup context data
+ * is written forwards. Both starts from m->cp_hqd_cntl_stack_size.
+ * Current position is at m->cp_hqd_cntl_stack_offset and
+ * m->cp_hqd_wg_state_offset, respectively.
+ */
+ *ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
+ m->cp_hqd_cntl_stack_offset;
+ *save_area_used_size = m->cp_hqd_wg_state_offset -
+ m->cp_hqd_cntl_stack_size;
+
+ /* Control stack is not copied to user mode for GFXv12 because
+ * it's part of the context save area that is already
+ * accessible to user mode
+ */
+ header.control_stack_size = *ctl_stack_used_size;
+ header.wave_state_size = *save_area_used_size;
+
+ header.wave_state_offset = m->cp_hqd_wg_state_offset;
+ header.control_stack_offset = m->cp_hqd_cntl_stack_offset;
+
+ if (copy_to_user(ctl_stack, &header, sizeof(header)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *q)
+{
+ struct v12_compute_mqd *m;
+
+ init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
+
+ m = get_mqd(*mqd);
+
+ m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
+ 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
+}
+
+static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *q)
+{
+ struct v12_sdma_mqd *m;
+
+ m = (struct v12_sdma_mqd *) mqd_mem_obj->cpu_ptr;
+
+ memset(m, 0, sizeof(struct v12_sdma_mqd));
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = mqd_mem_obj->gpu_addr;
+
+ mm->update_mqd(mm, m, q, NULL);
+}
+
+#define SDMA_RLC_DUMMY_DEFAULT 0xf
+
+static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q,
+ struct mqd_update_info *minfo)
+{
+ struct v12_sdma_mqd *m;
+
+ m = get_sdma_mqd(mqd);
+ m->sdmax_rlcx_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1)
+ << SDMA0_QUEUE0_RB_CNTL__RB_SIZE__SHIFT |
+ q->vmid << SDMA0_QUEUE0_RB_CNTL__RB_VMID__SHIFT |
+ 1 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+ 6 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
+ 1 << SDMA0_QUEUE0_RB_CNTL__MCU_WPTR_POLL_ENABLE__SHIFT;
+
+ m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
+ m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
+ m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+ m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+ m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
+ m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
+ m->sdmax_rlcx_doorbell_offset =
+ q->doorbell_off << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+ m->sdmax_rlcx_sched_cntl = (amdgpu_sdma_phase_quantum
+ << SDMA0_QUEUE0_SCHEDULE_CNTL__CONTEXT_QUANTUM__SHIFT)
+ & SDMA0_QUEUE0_SCHEDULE_CNTL__CONTEXT_QUANTUM_MASK;
+
+ m->sdma_engine_id = q->sdma_engine_id;
+ m->sdma_queue_id = q->sdma_queue_id;
+
+ m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;
+
+ q->is_active = QUEUE_IS_ACTIVE(*q);
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+static int debugfs_show_mqd(struct seq_file *m, void *data)
+{
+ seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
+ data, sizeof(struct v12_compute_mqd), false);
+ return 0;
+}
+
+static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
+{
+ seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
+ data, sizeof(struct v12_sdma_mqd), false);
+ return 0;
+}
+
+#endif
+
+struct mqd_manager *mqd_manager_init_v12(enum KFD_MQD_TYPE type,
+ struct kfd_node *dev)
+{
+ struct mqd_manager *mqd;
+
+ if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
+ return NULL;
+
+ mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
+ if (!mqd)
+ return NULL;
+
+ mqd->dev = dev;
+
+ switch (type) {
+ case KFD_MQD_TYPE_CP:
+ pr_debug("%s@%i\n", __func__, __LINE__);
+ mqd->allocate_mqd = allocate_mqd;
+ mqd->init_mqd = init_mqd;
+ mqd->free_mqd = kfd_free_mqd_cp;
+ mqd->load_mqd = load_mqd;
+ mqd->update_mqd = update_mqd;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
+ mqd->mqd_size = sizeof(struct v12_compute_mqd);
+ mqd->get_wave_state = get_wave_state;
+ mqd->mqd_stride = kfd_mqd_stride;
+#if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+ pr_debug("%s@%i\n", __func__, __LINE__);
+ break;
+ case KFD_MQD_TYPE_HIQ:
+ pr_debug("%s@%i\n", __func__, __LINE__);
+ mqd->allocate_mqd = allocate_hiq_mqd;
+ mqd->init_mqd = init_mqd_hiq;
+ mqd->free_mqd = free_mqd_hiq_sdma;
+ mqd->load_mqd = kfd_hiq_load_mqd_kiq;
+ mqd->update_mqd = update_mqd;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
+ mqd->mqd_size = sizeof(struct v12_compute_mqd);
+ mqd->mqd_stride = kfd_mqd_stride;
+#if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+ mqd->check_preemption_failed = check_preemption_failed;
+ pr_debug("%s@%i\n", __func__, __LINE__);
+ break;
+ case KFD_MQD_TYPE_DIQ:
+ mqd->allocate_mqd = allocate_mqd;
+ mqd->init_mqd = init_mqd_hiq;
+ mqd->free_mqd = kfd_free_mqd_cp;
+ mqd->load_mqd = load_mqd;
+ mqd->update_mqd = update_mqd;
+ mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->is_occupied = kfd_is_occupied_cp;
+ mqd->mqd_size = sizeof(struct v12_compute_mqd);
+#if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+ break;
+ case KFD_MQD_TYPE_SDMA:
+ pr_debug("%s@%i\n", __func__, __LINE__);
+ mqd->allocate_mqd = allocate_mqd;
+ mqd->init_mqd = init_mqd_sdma;
+ mqd->free_mqd = kfd_free_mqd_cp;
+ mqd->load_mqd = kfd_load_mqd_sdma;
+ mqd->update_mqd = update_mqd_sdma;
+ mqd->destroy_mqd = kfd_destroy_mqd_sdma;
+ mqd->is_occupied = kfd_is_occupied_sdma;
+ mqd->mqd_size = sizeof(struct v12_sdma_mqd);
+ mqd->mqd_stride = kfd_mqd_stride;
+#if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
+#endif
+ pr_debug("%s@%i\n", __func__, __LINE__);
+ break;
+ default:
+ kfree(mqd);
+ return NULL;
+ }
+
+ return mqd;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 697b6d530d12..97933d2a3803 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -77,7 +77,9 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
m->compute_static_thread_mgmt_se1 = se_mask[1];
m->compute_static_thread_mgmt_se2 = se_mask[2];
m->compute_static_thread_mgmt_se3 = se_mask[3];
- if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3)) {
+ if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3) &&
+ KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 5, 0)) {
m->compute_static_thread_mgmt_se4 = se_mask[4];
m->compute_static_thread_mgmt_se5 = se_mask[5];
m->compute_static_thread_mgmt_se6 = se_mask[6];
@@ -181,6 +183,9 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
+ m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+ m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK;
+
m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;
m->cp_mqd_base_addr_lo = lower_32_bits(addr);
@@ -243,7 +248,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
m = get_mqd(mqd);
- m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+ m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK;
m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1;
pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
@@ -299,7 +304,9 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address)
m->cp_hqd_ctx_save_control = 0;
- if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3))
+ if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3) &&
+ KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 5, 0))
update_cu_mask(mm, mqd, minfo, 0);
set_priority(m, q);
@@ -316,11 +323,14 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
}
-static uint32_t read_doorbell_id(void *mqd)
+static bool check_preemption_failed(struct mqd_manager *mm, void *mqd)
{
struct v9_mqd *m = (struct v9_mqd *)mqd;
+ uint32_t doorbell_id = m->queue_doorbell_id0;
+
+ m->queue_doorbell_id0 = 0;
- return m->queue_doorbell_id0;
+ return kfd_check_hiq_mqd_doorbell_id(mm->dev, doorbell_id, 0);
}
static int get_wave_state(struct mqd_manager *mm, void *mqd,
@@ -485,6 +495,10 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
m->sdma_engine_id = q->sdma_engine_id;
m->sdma_queue_id = q->sdma_queue_id;
m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;
+ /* Allow context switch so we don't cross-process starve with a massive
+ * command buffer of long-running SDMA commands
+ */
+ m->sdmax_rlcx_ib_cntl |= SDMA0_GFX_IB_CNTL__SWITCH_INSIDE_IB_MASK;
q->is_active = QUEUE_IS_ACTIVE(*q);
}
@@ -544,6 +558,9 @@ static void init_mqd_hiq_v9_4_3(struct mqd_manager *mm, void **mqd,
m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
+ if (amdgpu_sriov_multi_vf_mode(mm->dev->adev))
+ m->cp_hqd_pq_doorbell_control |= 1 <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT;
m->cp_mqd_stride_size = kfd_hiq_mqd_stride(mm->dev);
if (xcc == 0) {
/* Set no_update_rptr = 0 in Master XCC */
@@ -607,6 +624,25 @@ static int destroy_hiq_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
return err;
}
+static bool check_preemption_failed_v9_4_3(struct mqd_manager *mm, void *mqd)
+{
+ uint64_t hiq_mqd_size = kfd_hiq_mqd_stride(mm->dev);
+ uint32_t xcc_mask = mm->dev->xcc_mask;
+ int inst = 0, xcc_id;
+ struct v9_mqd *m;
+ bool ret = false;
+
+ for_each_inst(xcc_id, xcc_mask) {
+ m = get_mqd(mqd + hiq_mqd_size * inst);
+ ret |= kfd_check_hiq_mqd_doorbell_id(mm->dev,
+ m->queue_doorbell_id0, inst);
+ m->queue_doorbell_id0 = 0;
+ ++inst;
+ }
+
+ return ret;
+}
+
static void get_xcc_mqd(struct kfd_mem_obj *mqd_mem_obj,
struct kfd_mem_obj *xcc_mqd_mem_obj,
uint64_t offset)
@@ -635,7 +671,9 @@ static void init_mqd_v9_4_3(struct mqd_manager *mm, void **mqd,
get_xcc_mqd(mqd_mem_obj, &xcc_mqd_mem_obj, offset*xcc);
init_mqd(mm, (void **)&m, &xcc_mqd_mem_obj, &xcc_gart_addr, q);
-
+ if (amdgpu_sriov_multi_vf_mode(mm->dev->adev))
+ m->cp_hqd_pq_doorbell_control |= 1 <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT;
m->cp_mqd_stride_size = offset;
/*
@@ -695,7 +733,10 @@ static void update_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
m = get_mqd(mqd + size * xcc);
update_mqd(mm, m, q, minfo);
- update_cu_mask(mm, mqd, minfo, xcc);
+ if (amdgpu_sriov_multi_vf_mode(mm->dev->adev))
+ m->cp_hqd_pq_doorbell_control |= 1 <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT;
+ update_cu_mask(mm, m, minfo, xcc);
if (q->format == KFD_QUEUE_FORMAT_AQL) {
switch (xcc) {
@@ -717,6 +758,21 @@ static void update_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
}
}
+static void restore_mqd_v9_4_3(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *qp,
+ const void *mqd_src,
+ const void *ctl_stack_src, u32 ctl_stack_size)
+{
+ restore_mqd(mm, mqd, mqd_mem_obj, gart_addr, qp, mqd_src, ctl_stack_src, ctl_stack_size);
+ if (amdgpu_sriov_multi_vf_mode(mm->dev->adev)) {
+ struct v9_mqd *m;
+
+ m = (struct v9_mqd *) mqd_mem_obj->cpu_ptr;
+ m->cp_hqd_pq_doorbell_control |= 1 <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT;
+ }
+}
static int destroy_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
enum kfd_preempt_type type, unsigned int timeout,
uint32_t pipe_id, uint32_t queue_id)
@@ -851,22 +907,25 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
mqd->is_occupied = kfd_is_occupied_cp;
mqd->get_checkpoint_info = get_checkpoint_info;
mqd->checkpoint_mqd = checkpoint_mqd;
- mqd->restore_mqd = restore_mqd;
mqd->mqd_size = sizeof(struct v9_mqd);
mqd->mqd_stride = mqd_stride_v9;
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
- if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3)) {
+ if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) {
mqd->init_mqd = init_mqd_v9_4_3;
mqd->load_mqd = load_mqd_v9_4_3;
mqd->update_mqd = update_mqd_v9_4_3;
+ mqd->restore_mqd = restore_mqd_v9_4_3;
mqd->destroy_mqd = destroy_mqd_v9_4_3;
mqd->get_wave_state = get_wave_state_v9_4_3;
} else {
mqd->init_mqd = init_mqd;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd;
+ mqd->restore_mqd = restore_mqd;
mqd->destroy_mqd = kfd_destroy_mqd_cp;
mqd->get_wave_state = get_wave_state;
}
@@ -881,15 +940,19 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
- mqd->read_doorbell_id = read_doorbell_id;
- if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3)) {
+ mqd->check_preemption_failed = check_preemption_failed;
+ if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) {
mqd->init_mqd = init_mqd_hiq_v9_4_3;
mqd->load_mqd = hiq_load_mqd_kiq_v9_4_3;
mqd->destroy_mqd = destroy_hiq_mqd_v9_4_3;
+ mqd->check_preemption_failed = check_preemption_failed_v9_4_3;
} else {
mqd->init_mqd = init_mqd_hiq;
mqd->load_mqd = kfd_hiq_load_mqd_kiq;
mqd->destroy_mqd = destroy_hiq_mqd;
+ mqd->check_preemption_failed = check_preemption_failed;
}
break;
case KFD_MQD_TYPE_DIQ:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index 3e1a574d4ea6..c1fafc502515 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -237,11 +237,11 @@ static void __update_mqd(struct mqd_manager *mm, void *mqd,
q->is_active = QUEUE_IS_ACTIVE(*q);
}
-static uint32_t read_doorbell_id(void *mqd)
+static bool check_preemption_failed(struct mqd_manager *mm, void *mqd)
{
struct vi_mqd *m = (struct vi_mqd *)mqd;
- return m->queue_doorbell_id0;
+ return kfd_check_hiq_mqd_doorbell_id(mm->dev, m->queue_doorbell_id0, 0);
}
static void update_mqd(struct mqd_manager *mm, void *mqd,
@@ -482,7 +482,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
- mqd->read_doorbell_id = read_doorbell_id;
+ mqd->check_preemption_failed = check_preemption_failed;
break;
case KFD_MQD_TYPE_DIQ:
mqd->allocate_mqd = allocate_mqd;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index d6f65f39072b..b1a6eb349bb3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -28,6 +28,11 @@
#include "kfd_kernel_queue.h"
#include "kfd_priv.h"
+#define OVER_SUBSCRIPTION_PROCESS_COUNT (1 << 0)
+#define OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT (1 << 1)
+#define OVER_SUBSCRIPTION_GWS_QUEUE_COUNT (1 << 2)
+#define OVER_SUBSCRIPTION_XNACK_CONFLICT (1 << 3)
+
static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
unsigned int buffer_size_bytes)
{
@@ -40,12 +45,14 @@ static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
static void pm_calc_rlib_size(struct packet_manager *pm,
unsigned int *rlib_size,
- bool *over_subscription)
+ int *over_subscription,
+ int xnack_conflict)
{
unsigned int process_count, queue_count, compute_queue_count, gws_queue_count;
unsigned int map_queue_size;
unsigned int max_proc_per_quantum = 1;
- struct kfd_node *dev = pm->dqm->dev;
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
process_count = pm->dqm->processes_count;
queue_count = pm->dqm->active_queue_count;
@@ -57,17 +64,22 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
* hws_max_conc_proc has been done in
* kgd2kfd_device_init().
*/
- *over_subscription = false;
+ *over_subscription = 0;
- if (dev->max_proc_per_quantum > 1)
- max_proc_per_quantum = dev->max_proc_per_quantum;
+ if (node->max_proc_per_quantum > 1)
+ max_proc_per_quantum = node->max_proc_per_quantum;
- if ((process_count > max_proc_per_quantum) ||
- compute_queue_count > get_cp_queues_num(pm->dqm) ||
- gws_queue_count > 1) {
- *over_subscription = true;
- pr_debug("Over subscribed runlist\n");
- }
+ if (process_count > max_proc_per_quantum)
+ *over_subscription |= OVER_SUBSCRIPTION_PROCESS_COUNT;
+ if (compute_queue_count > get_cp_queues_num(pm->dqm))
+ *over_subscription |= OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT;
+ if (gws_queue_count > 1)
+ *over_subscription |= OVER_SUBSCRIPTION_GWS_QUEUE_COUNT;
+ if (xnack_conflict && (node->adev->gmc.xnack_flags & AMDGPU_GMC_XNACK_FLAG_CHAIN))
+ *over_subscription |= OVER_SUBSCRIPTION_XNACK_CONFLICT;
+
+ if (*over_subscription)
+ dev_dbg(dev, "Over subscribed runlist\n");
map_queue_size = pm->pmf->map_queues_size;
/* calculate run list ib allocation size */
@@ -81,29 +93,32 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
if (*over_subscription)
*rlib_size += pm->pmf->runlist_size;
- pr_debug("runlist ib size %d\n", *rlib_size);
+ dev_dbg(dev, "runlist ib size %d\n", *rlib_size);
}
static int pm_allocate_runlist_ib(struct packet_manager *pm,
unsigned int **rl_buffer,
uint64_t *rl_gpu_buffer,
unsigned int *rl_buffer_size,
- bool *is_over_subscription)
+ int *is_over_subscription,
+ int xnack_conflict)
{
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
int retval;
if (WARN_ON(pm->allocated))
return -EINVAL;
- pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription);
+ pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription,
+ xnack_conflict);
mutex_lock(&pm->lock);
- retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size,
- &pm->ib_buffer_obj);
+ retval = kfd_gtt_sa_allocate(node, *rl_buffer_size, &pm->ib_buffer_obj);
if (retval) {
- pr_err("Failed to allocate runlist IB\n");
+ dev_err(dev, "Failed to allocate runlist IB\n");
goto out;
}
@@ -125,32 +140,54 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
{
unsigned int alloc_size_bytes;
unsigned int *rl_buffer, rl_wptr, i;
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
int retval, processes_mapped;
struct device_process_node *cur;
struct qcm_process_device *qpd;
struct queue *q;
struct kernel_queue *kq;
- bool is_over_subscription;
+ int is_over_subscription;
+ int xnack_enabled = -1;
+ bool xnack_conflict = 0;
rl_wptr = retval = processes_mapped = 0;
+ /* Check if processes set different xnack modes */
+ list_for_each_entry(cur, queues, list) {
+ qpd = cur->qpd;
+ if (xnack_enabled < 0)
+ /* First process */
+ xnack_enabled = qpd->pqm->process->xnack_enabled;
+ else if (qpd->pqm->process->xnack_enabled != xnack_enabled) {
+ /* Found a process with a different xnack mode */
+ xnack_conflict = 1;
+ break;
+ }
+ }
+
retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr,
- &alloc_size_bytes, &is_over_subscription);
+ &alloc_size_bytes, &is_over_subscription,
+ xnack_conflict);
if (retval)
return retval;
*rl_size_bytes = alloc_size_bytes;
pm->ib_size_bytes = alloc_size_bytes;
- pr_debug("Building runlist ib process count: %d queues count %d\n",
+ dev_dbg(dev, "Building runlist ib process count: %d queues count %d\n",
pm->dqm->processes_count, pm->dqm->active_queue_count);
+build_runlist_ib:
/* build the run list ib packet */
list_for_each_entry(cur, queues, list) {
qpd = cur->qpd;
+ /* group processes with the same xnack mode together */
+ if (qpd->pqm->process->xnack_enabled != xnack_enabled)
+ continue;
/* build map process packet */
if (processes_mapped >= pm->dqm->processes_count) {
- pr_debug("Not enough space left in runlist IB\n");
+ dev_dbg(dev, "Not enough space left in runlist IB\n");
pm_release_ib(pm);
return -ENOMEM;
}
@@ -167,7 +204,8 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
if (!kq->queue->properties.is_active)
continue;
- pr_debug("static_queue, mapping kernel q %d, is debug status %d\n",
+ dev_dbg(dev,
+ "static_queue, mapping kernel q %d, is debug status %d\n",
kq->queue->queue, qpd->is_debug);
retval = pm->pmf->map_queues(pm,
@@ -186,7 +224,8 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
if (!q->properties.is_active)
continue;
- pr_debug("static_queue, mapping user queue %d, is debug status %d\n",
+ dev_dbg(dev,
+ "static_queue, mapping user queue %d, is debug status %d\n",
q->queue, qpd->is_debug);
retval = pm->pmf->map_queues(pm,
@@ -202,18 +241,33 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
alloc_size_bytes);
}
}
+ if (xnack_conflict) {
+ /* pick up processes with the other xnack mode */
+ xnack_enabled = !xnack_enabled;
+ xnack_conflict = 0;
+ goto build_runlist_ib;
+ }
- pr_debug("Finished map process and queues to runlist\n");
+ dev_dbg(dev, "Finished map process and queues to runlist\n");
if (is_over_subscription) {
if (!pm->is_over_subscription)
- pr_warn("Runlist is getting oversubscribed. Expect reduced ROCm performance.\n");
+ dev_warn(dev, "Runlist is getting oversubscribed due to%s%s%s%s. Expect reduced ROCm performance.\n",
+ is_over_subscription & OVER_SUBSCRIPTION_PROCESS_COUNT ?
+ " too many processes" : "",
+ is_over_subscription & OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT ?
+ " too many queues" : "",
+ is_over_subscription & OVER_SUBSCRIPTION_GWS_QUEUE_COUNT ?
+ " multiple processes using cooperative launch" : "",
+ is_over_subscription & OVER_SUBSCRIPTION_XNACK_CONFLICT ?
+ " xnack on/off processes mixed on gfx9" : "");
+
retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr],
*rl_gpu_addr,
alloc_size_bytes / sizeof(uint32_t),
true);
}
- pm->is_over_subscription = is_over_subscription;
+ pm->is_over_subscription = !!is_over_subscription;
for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++)
pr_debug("0x%2X ", rl_buffer[i]);
@@ -239,7 +293,9 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
break;
default:
if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2) ||
- KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3))
+ KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 5, 0))
pm->pmf = &kfd_aldebaran_pm_funcs;
else if (KFD_GC_VERSION(dqm->dev) >= IP_VERSION(9, 0, 1))
pm->pmf = &kfd_v9_pm_funcs;
@@ -262,16 +318,18 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
return 0;
}
-void pm_uninit(struct packet_manager *pm, bool hanging)
+void pm_uninit(struct packet_manager *pm)
{
mutex_destroy(&pm->lock);
- kernel_queue_uninit(pm->priv_queue, hanging);
+ kernel_queue_uninit(pm->priv_queue);
pm->priv_queue = NULL;
}
int pm_send_set_resources(struct packet_manager *pm,
struct scheduling_resources *res)
{
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
uint32_t *buffer, size;
int retval = 0;
@@ -281,7 +339,7 @@ int pm_send_set_resources(struct packet_manager *pm,
size / sizeof(uint32_t),
(unsigned int **)&buffer);
if (!buffer) {
- pr_err("Failed to allocate buffer on kernel queue\n");
+ dev_err(dev, "Failed to allocate buffer on kernel queue\n");
retval = -ENOMEM;
goto out;
}
@@ -343,6 +401,8 @@ fail_create_runlist_ib:
int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
uint64_t fence_value)
{
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
uint32_t *buffer, size;
int retval = 0;
@@ -354,7 +414,7 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
kq_acquire_packet_buffer(pm->priv_queue,
size / sizeof(uint32_t), (unsigned int **)&buffer);
if (!buffer) {
- pr_err("Failed to allocate buffer on kernel queue\n");
+ dev_err(dev, "Failed to allocate buffer on kernel queue\n");
retval = -ENOMEM;
goto out;
}
@@ -370,12 +430,33 @@ out:
return retval;
}
-int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period)
+/* pm_config_dequeue_wait_counts: Configure dequeue timer Wait Counts
+ * by writing to CP_IQ_WAIT_TIME2 registers.
+ *
+ * @cmd: See emum kfd_config_dequeue_wait_counts_cmd definition
+ * @value: Depends on the cmd. This parameter is unused for
+ * KFD_DEQUEUE_WAIT_INIT and KFD_DEQUEUE_WAIT_RESET. For
+ * KFD_DEQUEUE_WAIT_SET_SCH_WAVE it holds value to be set
+ *
+ */
+int pm_config_dequeue_wait_counts(struct packet_manager *pm,
+ enum kfd_config_dequeue_wait_counts_cmd cmd,
+ uint32_t value)
{
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
int retval = 0;
uint32_t *buffer, size;
- size = pm->pmf->set_grace_period_size;
+ if (!pm->pmf->config_dequeue_wait_counts ||
+ !pm->pmf->config_dequeue_wait_counts_size)
+ return 0;
+
+ if (cmd == KFD_DEQUEUE_WAIT_INIT && (KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(9, 4, 1) ||
+ KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(10, 0, 0)))
+ return 0;
+
+ size = pm->pmf->config_dequeue_wait_counts_size;
mutex_lock(&pm->lock);
@@ -385,18 +466,24 @@ int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period)
(unsigned int **)&buffer);
if (!buffer) {
- pr_err("Failed to allocate buffer on kernel queue\n");
+ dev_err(dev,
+ "Failed to allocate buffer on kernel queue\n");
retval = -ENOMEM;
goto out;
}
- retval = pm->pmf->set_grace_period(pm, buffer, grace_period);
- if (!retval)
+ retval = pm->pmf->config_dequeue_wait_counts(pm, buffer,
+ cmd, value);
+ if (!retval) {
retval = kq_submit_packet(pm->priv_queue);
- else
+
+ /* If default value is modified, cache that in dqm->wait_times */
+ if (!retval && cmd == KFD_DEQUEUE_WAIT_INIT)
+ update_dqm_wait_times(pm->dqm);
+ } else {
kq_rollback_packet(pm->priv_queue);
+ }
}
-
out:
mutex_unlock(&pm->lock);
return retval;
@@ -406,6 +493,8 @@ int pm_send_unmap_queue(struct packet_manager *pm,
enum kfd_unmap_queues_filter filter,
uint32_t filter_param, bool reset)
{
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
uint32_t *buffer, size;
int retval = 0;
@@ -414,7 +503,7 @@ int pm_send_unmap_queue(struct packet_manager *pm,
kq_acquire_packet_buffer(pm->priv_queue,
size / sizeof(uint32_t), (unsigned int **)&buffer);
if (!buffer) {
- pr_err("Failed to allocate buffer on kernel queue\n");
+ dev_err(dev, "Failed to allocate buffer on kernel queue\n");
retval = -ENOMEM;
goto out;
}
@@ -463,6 +552,8 @@ out:
int pm_debugfs_hang_hws(struct packet_manager *pm)
{
+ struct kfd_node *node = pm->dqm->dev;
+ struct device *dev = node->adev->dev;
uint32_t *buffer, size;
int r = 0;
@@ -474,16 +565,16 @@ int pm_debugfs_hang_hws(struct packet_manager *pm)
kq_acquire_packet_buffer(pm->priv_queue,
size / sizeof(uint32_t), (unsigned int **)&buffer);
if (!buffer) {
- pr_err("Failed to allocate buffer on kernel queue\n");
+ dev_err(dev, "Failed to allocate buffer on kernel queue\n");
r = -ENOMEM;
goto out;
}
memset(buffer, 0x55, size);
kq_submit_packet(pm->priv_queue);
- pr_info("Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
- buffer[0], buffer[1], buffer[2], buffer[3],
- buffer[4], buffer[5], buffer[6]);
+ dev_info(dev, "Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
+ buffer[0], buffer[1], buffer[2], buffer[3], buffer[4],
+ buffer[5], buffer[6]);
out:
mutex_unlock(&pm->lock);
return r;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
index 8ee2bedd301a..505036968a77 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
@@ -37,14 +37,17 @@ static int pm_map_process_v9(struct packet_manager *pm,
struct kfd_node *kfd = pm->dqm->dev;
struct kfd_process_device *pdd =
container_of(qpd, struct kfd_process_device, qpd);
+ struct amdgpu_device *adev = kfd->adev;
packet = (struct pm4_mes_map_process *)buffer;
memset(buffer, 0, sizeof(struct pm4_mes_map_process));
packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
sizeof(struct pm4_mes_map_process));
+ if (adev->enforce_isolation[kfd->node_id] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
+ packet->bitfields2.exec_cleaner_shader = 1;
packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
packet->bitfields2.process_quantum = 10;
- packet->bitfields2.pasid = qpd->pqm->process->pasid;
+ packet->bitfields2.pasid = pdd->pasid;
packet->bitfields14.gds_size = qpd->gds_size & 0x3F;
packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF;
packet->bitfields14.num_gws = (qpd->mapped_gws_queue) ? qpd->num_gws : 0;
@@ -89,17 +92,22 @@ static int pm_map_process_aldebaran(struct packet_manager *pm,
struct pm4_mes_map_process_aldebaran *packet;
uint64_t vm_page_table_base_addr = qpd->page_table_base;
struct kfd_dev *kfd = pm->dqm->dev->kfd;
+ struct kfd_node *knode = pm->dqm->dev;
struct kfd_process_device *pdd =
container_of(qpd, struct kfd_process_device, qpd);
int i;
+ struct amdgpu_device *adev = kfd->adev;
packet = (struct pm4_mes_map_process_aldebaran *)buffer;
memset(buffer, 0, sizeof(struct pm4_mes_map_process_aldebaran));
packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
sizeof(struct pm4_mes_map_process_aldebaran));
+ if (adev->enforce_isolation[knode->node_id] ==
+ AMDGPU_ENFORCE_ISOLATION_ENABLE)
+ packet->bitfields2.exec_cleaner_shader = 1;
packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
packet->bitfields2.process_quantum = 10;
- packet->bitfields2.pasid = qpd->pqm->process->pasid;
+ packet->bitfields2.pasid = pdd->pasid;
packet->bitfields14.gds_size = qpd->gds_size & 0x3F;
packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF;
packet->bitfields14.num_gws = (qpd->mapped_gws_queue) ? qpd->num_gws : 0;
@@ -144,18 +152,23 @@ static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer,
int concurrent_proc_cnt = 0;
struct kfd_node *kfd = pm->dqm->dev;
+ struct amdgpu_device *adev = kfd->adev;
/* Determine the number of processes to map together to HW:
* it can not exceed the number of VMIDs available to the
* scheduler, and it is determined by the smaller of the number
* of processes in the runlist and kfd module parameter
* hws_max_conc_proc.
+ * However, if enforce_isolation is set (toggle LDS/VGPRs/SGPRs
+ * cleaner between process switch), enable single-process mode
+ * in HWS.
* Note: the arbitration between the number of VMIDs and
* hws_max_conc_proc has been done in
* kgd2kfd_device_init().
*/
- concurrent_proc_cnt = min(pm->dqm->processes_count,
- kfd->max_proc_per_quantum);
+ concurrent_proc_cnt = (adev->enforce_isolation[kfd->node_id] ==
+ AMDGPU_ENFORCE_ISOLATION_ENABLE) ?
+ 1 : min(pm->dqm->processes_count, kfd->max_proc_per_quantum);
packet = (struct pm4_mes_runlist *)buffer;
@@ -190,6 +203,8 @@ static int pm_set_resources_v9(struct packet_manager *pm, uint32_t *buffer,
queue_type__mes_set_resources__hsa_interface_queue_hiq;
packet->bitfields2.vmid_mask = res->vmid_mask;
packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100;
+ if (pm->dqm->dev->adev->gmc.xnack_flags & AMDGPU_GMC_XNACK_FLAG_CHAIN)
+ packet->bitfields2.enb_xnack_retry_disable_check = 1;
packet->bitfields7.oac_mask = res->oac_mask;
packet->bitfields8.gds_heap_base = res->gds_heap_base;
packet->bitfields8.gds_heap_size = res->gds_heap_size;
@@ -213,7 +228,6 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
struct queue *q, bool is_static)
{
struct pm4_mes_map_queues *packet;
- bool use_static = is_static;
packet = (struct pm4_mes_map_queues *)buffer;
memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
@@ -226,7 +240,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
packet->bitfields2.engine_sel =
engine_sel__mes_map_queues__compute_vi;
- packet->bitfields2.gws_control_queue = q->gws ? 1 : 0;
+ packet->bitfields2.gws_control_queue = q->properties.is_gws ? 1 : 0;
packet->bitfields2.extended_engine_sel =
extended_engine_sel__mes_map_queues__legacy_engine_sel;
packet->bitfields2.queue_type =
@@ -234,7 +248,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
switch (q->properties.type) {
case KFD_QUEUE_TYPE_COMPUTE:
- if (use_static)
+ if (is_static)
packet->bitfields2.queue_type =
queue_type__mes_map_queues__normal_latency_static_queue_vi;
break;
@@ -244,7 +258,6 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
break;
case KFD_QUEUE_TYPE_SDMA:
case KFD_QUEUE_TYPE_SDMA_XGMI:
- use_static = false; /* no static queues under SDMA */
if (q->properties.sdma_engine_id < 2 &&
!pm_use_ext_eng(q->device->kfd))
packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
@@ -287,23 +300,79 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
return 0;
}
-static int pm_set_grace_period_v9(struct packet_manager *pm,
+static inline void pm_build_dequeue_wait_counts_packet_info(struct packet_manager *pm,
+ uint32_t sch_value, uint32_t que_sleep, uint32_t *reg_offset,
+ uint32_t *reg_data)
+{
+ pm->dqm->dev->kfd2kgd->build_dequeue_wait_counts_packet_info(
+ pm->dqm->dev->adev,
+ pm->dqm->wait_times,
+ sch_value,
+ que_sleep,
+ reg_offset,
+ reg_data);
+}
+
+/* pm_config_dequeue_wait_counts_v9: Builds WRITE_DATA packet with
+ * register/value for configuring dequeue wait counts
+ *
+ * @return: -ve for failure and 0 for success and buffer is
+ * filled in with packet
+ *
+ **/
+static int pm_config_dequeue_wait_counts_v9(struct packet_manager *pm,
uint32_t *buffer,
- uint32_t grace_period)
+ enum kfd_config_dequeue_wait_counts_cmd cmd,
+ uint32_t value)
{
struct pm4_mec_write_data_mmio *packet;
uint32_t reg_offset = 0;
uint32_t reg_data = 0;
- pm->dqm->dev->kfd2kgd->build_grace_period_packet_info(
- pm->dqm->dev->adev,
- pm->dqm->wait_times,
- grace_period,
- &reg_offset,
- &reg_data);
+ switch (cmd) {
+ case KFD_DEQUEUE_WAIT_INIT: {
+ uint32_t sch_wave = 0, que_sleep = 1;
+
+ /* For all gfx9 ASICs > gfx941,
+ * Reduce CP_IQ_WAIT_TIME2.QUE_SLEEP to 0x1 from default 0x40.
+ * On a 1GHz machine this is roughly 1 microsecond, which is
+ * about how long it takes to load data out of memory during
+ * queue connect
+ * QUE_SLEEP: Wait Count for Dequeue Retry.
+ *
+ * Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU
+ */
+ if (KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(9, 4, 1) ||
+ KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(10, 0, 0))
+ return -EPERM;
+
+ if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu &&
+ (KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3)))
+ sch_wave = 1;
- if (grace_period == USE_DEFAULT_GRACE_PERIOD)
- reg_data = pm->dqm->wait_times;
+ pm_build_dequeue_wait_counts_packet_info(pm, sch_wave, que_sleep,
+ &reg_offset, &reg_data);
+
+ break;
+ }
+ case KFD_DEQUEUE_WAIT_RESET:
+ /* reg_data would be set to dqm->wait_times */
+ pm_build_dequeue_wait_counts_packet_info(pm, 0, 0, &reg_offset, &reg_data);
+ break;
+
+ case KFD_DEQUEUE_WAIT_SET_SCH_WAVE:
+ /* The CP cannot handle value 0 and it will result in
+ * an infinite grace period being set so set to 1 to prevent this. Also
+ * avoid debugger API breakage as it sets 0 and expects a low value.
+ */
+ if (!value)
+ value = 1;
+ pm_build_dequeue_wait_counts_packet_info(pm, value, 0, &reg_offset, &reg_data);
+ break;
+ default:
+ pr_err("Invalid dequeue wait cmd\n");
+ return -EINVAL;
+ }
packet = (struct pm4_mec_write_data_mmio *)buffer;
memset(buffer, 0, sizeof(struct pm4_mec_write_data_mmio));
@@ -405,7 +474,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
.set_resources = pm_set_resources_v9,
.map_queues = pm_map_queues_v9,
.unmap_queues = pm_unmap_queues_v9,
- .set_grace_period = pm_set_grace_period_v9,
+ .config_dequeue_wait_counts = pm_config_dequeue_wait_counts_v9,
.query_status = pm_query_status_v9,
.release_mem = NULL,
.map_process_size = sizeof(struct pm4_mes_map_process),
@@ -413,7 +482,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
.set_resources_size = sizeof(struct pm4_mes_set_resources),
.map_queues_size = sizeof(struct pm4_mes_map_queues),
.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
- .set_grace_period_size = sizeof(struct pm4_mec_write_data_mmio),
+ .config_dequeue_wait_counts_size = sizeof(struct pm4_mec_write_data_mmio),
.query_status_size = sizeof(struct pm4_mes_query_status),
.release_mem_size = 0,
};
@@ -424,7 +493,7 @@ const struct packet_manager_funcs kfd_aldebaran_pm_funcs = {
.set_resources = pm_set_resources_v9,
.map_queues = pm_map_queues_v9,
.unmap_queues = pm_unmap_queues_v9,
- .set_grace_period = pm_set_grace_period_v9,
+ .config_dequeue_wait_counts = pm_config_dequeue_wait_counts_v9,
.query_status = pm_query_status_v9,
.release_mem = NULL,
.map_process_size = sizeof(struct pm4_mes_map_process_aldebaran),
@@ -432,7 +501,7 @@ const struct packet_manager_funcs kfd_aldebaran_pm_funcs = {
.set_resources_size = sizeof(struct pm4_mes_set_resources),
.map_queues_size = sizeof(struct pm4_mes_map_queues),
.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
- .set_grace_period_size = sizeof(struct pm4_mec_write_data_mmio),
+ .config_dequeue_wait_counts_size = sizeof(struct pm4_mec_write_data_mmio),
.query_status_size = sizeof(struct pm4_mes_query_status),
.release_mem_size = 0,
};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
index c1199d06d131..a1de5d7e173a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
@@ -42,6 +42,7 @@ unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size)
static int pm_map_process_vi(struct packet_manager *pm, uint32_t *buffer,
struct qcm_process_device *qpd)
{
+ struct kfd_process_device *pdd = qpd_to_pdd(qpd);
struct pm4_mes_map_process *packet;
packet = (struct pm4_mes_map_process *)buffer;
@@ -52,7 +53,7 @@ static int pm_map_process_vi(struct packet_manager *pm, uint32_t *buffer,
sizeof(struct pm4_mes_map_process));
packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
packet->bitfields2.process_quantum = 10;
- packet->bitfields2.pasid = qpd->pqm->process->pasid;
+ packet->bitfields2.pasid = pdd->pasid;
packet->bitfields3.page_table_base = qpd->page_table_base;
packet->bitfields10.gds_size = qpd->gds_size;
packet->bitfields10.num_gws = qpd->num_gws;
@@ -303,7 +304,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = {
.set_resources = pm_set_resources_vi,
.map_queues = pm_map_queues_vi,
.unmap_queues = pm_unmap_queues_vi,
- .set_grace_period = NULL,
+ .config_dequeue_wait_counts = NULL,
.query_status = pm_query_status_vi,
.release_mem = pm_release_mem_vi,
.map_process_size = sizeof(struct pm4_mes_map_process),
@@ -311,7 +312,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = {
.set_resources_size = sizeof(struct pm4_mes_set_resources),
.map_queues_size = sizeof(struct pm4_mes_map_queues),
.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
- .set_grace_period_size = 0,
+ .config_dequeue_wait_counts_size = 0,
.query_status_size = sizeof(struct pm4_mes_query_status),
.release_mem_size = sizeof(struct pm4_mec_release_mem)
};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
deleted file mode 100644
index e3b250918f39..000000000000
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
+++ /dev/null
@@ -1,70 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 OR MIT
-/*
- * Copyright 2014-2022 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <linux/types.h>
-#include "kfd_priv.h"
-#include "amdgpu_ids.h"
-
-static unsigned int pasid_bits = 16;
-static bool pasids_allocated; /* = false */
-
-bool kfd_set_pasid_limit(unsigned int new_limit)
-{
- if (new_limit < 2)
- return false;
-
- if (new_limit < (1U << pasid_bits)) {
- if (pasids_allocated)
- /* We've already allocated user PASIDs, too late to
- * change the limit
- */
- return false;
-
- while (new_limit < (1U << pasid_bits))
- pasid_bits--;
- }
-
- return true;
-}
-
-unsigned int kfd_get_pasid_limit(void)
-{
- return 1U << pasid_bits;
-}
-
-u32 kfd_pasid_alloc(void)
-{
- int r = amdgpu_pasid_alloc(pasid_bits);
-
- if (r > 0) {
- pasids_allocated = true;
- return r;
- }
-
- return 0;
-}
-
-void kfd_pasid_free(u32 pasid)
-{
- amdgpu_pasid_free(pasid);
-}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
index 8b6b2bd5c148..e356a207d03c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
@@ -63,7 +63,8 @@ struct pm4_mes_set_resources {
struct {
uint32_t vmid_mask:16;
uint32_t unmap_latency:8;
- uint32_t reserved1:5;
+ uint32_t reserved1:4;
+ uint32_t enb_xnack_retry_disable_check:1;
enum mes_set_resources_queue_type_enum queue_type:3;
} bitfields2;
uint32_t ordinal2;
@@ -145,8 +146,9 @@ struct pm4_mes_map_process {
union {
struct {
- uint32_t pasid:16;
- uint32_t reserved1:2;
+ uint32_t pasid:16; /* 0 - 15 */
+ uint32_t reserved1:1; /* 16 */
+ uint32_t exec_cleaner_shader:1; /* 17 */
uint32_t debug_vmid:4;
uint32_t new_debug:1;
uint32_t reserved2:1;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h
index 38f5cb6a222a..e0ed62c4ade0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h
@@ -37,7 +37,7 @@ struct pm4_mes_map_process_aldebaran {
struct {
uint32_t pasid:16; /* 0 - 15 */
uint32_t single_memops:1; /* 16 */
- uint32_t reserved1:1; /* 17 */
+ uint32_t exec_cleaner_shader:1; /* 17 */
uint32_t debug_vmid:4; /* 18 - 21 */
uint32_t new_debug:1; /* 22 */
uint32_t tmz:1; /* 23 */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 42d40560cd30..d221c58dccc3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -32,7 +32,7 @@
#include <linux/atomic.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
-#include <linux/kfd_ioctl.h>
+#include <uapi/linux/kfd_ioctl.h>
#include <linux/idr.h>
#include <linux/kfifo.h>
#include <linux/seq_file.h>
@@ -206,7 +206,9 @@ enum cache_policy {
#define KFD_IS_SOC15(dev) ((KFD_GC_VERSION(dev)) >= (IP_VERSION(9, 0, 1)))
#define KFD_SUPPORT_XNACK_PER_PROCESS(dev)\
((KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2)) || \
- (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3)))
+ (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3)) || \
+ (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)) || \
+ (KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)))
struct kfd_node;
@@ -272,7 +274,6 @@ struct kfd_node {
/* Interrupts */
struct kfifo ih_fifo;
- struct workqueue_struct *ih_wq;
struct work_struct interrupt_work;
spinlock_t interrupt_lock;
@@ -288,7 +289,6 @@ struct kfd_node {
/* Global GWS resource shared between processes */
void *gws;
- bool gws_debug_workaround;
/* Clients watching SMI events */
struct list_head smi_clients;
@@ -309,6 +309,10 @@ struct kfd_node {
struct kfd_local_mem_info local_mem_info;
struct kfd_dev *kfd;
+
+ /* Track per device allocated watch points */
+ uint32_t alloc_watch_ids;
+ spinlock_t watch_points_lock;
};
struct kfd_dev {
@@ -361,9 +365,7 @@ struct kfd_dev {
struct kfd_node *nodes[MAX_KFD_NODES];
unsigned int num_nodes;
- /* Track per device allocated watch points */
- uint32_t alloc_watch_ids;
- spinlock_t watch_points_lock;
+ struct workqueue_struct *ih_wq;
/* Kernel doorbells for KFD device */
struct amdgpu_bo *doorbells;
@@ -413,13 +415,16 @@ enum kfd_unmap_queues_filter {
* @KFD_QUEUE_TYPE_DIQ: DIQ queue type.
*
* @KFD_QUEUE_TYPE_SDMA_XGMI: Special SDMA queue for XGMI interface.
+ *
+ * @KFD_QUEUE_TYPE_SDMA_BY_ENG_ID: SDMA user mode queue with target SDMA engine ID.
*/
enum kfd_queue_type {
KFD_QUEUE_TYPE_COMPUTE,
KFD_QUEUE_TYPE_SDMA,
KFD_QUEUE_TYPE_HIQ,
KFD_QUEUE_TYPE_DIQ,
- KFD_QUEUE_TYPE_SDMA_XGMI
+ KFD_QUEUE_TYPE_SDMA_XGMI,
+ KFD_QUEUE_TYPE_SDMA_BY_ENG_ID
};
enum kfd_queue_format {
@@ -493,8 +498,8 @@ struct queue_properties {
uint64_t queue_size;
uint32_t priority;
uint32_t queue_percent;
- uint32_t *read_ptr;
- uint32_t *write_ptr;
+ void __user *read_ptr;
+ void __user *write_ptr;
void __iomem *doorbell_ptr;
uint32_t doorbell_off;
bool is_interop;
@@ -521,6 +526,12 @@ struct queue_properties {
uint64_t tba_addr;
uint64_t tma_addr;
uint64_t exception_status;
+
+ struct amdgpu_bo *wptr_bo;
+ struct amdgpu_bo *rptr_bo;
+ struct amdgpu_bo *ring_bo;
+ struct amdgpu_bo *eop_buf_bo;
+ struct amdgpu_bo *cwsr_bo;
};
#define QUEUE_IS_ACTIVE(q) ((q).queue_size > 0 && \
@@ -603,7 +614,7 @@ struct queue {
uint64_t gang_ctx_gpu_addr;
void *gang_ctx_cpu_ptr;
- struct amdgpu_bo *wptr_bo;
+ struct amdgpu_bo *wptr_bo_gart;
};
enum KFD_MQD_TYPE {
@@ -765,7 +776,7 @@ struct kfd_process_device {
enum kfd_pdd_bound bound;
/* VRAM usage */
- uint64_t vram_usage;
+ atomic64_t vram_usage;
struct attribute attr_vram;
char vram_filename[MAX_SYSFS_FILENAME_LEN];
@@ -836,6 +847,11 @@ struct kfd_process_device {
void *proc_ctx_bo;
uint64_t proc_ctx_gpu_addr;
void *proc_ctx_cpu_ptr;
+
+ /* Tracks queue reset status */
+ bool has_reset_queue;
+
+ u32 pasid;
};
#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
@@ -853,6 +869,14 @@ struct svm_range_list {
struct delayed_work restore_work;
DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE);
struct task_struct *faulting_task;
+ /* check point ts decides if page fault recovery need be dropped */
+ uint64_t checkpoint_ts[MAX_GPU_INSTANCE];
+
+ /* Default granularity to use in buffer migration
+ * and restoration of backing memory while handling
+ * recoverable page faults
+ */
+ uint8_t default_granularity;
};
/* Process data */
@@ -887,8 +911,6 @@ struct kfd_process {
/* We want to receive a notification when the mm_struct is destroyed */
struct mmu_notifier mmu_notifier;
- u32 pasid;
-
/*
* Array of kfd_process_device pointers,
* one for each device the process is using.
@@ -981,6 +1003,9 @@ struct kfd_process {
struct semaphore runtime_enable_sema;
bool is_runtime_retry;
struct kfd_runtime_info runtime_info;
+
+ /* if gpu page fault sent to KFD */
+ bool gpu_page_fault;
};
#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
@@ -1013,7 +1038,8 @@ void kfd_process_destroy_wq(void);
void kfd_cleanup_processes(void);
struct kfd_process *kfd_create_process(struct task_struct *thread);
struct kfd_process *kfd_get_process(const struct task_struct *task);
-struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid);
+struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid,
+ struct kfd_process_device **pdd);
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id);
@@ -1065,8 +1091,6 @@ struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid);
/* PASIDs */
int kfd_pasid_init(void);
void kfd_pasid_exit(void);
-bool kfd_set_pasid_limit(unsigned int new_limit);
-unsigned int kfd_get_pasid_limit(void);
u32 kfd_pasid_alloc(void);
void kfd_pasid_free(u32 pasid);
@@ -1116,7 +1140,6 @@ struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock(
uint32_t proximity_domain);
struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id);
struct kfd_node *kfd_device_by_id(uint32_t gpu_id);
-struct kfd_node *kfd_device_by_pci_dev(const struct pci_dev *pdev);
static inline bool kfd_irq_is_from_node(struct kfd_node *node, uint32_t node_id,
uint32_t vmid)
{
@@ -1128,7 +1151,9 @@ static inline struct kfd_node *kfd_node_by_irq_ids(struct amdgpu_device *adev,
struct kfd_dev *dev = adev->kfd.dev;
uint32_t i;
- if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3))
+ if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 5, 0))
return dev->nodes[0];
for (i = 0; i < dev->num_nodes; i++)
@@ -1282,6 +1307,15 @@ int init_queue(struct queue **q, const struct queue_properties *properties);
void uninit_queue(struct queue *q);
void print_queue_properties(struct queue_properties *q);
void print_queue(struct queue *q);
+int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_bo **pbo,
+ u64 expected_size);
+void kfd_queue_buffer_put(struct amdgpu_bo **bo);
+int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties);
+int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties);
+void kfd_queue_unref_bo_va(struct amdgpu_vm *vm, struct amdgpu_bo **bo);
+int kfd_queue_unref_bo_vas(struct kfd_process_device *pdd,
+ struct queue_properties *properties);
+void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev);
struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
struct kfd_node *dev);
@@ -1293,12 +1327,15 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
struct kfd_node *dev);
struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
struct kfd_node *dev);
+struct mqd_manager *mqd_manager_init_v12(enum KFD_MQD_TYPE type,
+ struct kfd_node *dev);
struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev);
void device_queue_manager_uninit(struct device_queue_manager *dqm);
struct kernel_queue *kernel_queue_init(struct kfd_node *dev,
enum kfd_queue_type type);
-void kernel_queue_uninit(struct kernel_queue *kq, bool hanging);
-int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid);
+void kernel_queue_uninit(struct kernel_queue *kq);
+int kfd_evict_process_device(struct kfd_process_device *pdd);
+int kfd_dqm_suspend_bad_queue_mes(struct kfd_node *knode, u32 pasid, u32 doorbell_id);
/* Process Queue Manager */
struct process_queue_node {
@@ -1313,10 +1350,8 @@ int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p);
void pqm_uninit(struct process_queue_manager *pqm);
int pqm_create_queue(struct process_queue_manager *pqm,
struct kfd_node *dev,
- struct file *f,
struct queue_properties *properties,
unsigned int *qid,
- struct amdgpu_bo *wptr_bo,
const struct kfd_criu_queue_priv_data *q_data,
const void *restore_mqd,
const void *restore_ctl_stack,
@@ -1328,8 +1363,6 @@ int pqm_update_mqd(struct process_queue_manager *pqm, unsigned int qid,
struct mqd_update_info *minfo);
int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
void *gws);
-struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
- unsigned int qid);
struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
unsigned int qid);
int pqm_get_wave_state(struct process_queue_manager *pqm,
@@ -1356,6 +1389,24 @@ int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm,
#define KFD_FENCE_COMPLETED (100)
#define KFD_FENCE_INIT (10)
+/**
+ * enum kfd_config_dequeue_wait_counts_cmd - Command for configuring
+ * dequeue wait counts.
+ *
+ * @KFD_DEQUEUE_WAIT_INIT: Set optimized dequeue wait counts for a
+ * certain ASICs. For these ASICs, this is default value used by RESET
+ * @KFD_DEQUEUE_WAIT_RESET: Reset dequeue wait counts to the optimized value
+ * for certain ASICs. For others set it to default hardware reset value
+ * @KFD_DEQUEUE_WAIT_SET_SCH_WAVE: Set context switch latency wait
+ *
+ */
+enum kfd_config_dequeue_wait_counts_cmd {
+ KFD_DEQUEUE_WAIT_INIT = 1,
+ KFD_DEQUEUE_WAIT_RESET = 2,
+ KFD_DEQUEUE_WAIT_SET_SCH_WAVE = 3
+};
+
+
struct packet_manager {
struct device_queue_manager *dqm;
struct kernel_queue *priv_queue;
@@ -1381,8 +1432,8 @@ struct packet_manager_funcs {
int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer,
enum kfd_unmap_queues_filter mode,
uint32_t filter_param, bool reset);
- int (*set_grace_period)(struct packet_manager *pm, uint32_t *buffer,
- uint32_t grace_period);
+ int (*config_dequeue_wait_counts)(struct packet_manager *pm, uint32_t *buffer,
+ enum kfd_config_dequeue_wait_counts_cmd cmd, uint32_t value);
int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
uint64_t fence_address, uint64_t fence_value);
int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);
@@ -1393,7 +1444,7 @@ struct packet_manager_funcs {
int set_resources_size;
int map_queues_size;
int unmap_queues_size;
- int set_grace_period_size;
+ int config_dequeue_wait_counts_size;
int query_status_size;
int release_mem_size;
};
@@ -1403,7 +1454,7 @@ extern const struct packet_manager_funcs kfd_v9_pm_funcs;
extern const struct packet_manager_funcs kfd_aldebaran_pm_funcs;
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
-void pm_uninit(struct packet_manager *pm, bool hanging);
+void pm_uninit(struct packet_manager *pm);
int pm_send_set_resources(struct packet_manager *pm,
struct scheduling_resources *res);
int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues);
@@ -1416,7 +1467,9 @@ int pm_send_unmap_queue(struct packet_manager *pm,
void pm_release_ib(struct packet_manager *pm);
-int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period);
+int pm_config_dequeue_wait_counts(struct packet_manager *pm,
+ enum kfd_config_dequeue_wait_counts_cmd cmd,
+ uint32_t wait_counts_config);
/* Following PM funcs can be shared among VI and AI */
unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
@@ -1454,7 +1507,9 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
int kfd_get_num_events(struct kfd_process *p);
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
-void kfd_signal_vm_fault_event(struct kfd_node *dev, u32 pasid,
+void kfd_signal_vm_fault_event_with_userptr(struct kfd_process *p, uint64_t gpu_va);
+
+void kfd_signal_vm_fault_event(struct kfd_process_device *pdd,
struct kfd_vm_fault_info *info,
struct kfd_hsa_memory_exception_data *data);
@@ -1473,7 +1528,7 @@ static inline void kfd_flush_tlb(struct kfd_process_device *pdd,
static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)
{
- return KFD_GC_VERSION(dev) > IP_VERSION(9, 4, 2) ||
+ return KFD_GC_VERSION(dev) >= IP_VERSION(9, 4, 2) ||
(KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) && dev->sdma_fw_version >= 18) ||
KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0);
}
@@ -1528,10 +1583,15 @@ int kfd_debugfs_hang_hws(struct kfd_node *dev);
int pm_debugfs_hang_hws(struct packet_manager *pm);
int dqm_debugfs_hang_hws(struct device_queue_manager *dqm);
+void kfd_debugfs_add_process(struct kfd_process *p);
+void kfd_debugfs_remove_process(struct kfd_process *p);
+
#else
static inline void kfd_debugfs_init(void) {}
static inline void kfd_debugfs_fini(void) {}
+static inline void kfd_debugfs_add_process(struct kfd_process *p) {}
+static inline void kfd_debugfs_remove_process(struct kfd_process *p) {}
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 717a60d7a4ea..722ac1662bdc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -35,6 +35,7 @@
#include <linux/pm_runtime.h>
#include "amdgpu_amdkfd.h"
#include "amdgpu.h"
+#include "amdgpu_reset.h"
struct mm_struct;
@@ -270,6 +271,9 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
struct kfd_node *dev = NULL;
struct kfd_process *proc = NULL;
struct kfd_process_device *pdd = NULL;
+ int i;
+ struct kfd_cu_occupancy *cu_occupancy;
+ u32 queue_format;
pdd = container_of(attr, struct kfd_process_device, attr_cu_occupancy);
dev = pdd->dev;
@@ -279,41 +283,64 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
cu_cnt = 0;
proc = pdd->process;
if (pdd->qpd.queue_count == 0) {
- pr_debug("Gpu-Id: %d has no active queues for process %d\n",
- dev->id, proc->pasid);
+ pr_debug("Gpu-Id: %d has no active queues for process pid %d\n",
+ dev->id, (int)proc->lead_thread->pid);
return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt);
}
/* Collect wave count from device if it supports */
wave_cnt = 0;
max_waves_per_cu = 0;
- dev->kfd2kgd->get_cu_occupancy(dev->adev, proc->pasid, &wave_cnt,
- &max_waves_per_cu, 0);
+
+ cu_occupancy = kcalloc(AMDGPU_MAX_QUEUES, sizeof(*cu_occupancy), GFP_KERNEL);
+ if (!cu_occupancy)
+ return -ENOMEM;
+
+ /*
+ * For GFX 9.4.3, fetch the CU occupancy from the first XCC in the partition.
+ * For AQL queues, because of cooperative dispatch we multiply the wave count
+ * by number of XCCs in the partition to get the total wave counts across all
+ * XCCs in the partition.
+ * For PM4 queues, there is no cooperative dispatch so wave_cnt stay as it is.
+ */
+ dev->kfd2kgd->get_cu_occupancy(dev->adev, cu_occupancy,
+ &max_waves_per_cu, ffs(dev->xcc_mask) - 1);
+
+ for (i = 0; i < AMDGPU_MAX_QUEUES; i++) {
+ if (cu_occupancy[i].wave_cnt != 0 &&
+ kfd_dqm_is_queue_in_process(dev->dqm, &pdd->qpd,
+ cu_occupancy[i].doorbell_off,
+ &queue_format)) {
+ if (unlikely(queue_format == KFD_QUEUE_FORMAT_PM4))
+ wave_cnt += cu_occupancy[i].wave_cnt;
+ else
+ wave_cnt += (NUM_XCC(dev->xcc_mask) *
+ cu_occupancy[i].wave_cnt);
+ }
+ }
/* Translate wave count to number of compute units */
cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu;
+ kfree(cu_occupancy);
return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt);
}
static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
char *buffer)
{
- if (strcmp(attr->name, "pasid") == 0) {
- struct kfd_process *p = container_of(attr, struct kfd_process,
- attr_pasid);
-
- return snprintf(buffer, PAGE_SIZE, "%d\n", p->pasid);
- } else if (strncmp(attr->name, "vram_", 5) == 0) {
+ if (strcmp(attr->name, "pasid") == 0)
+ return snprintf(buffer, PAGE_SIZE, "%d\n", 0);
+ else if (strncmp(attr->name, "vram_", 5) == 0) {
struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
attr_vram);
- return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
+ return snprintf(buffer, PAGE_SIZE, "%llu\n", atomic64_read(&pdd->vram_usage));
} else if (strncmp(attr->name, "sdma_", 5) == 0) {
struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
attr_sdma);
struct kfd_sdma_activity_handler_workarea sdma_activity_work_handler;
- INIT_WORK(&sdma_activity_work_handler.sdma_activity_work,
- kfd_sdma_activity_worker);
+ INIT_WORK_ONSTACK(&sdma_activity_work_handler.sdma_activity_work,
+ kfd_sdma_activity_worker);
sdma_activity_work_handler.pdd = pdd;
sdma_activity_work_handler.sdma_activity_counter = 0;
@@ -321,6 +348,7 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
schedule_work(&sdma_activity_work_handler.sdma_activity_work);
flush_work(&sdma_activity_work_handler.sdma_activity_work);
+ destroy_work_on_stack(&sdma_activity_work_handler.sdma_activity_work);
return snprintf(buffer, PAGE_SIZE, "%llu\n",
(sdma_activity_work_handler.sdma_activity_counter)/
@@ -811,6 +839,14 @@ struct kfd_process *kfd_create_process(struct task_struct *thread)
return ERR_PTR(-EINVAL);
}
+ /* If the process just called exec(3), it is possible that the
+ * cleanup of the kfd_process (following the release of the mm
+ * of the old process image) is still in the cleanup work queue.
+ * Make sure to drain any job before trying to recreate any
+ * resource for this process.
+ */
+ flush_workqueue(kfd_process_wq);
+
/*
* take kfd processes mutex before starting of process creation
* so there won't be a case where two threads of the same process
@@ -819,13 +855,15 @@ struct kfd_process *kfd_create_process(struct task_struct *thread)
mutex_lock(&kfd_processes_mutex);
if (kfd_is_locked()) {
- mutex_unlock(&kfd_processes_mutex);
pr_debug("KFD is locked! Cannot create process");
- return ERR_PTR(-EINVAL);
+ process = ERR_PTR(-EINVAL);
+ goto out;
}
- /* A prior open of /dev/kfd could have already created the process. */
- process = find_process(thread, false);
+ /* A prior open of /dev/kfd could have already created the process.
+ * find_process will increase process kref in this case
+ */
+ process = find_process(thread, true);
if (process) {
pr_debug("Process already found\n");
} else {
@@ -862,11 +900,11 @@ struct kfd_process *kfd_create_process(struct task_struct *thread)
kfd_procfs_add_sysfs_files(process);
kfd_procfs_add_sysfs_counters(process);
+ kfd_debugfs_add_process(process);
+
init_waitqueue_head(&process->wait_irq_drain);
}
out:
- if (!IS_ERR(process))
- kref_get(&process->ref);
mutex_unlock(&kfd_processes_mutex);
mmput(thread->mm);
@@ -1018,17 +1056,15 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *pdd = p->pdds[i];
- pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n",
- pdd->dev->id, p->pasid);
+ kfd_smi_event_process(pdd, false);
+ pr_debug("Releasing pdd (topology id %d, for pid %d)\n",
+ pdd->dev->id, p->lead_thread->pid);
kfd_process_device_destroy_cwsr_dgpu(pdd);
kfd_process_device_destroy_ib_mem(pdd);
- if (pdd->drm_file) {
- amdgpu_amdkfd_gpuvm_release_process_vm(
- pdd->dev->adev, pdd->drm_priv);
+ if (pdd->drm_file)
fput(pdd->drm_file);
- }
if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base)
free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
@@ -1038,9 +1074,10 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
kfd_free_process_doorbells(pdd->dev->kfd, pdd);
- if (pdd->dev->kfd->shared_resources.enable_mes)
+ if (pdd->dev->kfd->shared_resources.enable_mes &&
+ pdd->proc_ctx_cpu_ptr)
amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev,
- pdd->proc_ctx_bo);
+ &pdd->proc_ctx_bo);
/*
* before destroying pdd, make sure to report availability
* for auto suspend
@@ -1101,6 +1138,17 @@ static void kfd_process_remove_sysfs(struct kfd_process *p)
p->kobj = NULL;
}
+/*
+ * If any GPU is ongoing reset, wait for reset complete.
+ */
+static void kfd_process_wait_gpu_reset_complete(struct kfd_process *p)
+{
+ int i;
+
+ for (i = 0; i < p->n_pdds; i++)
+ flush_workqueue(p->pdds[i]->dev->adev->reset_domain->wq);
+}
+
/* No process locking is needed in this function, because the process
* is not findable any more. We must assume that no other thread is
* using it any more, otherwise we couldn't safely free the process
@@ -1115,15 +1163,22 @@ static void kfd_process_wq_release(struct work_struct *work)
kfd_process_dequeue_from_all_devices(p);
pqm_uninit(&p->pqm);
+ /*
+ * If GPU in reset, user queues may still running, wait for reset complete.
+ */
+ kfd_process_wait_gpu_reset_complete(p);
+
/* Signal the eviction fence after user mode queues are
* destroyed. This allows any BOs to be freed without
* triggering pointless evictions or waiting for fences.
*/
synchronize_rcu();
ef = rcu_access_pointer(p->ef);
- dma_fence_signal(ef);
+ if (ef)
+ dma_fence_signal(ef);
kfd_process_remove_sysfs(p);
+ kfd_debugfs_remove_process(p);
kfd_process_kunmap_signal_bo(p);
kfd_process_free_outstanding_kfd_bos(p);
@@ -1134,7 +1189,6 @@ static void kfd_process_wq_release(struct work_struct *work)
kfd_event_free_process(p);
- kfd_pasid_free(p->pasid);
mutex_destroy(&p->mutex);
put_task_struct(p->lead_thread);
@@ -1152,10 +1206,8 @@ static void kfd_process_ref_release(struct kref *ref)
static struct mmu_notifier *kfd_process_alloc_notifier(struct mm_struct *mm)
{
- int idx = srcu_read_lock(&kfd_processes_srcu);
- struct kfd_process *p = find_process_by_mm(mm);
-
- srcu_read_unlock(&kfd_processes_srcu, idx);
+ /* This increments p->ref counter if kfd process p exists */
+ struct kfd_process *p = kfd_lookup_process_by_mm(mm);
return p ? &p->mmu_notifier : ERR_PTR(-ESRCH);
}
@@ -1303,7 +1355,8 @@ int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
if (IS_ERR_VALUE(qpd->tba_addr)) {
int err = qpd->tba_addr;
- pr_err("Failure to set tba address. error %d.\n", err);
+ dev_err(dev->adev->dev,
+ "Failure to set tba address. error %d.\n", err);
qpd->tba_addr = 0;
qpd->cwsr_kaddr = NULL;
return err;
@@ -1486,12 +1539,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)
atomic_set(&process->debugged_process_count, 0);
sema_init(&process->runtime_enable_sema, 0);
- process->pasid = kfd_pasid_alloc();
- if (process->pasid == 0) {
- err = -ENOSPC;
- goto err_alloc_pasid;
- }
-
err = pqm_init(&process->pqm, process);
if (err != 0)
goto err_process_pqm_init;
@@ -1545,8 +1592,6 @@ err_init_svm_range_list:
err_init_apertures:
pqm_uninit(&process->pqm);
err_process_pqm_init:
- kfd_pasid_free(process->pasid);
-err_alloc_pasid:
kfd_event_free_process(process);
err_event_init:
mutex_destroy(&process->mutex);
@@ -1571,7 +1616,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
struct kfd_process *p)
{
struct kfd_process_device *pdd = NULL;
- int retval = 0;
if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE))
return NULL;
@@ -1590,25 +1634,11 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
pdd->bound = PDD_UNBOUND;
pdd->already_dequeued = false;
pdd->runtime_inuse = false;
- pdd->vram_usage = 0;
+ atomic64_set(&pdd->vram_usage, 0);
pdd->sdma_past_activity_counter = 0;
pdd->user_gpu_id = dev->id;
atomic64_set(&pdd->evict_duration_counter, 0);
- if (dev->kfd->shared_resources.enable_mes) {
- retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
- AMDGPU_MES_PROC_CTX_SIZE,
- &pdd->proc_ctx_bo,
- &pdd->proc_ctx_gpu_addr,
- &pdd->proc_ctx_cpu_ptr,
- false);
- if (retval) {
- pr_err("failed to allocate process context bo\n");
- goto err_free_pdd;
- }
- memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
- }
-
p->pdds[p->n_pdds++] = pdd;
if (kfd_dbg_is_per_vmid_supported(pdd->dev))
pdd->spi_dbg_override = pdd->dev->kfd2kgd->disable_debug_trap(
@@ -1620,10 +1650,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
idr_init(&pdd->alloc_idr);
return pdd;
-
-err_free_pdd:
- kfree(pdd);
- return NULL;
}
/**
@@ -1666,12 +1692,15 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(dev->adev, avm,
&p->kgd_process_info,
- &ef);
+ p->ef ? NULL : &ef);
if (ret) {
- pr_err("Failed to create process VM object\n");
+ dev_err(dev->adev->dev, "Failed to create process VM object\n");
return ret;
}
- RCU_INIT_POINTER(p->ef, ef);
+
+ if (!p->ef)
+ RCU_INIT_POINTER(p->ef, ef);
+
pdd->drm_priv = drm_file->private_data;
ret = kfd_process_device_reserve_ib_mem(pdd);
@@ -1681,15 +1710,21 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
if (ret)
goto err_init_cwsr;
- ret = amdgpu_amdkfd_gpuvm_set_vm_pasid(dev->adev, avm, p->pasid);
- if (ret)
- goto err_set_pasid;
+ if (unlikely(!avm->pasid)) {
+ dev_warn(pdd->dev->adev->dev, "WARN: vm %p has no pasid associated",
+ avm);
+ ret = -EINVAL;
+ goto err_get_pasid;
+ }
+ pdd->pasid = avm->pasid;
pdd->drm_file = drm_file;
+ kfd_smi_event_process(pdd, true);
+
return 0;
-err_set_pasid:
+err_get_pasid:
kfd_process_device_destroy_cwsr_dgpu(pdd);
err_init_cwsr:
kfd_process_device_destroy_ib_mem(pdd);
@@ -1715,7 +1750,7 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_node *dev,
pdd = kfd_get_process_device_data(dev, p);
if (!pdd) {
- pr_err("Process device data doesn't exist\n");
+ dev_err(dev->adev->dev, "Process device data doesn't exist\n");
return ERR_PTR(-ENOMEM);
}
@@ -1775,25 +1810,50 @@ void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
idr_remove(&pdd->alloc_idr, handle);
}
-/* This increments the process->ref counter. */
-struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid)
+static struct kfd_process_device *kfd_lookup_process_device_by_pasid(u32 pasid)
{
- struct kfd_process *p, *ret_p = NULL;
+ struct kfd_process_device *ret_p = NULL;
+ struct kfd_process *p;
unsigned int temp;
-
- int idx = srcu_read_lock(&kfd_processes_srcu);
+ int i;
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
- if (p->pasid == pasid) {
- kref_get(&p->ref);
- ret_p = p;
- break;
+ for (i = 0; i < p->n_pdds; i++) {
+ if (p->pdds[i]->pasid == pasid) {
+ ret_p = p->pdds[i];
+ break;
+ }
}
+ if (ret_p)
+ break;
+ }
+ return ret_p;
+}
+
+/* This increments the process->ref counter. */
+struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid,
+ struct kfd_process_device **pdd)
+{
+ struct kfd_process_device *ret_p;
+
+ int idx = srcu_read_lock(&kfd_processes_srcu);
+
+ ret_p = kfd_lookup_process_device_by_pasid(pasid);
+ if (ret_p) {
+ if (pdd)
+ *pdd = ret_p;
+ kref_get(&ret_p->process->ref);
+
+ srcu_read_unlock(&kfd_processes_srcu, idx);
+ return ret_p->process;
}
srcu_read_unlock(&kfd_processes_srcu, idx);
- return ret_p;
+ if (pdd)
+ *pdd = NULL;
+
+ return NULL;
}
/* This increments the process->ref counter. */
@@ -1825,6 +1885,7 @@ int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger)
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *pdd = p->pdds[i];
+ struct device *dev = pdd->dev->adev->dev;
kfd_smi_event_queue_eviction(pdd->dev, p->lead_thread->pid,
trigger);
@@ -1836,10 +1897,12 @@ int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger)
* them been add back since they actually not be saved right now.
*/
if (r && r != -EIO) {
- pr_err("Failed to evict process queues\n");
+ dev_err(dev, "Failed to evict process queues\n");
goto fail;
}
n_evicted++;
+
+ pdd->dev->dqm->is_hws_hang = false;
}
return r;
@@ -1858,7 +1921,8 @@ fail:
if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
&pdd->qpd))
- pr_err("Failed to restore queues\n");
+ dev_err(pdd->dev->adev->dev,
+ "Failed to restore queues\n");
n_evicted--;
}
@@ -1874,13 +1938,14 @@ int kfd_process_restore_queues(struct kfd_process *p)
for (i = 0; i < p->n_pdds; i++) {
struct kfd_process_device *pdd = p->pdds[i];
+ struct device *dev = pdd->dev->adev->dev;
kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid);
r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
&pdd->qpd);
if (r) {
- pr_err("Failed to restore process queues\n");
+ dev_err(dev, "Failed to restore process queues\n");
if (!ret)
ret = r;
}
@@ -1922,6 +1987,8 @@ static int signal_eviction_fence(struct kfd_process *p)
rcu_read_lock();
ef = dma_fence_get_rcu_safe(&p->ef);
rcu_read_unlock();
+ if (!ef)
+ return -EINVAL;
ret = dma_fence_signal(ef);
dma_fence_put(ef);
@@ -1942,22 +2009,21 @@ static void evict_process_worker(struct work_struct *work)
*/
p = container_of(dwork, struct kfd_process, eviction_work);
- pr_debug("Started evicting pasid 0x%x\n", p->pasid);
+ pr_debug("Started evicting process pid %d\n", p->lead_thread->pid);
ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_TTM);
if (!ret) {
/* If another thread already signaled the eviction fence,
* they are responsible stopping the queues and scheduling
* the restore work.
*/
- if (!signal_eviction_fence(p))
- queue_delayed_work(kfd_restore_wq, &p->restore_work,
- msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
- else
+ if (signal_eviction_fence(p) ||
+ mod_delayed_work(kfd_restore_wq, &p->restore_work,
+ msecs_to_jiffies(PROCESS_RESTORE_TIME_MS)))
kfd_process_restore_queues(p);
- pr_debug("Finished evicting pasid 0x%x\n", p->pasid);
+ pr_debug("Finished evicting process pid %d\n", p->lead_thread->pid);
} else
- pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid);
+ pr_err("Failed to evict queues of process pid %d\n", p->lead_thread->pid);
}
static int restore_process_helper(struct kfd_process *p)
@@ -1974,9 +2040,11 @@ static int restore_process_helper(struct kfd_process *p)
ret = kfd_process_restore_queues(p);
if (!ret)
- pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
+ pr_debug("Finished restoring process pid %d\n",
+ p->lead_thread->pid);
else
- pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
+ pr_err("Failed to restore queues of process pid %d\n",
+ p->lead_thread->pid);
return ret;
}
@@ -1993,7 +2061,7 @@ static void restore_process_worker(struct work_struct *work)
* lifetime of this thread, kfd_process p will be valid
*/
p = container_of(dwork, struct kfd_process, restore_work);
- pr_debug("Started restoring pasid 0x%x\n", p->pasid);
+ pr_debug("Started restoring process pasid %d\n", (int)p->lead_thread->pid);
/* Setting last_restore_timestamp before successful restoration.
* Otherwise this would have to be set by KGD (restore_process_bos)
@@ -2009,11 +2077,11 @@ static void restore_process_worker(struct work_struct *work)
ret = restore_process_helper(p);
if (ret) {
- pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
- p->pasid, PROCESS_BACK_OFF_TIME_MS);
- ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
- msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
- WARN(!ret, "reschedule restore work failed\n");
+ pr_debug("Failed to restore BOs of process pid %d, retry after %d ms\n",
+ p->lead_thread->pid, PROCESS_BACK_OFF_TIME_MS);
+ if (mod_delayed_work(kfd_restore_wq, &p->restore_work,
+ msecs_to_jiffies(PROCESS_RESTORE_TIME_MS)))
+ kfd_process_restore_queues(p);
}
}
@@ -2026,7 +2094,7 @@ void kfd_suspend_all_processes(void)
WARN(debug_evictions, "Evicting all processes");
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
if (kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND))
- pr_err("Failed to suspend process 0x%x\n", p->pasid);
+ pr_err("Failed to suspend process pid %d\n", p->lead_thread->pid);
signal_eviction_fence(p);
}
srcu_read_unlock(&kfd_processes_srcu, idx);
@@ -2040,8 +2108,8 @@ int kfd_resume_all_processes(void)
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
if (restore_process_helper(p)) {
- pr_err("Restore process %d failed during resume\n",
- p->pasid);
+ pr_err("Restore process pid %d failed during resume\n",
+ p->lead_thread->pid);
ret = -EFAULT;
}
}
@@ -2056,7 +2124,7 @@ int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process,
struct qcm_process_device *qpd;
if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
- pr_err("Incorrect CWSR mapping size.\n");
+ dev_err(dev->adev->dev, "Incorrect CWSR mapping size.\n");
return -EINVAL;
}
@@ -2068,7 +2136,8 @@ int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process,
qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
get_order(KFD_CWSR_TBA_TMA_SIZE));
if (!qpd->cwsr_kaddr) {
- pr_err("Error allocating per process CWSR buffer.\n");
+ dev_err(dev->adev->dev,
+ "Error allocating per process CWSR buffer.\n");
return -ENOMEM;
}
@@ -2095,12 +2164,14 @@ int kfd_process_drain_interrupts(struct kfd_process_device *pdd)
memset(irq_drain_fence, 0, sizeof(irq_drain_fence));
irq_drain_fence[0] = (KFD_IRQ_FENCE_SOURCEID << 8) |
KFD_IRQ_FENCE_CLIENTID;
- irq_drain_fence[3] = pdd->process->pasid;
+ irq_drain_fence[3] = pdd->pasid;
/*
- * For GFX 9.4.3, send the NodeId also in IH cookie DW[3]
+ * For GFX 9.4.3/9.5.0, send the NodeId also in IH cookie DW[3]
*/
- if (KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 3)) {
+ if (KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 5, 0)) {
node_id = ffs(pdd->dev->interrupt_bitmap) - 1;
irq_drain_fence[3] |= node_id << 16;
}
@@ -2124,7 +2195,7 @@ void kfd_process_close_interrupt_drain(unsigned int pasid)
{
struct kfd_process *p;
- p = kfd_lookup_process_by_pasid(pasid);
+ p = kfd_lookup_process_by_pasid(pasid, NULL);
if (!p)
return;
@@ -2245,8 +2316,8 @@ int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
int idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
- seq_printf(m, "Process %d PASID 0x%x:\n",
- p->lead_thread->tgid, p->pasid);
+ seq_printf(m, "Process %d PASID %d:\n",
+ p->lead_thread->tgid, p->lead_thread->pid);
mutex_lock(&p->mutex);
r = pqm_debugfs_mqds(m, &p->pqm);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 4858112f9a53..c643e0ccec52 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -28,6 +28,7 @@
#include "kfd_priv.h"
#include "kfd_kernel_queue.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_reset.h"
static inline struct process_queue_node *get_queue_by_qid(
struct process_queue_manager *pqm, unsigned int qid)
@@ -68,8 +69,8 @@ static int find_available_queue_slot(struct process_queue_manager *pqm,
pr_debug("The new slot id %lu\n", found);
if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
- pr_info("Cannot open more queues for process with pasid 0x%x\n",
- pqm->process->pasid);
+ pr_info("Cannot open more queues for process with pid %d\n",
+ pqm->process->lead_thread->pid);
return -ENOMEM;
}
@@ -85,10 +86,17 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
if (pdd->already_dequeued)
return;
-
+ /* The MES context flush needs to filter out the case which the
+ * KFD process is created without setting up the MES context and
+ * queue for creating a compute queue.
+ */
dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
- if (dev->kfd->shared_resources.enable_mes)
- amdgpu_mes_flush_shader_debugger(dev->adev, pdd->proc_ctx_gpu_addr);
+ if (dev->kfd->shared_resources.enable_mes && !!pdd->proc_ctx_gpu_addr &&
+ down_read_trylock(&dev->adev->reset_domain->sem)) {
+ amdgpu_mes_flush_shader_debugger(dev->adev,
+ pdd->proc_ctx_gpu_addr);
+ up_read(&dev->adev->reset_domain->sem);
+ }
pdd->already_dequeued = true;
}
@@ -126,7 +134,10 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
if (!gws && pdd->qpd.num_gws == 0)
return -EINVAL;
- if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) && !dev->kfd->shared_resources.enable_mes) {
+ if ((KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 5, 0)) &&
+ !dev->kfd->shared_resources.enable_mes) {
if (gws)
ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
gws, &mem);
@@ -189,6 +200,8 @@ static void pqm_clean_queue_resource(struct process_queue_manager *pqm,
if (pqn->q->gws) {
if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
+ KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 5, 0) &&
!dev->kfd->shared_resources.enable_mes)
amdgpu_amdkfd_remove_gws_from_process(
pqm->process->kgd_process_info, pqn->q->gws);
@@ -196,9 +209,8 @@ static void pqm_clean_queue_resource(struct process_queue_manager *pqm,
}
if (dev->kfd->shared_resources.enable_mes) {
- amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->gang_ctx_bo);
- if (pqn->q->wptr_bo)
- amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo);
+ amdgpu_amdkfd_free_gtt_mem(dev->adev, &pqn->q->gang_ctx_bo);
+ amdgpu_amdkfd_free_gtt_mem(dev->adev, (void **)&pqn->q->wptr_bo_gart);
}
}
@@ -207,8 +219,17 @@ void pqm_uninit(struct process_queue_manager *pqm)
struct process_queue_node *pqn, *next;
list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
- if (pqn->q)
+ if (pqn->q) {
+ struct kfd_process_device *pdd = kfd_get_process_device_data(pqn->q->device,
+ pqm->process);
+ if (pdd) {
+ kfd_queue_unref_bo_vas(pdd, &pqn->q->properties);
+ kfd_queue_release_buffers(pdd, &pqn->q->properties);
+ } else {
+ WARN_ON(!pdd);
+ }
pqm_clean_queue_resource(pqm, pqn);
+ }
kfd_procfs_del_queue(pqn->q);
uninit_queue(pqn->q);
@@ -223,7 +244,6 @@ void pqm_uninit(struct process_queue_manager *pqm)
static int init_user_queue(struct process_queue_manager *pqm,
struct kfd_node *dev, struct queue **q,
struct queue_properties *q_properties,
- struct file *f, struct amdgpu_bo *wptr_bo,
unsigned int qid)
{
int retval;
@@ -255,12 +275,29 @@ static int init_user_queue(struct process_queue_manager *pqm,
goto cleanup;
}
memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
- (*q)->wptr_bo = wptr_bo;
+
+ /* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work
+ * on unmapped queues for usermode queue oversubscription (no aggregated doorbell)
+ */
+ if (dev->adev != amdgpu_ttm_adev(q_properties->wptr_bo->tbo.bdev)) {
+ pr_err("Queue memory allocated to wrong device\n");
+ retval = -EINVAL;
+ goto free_gang_ctx_bo;
+ }
+
+ retval = amdgpu_amdkfd_map_gtt_bo_to_gart(q_properties->wptr_bo,
+ &(*q)->wptr_bo_gart);
+ if (retval) {
+ pr_err("Failed to map wptr bo to GART\n");
+ goto free_gang_ctx_bo;
+ }
}
pr_debug("PQM After init queue");
return 0;
+free_gang_ctx_bo:
+ amdgpu_amdkfd_free_gtt_mem(dev->adev, &(*q)->gang_ctx_bo);
cleanup:
uninit_queue(*q);
*q = NULL;
@@ -269,10 +306,8 @@ cleanup:
int pqm_create_queue(struct process_queue_manager *pqm,
struct kfd_node *dev,
- struct file *f,
struct queue_properties *properties,
unsigned int *qid,
- struct amdgpu_bo *wptr_bo,
const struct kfd_criu_queue_priv_data *q_data,
const void *restore_mqd,
const void *restore_ctl_stack,
@@ -287,10 +322,12 @@ int pqm_create_queue(struct process_queue_manager *pqm,
unsigned int max_queues = 127; /* HWS limit */
/*
- * On GFX 9.4.3, increase the number of queues that
- * can be created to 255. No HWS limit on GFX 9.4.3.
+ * On GFX 9.4.3/9.5.0, increase the number of queues that
+ * can be created to 255. No HWS limit on GFX 9.4.3/9.5.0.
*/
- if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3))
+ if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0))
max_queues = 255;
q = NULL;
@@ -323,10 +360,26 @@ int pqm_create_queue(struct process_queue_manager *pqm,
if (retval != 0)
return retval;
+ /* Register process if this is the first queue */
if (list_empty(&pdd->qpd.queues_list) &&
list_empty(&pdd->qpd.priv_queue_list))
dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);
+ /* Allocate proc_ctx_bo only if MES is enabled and this is the first queue */
+ if (!pdd->proc_ctx_cpu_ptr && dev->kfd->shared_resources.enable_mes) {
+ retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
+ AMDGPU_MES_PROC_CTX_SIZE,
+ &pdd->proc_ctx_bo,
+ &pdd->proc_ctx_gpu_addr,
+ &pdd->proc_ctx_cpu_ptr,
+ false);
+ if (retval) {
+ dev_err(dev->adev->dev, "failed to allocate process context bo\n");
+ return retval;
+ }
+ memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+ }
+
pqn = kzalloc(sizeof(*pqn), GFP_KERNEL);
if (!pqn) {
retval = -ENOMEM;
@@ -336,13 +389,14 @@ int pqm_create_queue(struct process_queue_manager *pqm,
switch (type) {
case KFD_QUEUE_TYPE_SDMA:
case KFD_QUEUE_TYPE_SDMA_XGMI:
+ case KFD_QUEUE_TYPE_SDMA_BY_ENG_ID:
/* SDMA queues are always allocated statically no matter
* which scheduler mode is used. We also do not need to
* check whether a SDMA queue can be allocated here, because
* allocate_sdma_queue() in create_queue() has the
* corresponding check logic.
*/
- retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
+ retval = init_user_queue(pqm, dev, &q, properties, *qid);
if (retval != 0)
goto err_create_queue;
pqn->q = q;
@@ -363,7 +417,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
}
- retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
+ retval = init_user_queue(pqm, dev, &q, properties, *qid);
if (retval != 0)
goto err_create_queue;
pqn->q = q;
@@ -394,8 +448,15 @@ int pqm_create_queue(struct process_queue_manager *pqm,
}
if (retval != 0) {
- pr_err("Pasid 0x%x DQM create queue type %d failed. ret %d\n",
- pqm->process->pasid, type, retval);
+ if ((type == KFD_QUEUE_TYPE_SDMA ||
+ type == KFD_QUEUE_TYPE_SDMA_XGMI ||
+ type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) &&
+ retval == -ENOMEM)
+ pr_warn("process pid %d DQM create queue type %d failed. ret %d\n",
+ pqm->process->lead_thread->pid, type, retval);
+ else
+ pr_err("process pid %d DQM create queue type %d failed. ret %d\n",
+ pqm->process->lead_thread->pid, type, retval);
goto err_create_queue;
}
@@ -430,7 +491,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
err_create_queue:
uninit_queue(q);
if (kq)
- kernel_queue_uninit(kq, false);
+ kernel_queue_uninit(kq);
kfree(pqn);
err_allocate_pqn:
/* check if queues list is empty unregister process from device */
@@ -477,21 +538,25 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
/* destroy kernel queue (DIQ) */
dqm = pqn->kq->dev->dqm;
dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd);
- kernel_queue_uninit(pqn->kq, false);
+ kernel_queue_uninit(pqn->kq);
}
if (pqn->q) {
- kfd_procfs_del_queue(pqn->q);
+ retval = kfd_queue_unref_bo_vas(pdd, &pqn->q->properties);
+ if (retval)
+ goto err_destroy_queue;
+
dqm = pqn->q->device->dqm;
retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
if (retval) {
pr_err("Pasid 0x%x destroy queue %d failed, ret %d\n",
- pqm->process->pasid,
+ pdd->pasid,
pqn->q->properties.queue_id, retval);
- if (retval != -ETIME)
+ if (retval != -ETIME && retval != -EIO)
goto err_destroy_queue;
}
-
+ kfd_procfs_del_queue(pqn->q);
+ kfd_queue_release_buffers(pdd, &pqn->q->properties);
pqm_clean_queue_resource(pqm, pqn);
uninit_queue(pqn->q);
}
@@ -515,11 +580,42 @@ int pqm_update_queue_properties(struct process_queue_manager *pqm,
struct process_queue_node *pqn;
pqn = get_queue_by_qid(pqm, qid);
- if (!pqn) {
+ if (!pqn || !pqn->q) {
pr_debug("No queue %d exists for update operation\n", qid);
return -EFAULT;
}
+ /*
+ * Update with NULL ring address is used to disable the queue
+ */
+ if (p->queue_address && p->queue_size) {
+ struct kfd_process_device *pdd;
+ struct amdgpu_vm *vm;
+ struct queue *q = pqn->q;
+ int err;
+
+ pdd = kfd_get_process_device_data(q->device, q->process);
+ if (!pdd)
+ return -ENODEV;
+ vm = drm_priv_to_vm(pdd->drm_priv);
+ err = amdgpu_bo_reserve(vm->root.bo, false);
+ if (err)
+ return err;
+
+ if (kfd_queue_buffer_get(vm, (void *)p->queue_address, &p->ring_bo,
+ p->queue_size)) {
+ pr_debug("ring buf 0x%llx size 0x%llx not mapped on GPU\n",
+ p->queue_address, p->queue_size);
+ return -EFAULT;
+ }
+
+ kfd_queue_unref_bo_va(vm, &pqn->q->properties.ring_bo);
+ kfd_queue_buffer_put(&pqn->q->properties.ring_bo);
+ amdgpu_bo_unreserve(vm->root.bo);
+
+ pqn->q->properties.ring_bo = p->ring_bo;
+ }
+
pqn->q->properties.queue_address = p->queue_address;
pqn->q->properties.queue_size = p->queue_size;
pqn->q->properties.queue_percent = p->queue_percent;
@@ -576,19 +672,6 @@ int pqm_update_mqd(struct process_queue_manager *pqm,
return 0;
}
-struct kernel_queue *pqm_get_kernel_queue(
- struct process_queue_manager *pqm,
- unsigned int qid)
-{
- struct process_queue_node *pqn;
-
- pqn = get_queue_by_qid(pqm, qid);
- if (pqn && pqn->kq)
- return pqn->kq;
-
- return NULL;
-}
-
struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
unsigned int qid)
{
@@ -962,8 +1045,7 @@ int kfd_criu_restore_queue(struct kfd_process *p,
print_queue_properties(&qp);
- ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, NULL, q_data, mqd, ctl_stack,
- NULL);
+ ret = pqm_create_queue(&p->pqm, pdd->dev, &qp, &queue_id, q_data, mqd, ctl_stack, NULL);
if (ret) {
pr_err("Failed to create new queue err:%d\n", ret);
goto exit;
@@ -979,6 +1061,7 @@ exit:
pr_debug("Queue id %d was restored successfully\n", queue_id);
kfree(q_data);
+ kfree(q_extra_data);
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
index 0f6992b1895c..a65c67cf56ff 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
@@ -24,6 +24,8 @@
#include <linux/slab.h>
#include "kfd_priv.h"
+#include "kfd_topology.h"
+#include "kfd_svm.h"
void print_queue_properties(struct queue_properties *q)
{
@@ -82,3 +84,386 @@ void uninit_queue(struct queue *q)
{
kfree(q);
}
+
+#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
+
+static int kfd_queue_buffer_svm_get(struct kfd_process_device *pdd, u64 addr, u64 size)
+{
+ struct kfd_process *p = pdd->process;
+ struct list_head update_list;
+ struct svm_range *prange;
+ int ret = -EINVAL;
+
+ INIT_LIST_HEAD(&update_list);
+ addr >>= PAGE_SHIFT;
+ size >>= PAGE_SHIFT;
+
+ mutex_lock(&p->svms.lock);
+
+ /*
+ * range may split to multiple svm pranges aligned to granularity boundaery.
+ */
+ while (size) {
+ uint32_t gpuid, gpuidx;
+ int r;
+
+ prange = svm_range_from_addr(&p->svms, addr, NULL);
+ if (!prange)
+ break;
+
+ if (!prange->mapped_to_gpu)
+ break;
+
+ r = kfd_process_gpuid_from_node(p, pdd->dev, &gpuid, &gpuidx);
+ if (r < 0)
+ break;
+ if (!test_bit(gpuidx, prange->bitmap_access) &&
+ !test_bit(gpuidx, prange->bitmap_aip))
+ break;
+
+ if (!(prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED))
+ break;
+
+ list_add(&prange->update_list, &update_list);
+
+ if (prange->last - prange->start + 1 >= size) {
+ size = 0;
+ break;
+ }
+
+ size -= prange->last - prange->start + 1;
+ addr += prange->last - prange->start + 1;
+ }
+ if (size) {
+ pr_debug("[0x%llx 0x%llx] not registered\n", addr, addr + size - 1);
+ goto out_unlock;
+ }
+
+ list_for_each_entry(prange, &update_list, update_list)
+ atomic_inc(&prange->queue_refcount);
+ ret = 0;
+
+out_unlock:
+ mutex_unlock(&p->svms.lock);
+ return ret;
+}
+
+static void kfd_queue_buffer_svm_put(struct kfd_process_device *pdd, u64 addr, u64 size)
+{
+ struct kfd_process *p = pdd->process;
+ struct svm_range *prange, *pchild;
+ struct interval_tree_node *node;
+ unsigned long last;
+
+ addr >>= PAGE_SHIFT;
+ last = addr + (size >> PAGE_SHIFT) - 1;
+
+ mutex_lock(&p->svms.lock);
+
+ node = interval_tree_iter_first(&p->svms.objects, addr, last);
+ while (node) {
+ struct interval_tree_node *next_node;
+ unsigned long next_start;
+
+ prange = container_of(node, struct svm_range, it_node);
+ next_node = interval_tree_iter_next(node, addr, last);
+ next_start = min(node->last, last) + 1;
+
+ if (atomic_add_unless(&prange->queue_refcount, -1, 0)) {
+ list_for_each_entry(pchild, &prange->child_list, child_list)
+ atomic_add_unless(&pchild->queue_refcount, -1, 0);
+ }
+
+ node = next_node;
+ addr = next_start;
+ }
+
+ mutex_unlock(&p->svms.lock);
+}
+#else
+
+static int kfd_queue_buffer_svm_get(struct kfd_process_device *pdd, u64 addr, u64 size)
+{
+ return -EINVAL;
+}
+
+static void kfd_queue_buffer_svm_put(struct kfd_process_device *pdd, u64 addr, u64 size)
+{
+}
+
+#endif
+
+int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_bo **pbo,
+ u64 expected_size)
+{
+ struct amdgpu_bo_va_mapping *mapping;
+ u64 user_addr;
+ u64 size;
+
+ user_addr = (u64)addr >> AMDGPU_GPU_PAGE_SHIFT;
+ size = expected_size >> AMDGPU_GPU_PAGE_SHIFT;
+
+ mapping = amdgpu_vm_bo_lookup_mapping(vm, user_addr);
+ if (!mapping)
+ goto out_err;
+
+ if (user_addr != mapping->start ||
+ (size != 0 && user_addr + size - 1 != mapping->last)) {
+ pr_debug("expected size 0x%llx not equal to mapping addr 0x%llx size 0x%llx\n",
+ expected_size, mapping->start << AMDGPU_GPU_PAGE_SHIFT,
+ (mapping->last - mapping->start + 1) << AMDGPU_GPU_PAGE_SHIFT);
+ goto out_err;
+ }
+
+ *pbo = amdgpu_bo_ref(mapping->bo_va->base.bo);
+ mapping->bo_va->queue_refcount++;
+ return 0;
+
+out_err:
+ *pbo = NULL;
+ return -EINVAL;
+}
+
+/* FIXME: remove this function, just call amdgpu_bo_unref directly */
+void kfd_queue_buffer_put(struct amdgpu_bo **bo)
+{
+ amdgpu_bo_unref(bo);
+}
+
+int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties)
+{
+ struct kfd_topology_device *topo_dev;
+ u64 expected_queue_size;
+ struct amdgpu_vm *vm;
+ u32 total_cwsr_size;
+ int err;
+
+ topo_dev = kfd_topology_device_by_id(pdd->dev->id);
+ if (!topo_dev)
+ return -EINVAL;
+
+ /* AQL queues on GFX7 and GFX8 appear twice their actual size */
+ if (properties->type == KFD_QUEUE_TYPE_COMPUTE &&
+ properties->format == KFD_QUEUE_FORMAT_AQL &&
+ topo_dev->node_props.gfx_target_version >= 70000 &&
+ topo_dev->node_props.gfx_target_version < 90000)
+ expected_queue_size = properties->queue_size / 2;
+ else
+ expected_queue_size = properties->queue_size;
+
+ vm = drm_priv_to_vm(pdd->drm_priv);
+ err = amdgpu_bo_reserve(vm->root.bo, false);
+ if (err)
+ return err;
+
+ err = kfd_queue_buffer_get(vm, properties->write_ptr, &properties->wptr_bo, PAGE_SIZE);
+ if (err)
+ goto out_err_unreserve;
+
+ err = kfd_queue_buffer_get(vm, properties->read_ptr, &properties->rptr_bo, PAGE_SIZE);
+ if (err)
+ goto out_err_unreserve;
+
+ err = kfd_queue_buffer_get(vm, (void *)properties->queue_address,
+ &properties->ring_bo, expected_queue_size);
+ if (err)
+ goto out_err_unreserve;
+
+ /* only compute queue requires EOP buffer and CWSR area */
+ if (properties->type != KFD_QUEUE_TYPE_COMPUTE)
+ goto out_unreserve;
+
+ /* EOP buffer is not required for all ASICs */
+ if (properties->eop_ring_buffer_address) {
+ if (properties->eop_ring_buffer_size != topo_dev->node_props.eop_buffer_size) {
+ pr_debug("queue eop bo size 0x%x not equal to node eop buf size 0x%x\n",
+ properties->eop_ring_buffer_size,
+ topo_dev->node_props.eop_buffer_size);
+ err = -EINVAL;
+ goto out_err_unreserve;
+ }
+ err = kfd_queue_buffer_get(vm, (void *)properties->eop_ring_buffer_address,
+ &properties->eop_buf_bo,
+ properties->eop_ring_buffer_size);
+ if (err)
+ goto out_err_unreserve;
+ }
+
+ if (properties->ctl_stack_size != topo_dev->node_props.ctl_stack_size) {
+ pr_debug("queue ctl stack size 0x%x not equal to node ctl stack size 0x%x\n",
+ properties->ctl_stack_size,
+ topo_dev->node_props.ctl_stack_size);
+ err = -EINVAL;
+ goto out_err_unreserve;
+ }
+
+ if (properties->ctx_save_restore_area_size != topo_dev->node_props.cwsr_size) {
+ pr_debug("queue cwsr size 0x%x not equal to node cwsr size 0x%x\n",
+ properties->ctx_save_restore_area_size,
+ topo_dev->node_props.cwsr_size);
+ err = -EINVAL;
+ goto out_err_unreserve;
+ }
+
+ total_cwsr_size = (topo_dev->node_props.cwsr_size + topo_dev->node_props.debug_memory_size)
+ * NUM_XCC(pdd->dev->xcc_mask);
+ total_cwsr_size = ALIGN(total_cwsr_size, PAGE_SIZE);
+
+ err = kfd_queue_buffer_get(vm, (void *)properties->ctx_save_restore_area_address,
+ &properties->cwsr_bo, total_cwsr_size);
+ if (!err)
+ goto out_unreserve;
+
+ amdgpu_bo_unreserve(vm->root.bo);
+
+ err = kfd_queue_buffer_svm_get(pdd, properties->ctx_save_restore_area_address,
+ total_cwsr_size);
+ if (err)
+ goto out_err_release;
+
+ return 0;
+
+out_unreserve:
+ amdgpu_bo_unreserve(vm->root.bo);
+ return 0;
+
+out_err_unreserve:
+ amdgpu_bo_unreserve(vm->root.bo);
+out_err_release:
+ /* FIXME: make a _locked version of this that can be called before
+ * dropping the VM reservation.
+ */
+ kfd_queue_unref_bo_vas(pdd, properties);
+ kfd_queue_release_buffers(pdd, properties);
+ return err;
+}
+
+int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties)
+{
+ struct kfd_topology_device *topo_dev;
+ u32 total_cwsr_size;
+
+ kfd_queue_buffer_put(&properties->wptr_bo);
+ kfd_queue_buffer_put(&properties->rptr_bo);
+ kfd_queue_buffer_put(&properties->ring_bo);
+ kfd_queue_buffer_put(&properties->eop_buf_bo);
+ kfd_queue_buffer_put(&properties->cwsr_bo);
+
+ topo_dev = kfd_topology_device_by_id(pdd->dev->id);
+ if (!topo_dev)
+ return -EINVAL;
+ total_cwsr_size = (topo_dev->node_props.cwsr_size + topo_dev->node_props.debug_memory_size)
+ * NUM_XCC(pdd->dev->xcc_mask);
+ total_cwsr_size = ALIGN(total_cwsr_size, PAGE_SIZE);
+
+ kfd_queue_buffer_svm_put(pdd, properties->ctx_save_restore_area_address, total_cwsr_size);
+ return 0;
+}
+
+void kfd_queue_unref_bo_va(struct amdgpu_vm *vm, struct amdgpu_bo **bo)
+{
+ if (*bo) {
+ struct amdgpu_bo_va *bo_va;
+
+ bo_va = amdgpu_vm_bo_find(vm, *bo);
+ if (bo_va && bo_va->queue_refcount)
+ bo_va->queue_refcount--;
+ }
+}
+
+int kfd_queue_unref_bo_vas(struct kfd_process_device *pdd,
+ struct queue_properties *properties)
+{
+ struct amdgpu_vm *vm;
+ int err;
+
+ vm = drm_priv_to_vm(pdd->drm_priv);
+ err = amdgpu_bo_reserve(vm->root.bo, false);
+ if (err)
+ return err;
+
+ kfd_queue_unref_bo_va(vm, &properties->wptr_bo);
+ kfd_queue_unref_bo_va(vm, &properties->rptr_bo);
+ kfd_queue_unref_bo_va(vm, &properties->ring_bo);
+ kfd_queue_unref_bo_va(vm, &properties->eop_buf_bo);
+ kfd_queue_unref_bo_va(vm, &properties->cwsr_bo);
+
+ amdgpu_bo_unreserve(vm->root.bo);
+ return 0;
+}
+
+#define SGPR_SIZE_PER_CU 0x4000
+#define LDS_SIZE_PER_CU 0x10000
+#define HWREG_SIZE_PER_CU 0x1000
+#define DEBUGGER_BYTES_ALIGN 64
+#define DEBUGGER_BYTES_PER_WAVE 32
+
+static u32 kfd_get_vgpr_size_per_cu(u32 gfxv)
+{
+ u32 vgpr_size = 0x40000;
+
+ if (gfxv == 90402 || /* GFX_VERSION_AQUA_VANJARAM */
+ gfxv == 90010 || /* GFX_VERSION_ALDEBARAN */
+ gfxv == 90008 || /* GFX_VERSION_ARCTURUS */
+ gfxv == 90500)
+ vgpr_size = 0x80000;
+ else if (gfxv == 110000 || /* GFX_VERSION_PLUM_BONITO */
+ gfxv == 110001 || /* GFX_VERSION_WHEAT_NAS */
+ gfxv == 120000 || /* GFX_VERSION_GFX1200 */
+ gfxv == 120001) /* GFX_VERSION_GFX1201 */
+ vgpr_size = 0x60000;
+
+ return vgpr_size;
+}
+
+#define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props) \
+ (kfd_get_vgpr_size_per_cu(gfxv) + SGPR_SIZE_PER_CU +\
+ (((gfxv) == 90500) ? (props->lds_size_in_kb << 10) : LDS_SIZE_PER_CU) +\
+ HWREG_SIZE_PER_CU)
+
+#define CNTL_STACK_BYTES_PER_WAVE(gfxv) \
+ ((gfxv) >= 100100 ? 12 : 8) /* GFX_VERSION_NAVI10*/
+
+#define SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER 40
+
+void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev)
+{
+ struct kfd_node_properties *props = &dev->node_props;
+ u32 gfxv = props->gfx_target_version;
+ u32 ctl_stack_size;
+ u32 wg_data_size;
+ u32 wave_num;
+ u32 cu_num;
+
+ if (gfxv < 80001) /* GFX_VERSION_CARRIZO */
+ return;
+
+ cu_num = props->simd_count / props->simd_per_cu / NUM_XCC(dev->gpu->xcc_mask);
+ wave_num = (gfxv < 100100) ? /* GFX_VERSION_NAVI10 */
+ min(cu_num * 40, props->array_count / props->simd_arrays_per_engine * 512)
+ : cu_num * 32;
+
+ wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props), PAGE_SIZE);
+ ctl_stack_size = wave_num * CNTL_STACK_BYTES_PER_WAVE(gfxv) + 8;
+ ctl_stack_size = ALIGN(SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER + ctl_stack_size,
+ PAGE_SIZE);
+
+ if ((gfxv / 10000 * 10000) == 100000) {
+ /* HW design limits control stack size to 0x7000.
+ * This is insufficient for theoretical PM4 cases
+ * but sufficient for AQL, limited by SPI events.
+ */
+ ctl_stack_size = min(ctl_stack_size, 0x7000);
+ }
+
+ props->ctl_stack_size = ctl_stack_size;
+ props->debug_memory_size = ALIGN(wave_num * DEBUGGER_BYTES_PER_WAVE, DEBUGGER_BYTES_ALIGN);
+ props->cwsr_size = ctl_stack_size + wg_data_size;
+
+ if (gfxv == 80002) /* GFX_VERSION_TONGA */
+ props->eop_buffer_size = 0x8000;
+ else if (gfxv == 90402) /* GFX_VERSION_AQUA_VANJARAM */
+ props->eop_buffer_size = 4096;
+ else if (gfxv >= 80000)
+ props->eop_buffer_size = 4096;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index 06ac835190f9..83d9384ac815 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -29,6 +29,7 @@
#include "amdgpu_vm.h"
#include "kfd_priv.h"
#include "kfd_smi_events.h"
+#include "amdgpu_reset.h"
struct kfd_smi_client {
struct list_head list;
@@ -43,7 +44,7 @@ struct kfd_smi_client {
bool suser;
};
-#define MAX_KFIFO_SIZE 1024
+#define KFD_MAX_KFIFO_SIZE 8192
static __poll_t kfd_smi_ev_poll(struct file *, struct poll_table_struct *);
static ssize_t kfd_smi_ev_read(struct file *, char __user *, size_t, loff_t *);
@@ -85,7 +86,7 @@ static ssize_t kfd_smi_ev_read(struct file *filep, char __user *user,
struct kfd_smi_client *client = filep->private_data;
unsigned char *buf;
- size = min_t(size_t, size, MAX_KFIFO_SIZE);
+ size = min_t(size_t, size, KFD_MAX_KFIFO_SIZE);
buf = kmalloc(size, GFP_KERNEL);
if (!buf)
return -ENOMEM;
@@ -162,10 +163,9 @@ static int kfd_smi_ev_release(struct inode *inode, struct file *filep)
static bool kfd_smi_ev_enabled(pid_t pid, struct kfd_smi_client *client,
unsigned int event)
{
- uint64_t all = KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_ALL_PROCESS);
uint64_t events = READ_ONCE(client->events);
- if (pid && client->pid != pid && !(client->suser && (events & all)))
+ if (pid && client->pid != pid && !client->suser)
return false;
return events & KFD_SMI_EVENT_MASK_FROM_INDEX(event);
@@ -215,9 +215,11 @@ static void kfd_smi_event_add(pid_t pid, struct kfd_node *dev,
add_event_to_kfifo(pid, dev, event, fifo_in, len);
}
-void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset)
+void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset,
+ struct amdgpu_reset_context *reset_context)
{
unsigned int event;
+ char reset_cause[64];
if (post_reset) {
event = KFD_SMI_EVENT_GPU_POST_RESET;
@@ -225,15 +227,23 @@ void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset)
event = KFD_SMI_EVENT_GPU_PRE_RESET;
++(dev->reset_seq_num);
}
- kfd_smi_event_add(0, dev, event, "%x\n", dev->reset_seq_num);
+
+ memset(reset_cause, 0, sizeof(reset_cause));
+
+ if (reset_context)
+ amdgpu_reset_get_desc(reset_context, reset_cause,
+ sizeof(reset_cause));
+
+ kfd_smi_event_add(0, dev, event, KFD_EVENT_FMT_UPDATE_GPU_RESET(
+ dev->reset_seq_num, reset_cause));
}
void kfd_smi_event_update_thermal_throttling(struct kfd_node *dev,
uint64_t throttle_bitmask)
{
- kfd_smi_event_add(0, dev, KFD_SMI_EVENT_THERMAL_THROTTLE, "%llx:%llx\n",
+ kfd_smi_event_add(0, dev, KFD_SMI_EVENT_THERMAL_THROTTLE, KFD_EVENT_FMT_THERMAL_THROTTLING(
throttle_bitmask,
- amdgpu_dpm_get_thermal_throttling_counter(dev->adev));
+ amdgpu_dpm_get_thermal_throttling_counter(dev->adev)));
}
void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid)
@@ -244,8 +254,8 @@ void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid)
if (task_info) {
/* Report VM faults from user applications, not retry from kernel */
if (task_info->pid)
- kfd_smi_event_add(0, dev, KFD_SMI_EVENT_VMFAULT, "%x:%s\n",
- task_info->pid, task_info->task_name);
+ kfd_smi_event_add(0, dev, KFD_SMI_EVENT_VMFAULT, KFD_EVENT_FMT_VMFAULT(
+ task_info->pid, task_info->task_name));
amdgpu_vm_put_task_info(task_info);
}
}
@@ -255,16 +265,16 @@ void kfd_smi_event_page_fault_start(struct kfd_node *node, pid_t pid,
ktime_t ts)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_PAGE_FAULT_START,
- "%lld -%d @%lx(%x) %c\n", ktime_to_ns(ts), pid,
- address, node->id, write_fault ? 'W' : 'R');
+ KFD_EVENT_FMT_PAGEFAULT_START(ktime_to_ns(ts), pid,
+ address, node->id, write_fault ? 'W' : 'R'));
}
void kfd_smi_event_page_fault_end(struct kfd_node *node, pid_t pid,
unsigned long address, bool migration)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_PAGE_FAULT_END,
- "%lld -%d @%lx(%x) %c\n", ktime_get_boottime_ns(),
- pid, address, node->id, migration ? 'M' : 'U');
+ KFD_EVENT_FMT_PAGEFAULT_END(ktime_get_boottime_ns(),
+ pid, address, node->id, migration ? 'M' : 'U'));
}
void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid,
@@ -274,34 +284,35 @@ void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid,
uint32_t trigger)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_MIGRATE_START,
- "%lld -%d @%lx(%lx) %x->%x %x:%x %d\n",
+ KFD_EVENT_FMT_MIGRATE_START(
ktime_get_boottime_ns(), pid, start, end - start,
- from, to, prefetch_loc, preferred_loc, trigger);
+ from, to, prefetch_loc, preferred_loc, trigger));
}
void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid,
unsigned long start, unsigned long end,
- uint32_t from, uint32_t to, uint32_t trigger)
+ uint32_t from, uint32_t to, uint32_t trigger,
+ int error_code)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_MIGRATE_END,
- "%lld -%d @%lx(%lx) %x->%x %d\n",
+ KFD_EVENT_FMT_MIGRATE_END(
ktime_get_boottime_ns(), pid, start, end - start,
- from, to, trigger);
+ from, to, trigger, error_code));
}
void kfd_smi_event_queue_eviction(struct kfd_node *node, pid_t pid,
uint32_t trigger)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_QUEUE_EVICTION,
- "%lld -%d %x %d\n", ktime_get_boottime_ns(), pid,
- node->id, trigger);
+ KFD_EVENT_FMT_QUEUE_EVICTION(ktime_get_boottime_ns(), pid,
+ node->id, trigger));
}
void kfd_smi_event_queue_restore(struct kfd_node *node, pid_t pid)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_QUEUE_RESTORE,
- "%lld -%d %x\n", ktime_get_boottime_ns(), pid,
- node->id);
+ KFD_EVENT_FMT_QUEUE_RESTORE(ktime_get_boottime_ns(), pid,
+ node->id, 0));
}
void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm)
@@ -318,8 +329,8 @@ void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm)
kfd_smi_event_add(p->lead_thread->pid, pdd->dev,
KFD_SMI_EVENT_QUEUE_RESTORE,
- "%lld -%d %x %c\n", ktime_get_boottime_ns(),
- p->lead_thread->pid, pdd->dev->id, 'R');
+ KFD_EVENT_FMT_QUEUE_RESTORE(ktime_get_boottime_ns(),
+ p->lead_thread->pid, pdd->dev->id, 'R'));
}
kfd_unref_process(p);
}
@@ -329,8 +340,29 @@ void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, pid_t pid,
uint32_t trigger)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_UNMAP_FROM_GPU,
- "%lld -%d @%lx(%lx) %x %d\n", ktime_get_boottime_ns(),
- pid, address, last - address + 1, node->id, trigger);
+ KFD_EVENT_FMT_UNMAP_FROM_GPU(ktime_get_boottime_ns(),
+ pid, address, last - address + 1, node->id, trigger));
+}
+
+void kfd_smi_event_process(struct kfd_process_device *pdd, bool start)
+{
+ struct amdgpu_task_info *task_info;
+ struct amdgpu_vm *avm;
+
+ if (!pdd->drm_priv)
+ return;
+
+ avm = drm_priv_to_vm(pdd->drm_priv);
+ task_info = amdgpu_vm_get_task_info_vm(avm);
+
+ if (task_info) {
+ kfd_smi_event_add(0, pdd->dev,
+ start ? KFD_SMI_EVENT_PROCESS_START :
+ KFD_SMI_EVENT_PROCESS_END,
+ KFD_EVENT_FMT_PROCESS(task_info->pid,
+ task_info->task_name));
+ amdgpu_vm_put_task_info(task_info);
+ }
}
int kfd_smi_event_open(struct kfd_node *dev, uint32_t *fd)
@@ -343,7 +375,7 @@ int kfd_smi_event_open(struct kfd_node *dev, uint32_t *fd)
return -ENOMEM;
INIT_LIST_HEAD(&client->list);
- ret = kfifo_alloc(&client->fifo, MAX_KFIFO_SIZE, GFP_KERNEL);
+ ret = kfifo_alloc(&client->fifo, KFD_MAX_KFIFO_SIZE, GFP_KERNEL);
if (ret) {
kfree(client);
return ret;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
index fa95c2dfd587..bb4d72b57387 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
@@ -24,11 +24,14 @@
#ifndef KFD_SMI_EVENTS_H_INCLUDED
#define KFD_SMI_EVENTS_H_INCLUDED
+struct amdgpu_reset_context;
+
int kfd_smi_event_open(struct kfd_node *dev, uint32_t *fd);
void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid);
void kfd_smi_event_update_thermal_throttling(struct kfd_node *dev,
uint64_t throttle_bitmask);
-void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset);
+void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset,
+ struct amdgpu_reset_context *reset_context);
void kfd_smi_event_page_fault_start(struct kfd_node *node, pid_t pid,
unsigned long address, bool write_fault,
ktime_t ts);
@@ -41,7 +44,8 @@ void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid,
uint32_t trigger);
void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid,
unsigned long start, unsigned long end,
- uint32_t from, uint32_t to, uint32_t trigger);
+ uint32_t from, uint32_t to, uint32_t trigger,
+ int error_code);
void kfd_smi_event_queue_eviction(struct kfd_node *node, pid_t pid,
uint32_t trigger);
void kfd_smi_event_queue_restore(struct kfd_node *node, pid_t pid);
@@ -49,4 +53,5 @@ void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm);
void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, pid_t pid,
unsigned long address, unsigned long last,
uint32_t trigger);
+void kfd_smi_event_process(struct kfd_process_device *pdd, bool start);
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index f0f7f48af413..a0f22ea6d15a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -309,12 +309,13 @@ static void svm_range_free(struct svm_range *prange, bool do_unmap)
}
static void
-svm_range_set_default_attributes(int32_t *location, int32_t *prefetch_loc,
- uint8_t *granularity, uint32_t *flags)
+svm_range_set_default_attributes(struct svm_range_list *svms, int32_t *location,
+ int32_t *prefetch_loc, uint8_t *granularity,
+ uint32_t *flags)
{
*location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
*prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
- *granularity = 9;
+ *granularity = svms->default_granularity;
*flags =
KFD_IOCTL_SVM_FLAG_HOST_ACCESS | KFD_IOCTL_SVM_FLAG_COHERENT;
}
@@ -358,7 +359,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
bitmap_copy(prange->bitmap_access, svms->bitmap_supported,
MAX_GPU_INSTANCE);
- svm_range_set_default_attributes(&prange->preferred_loc,
+ svm_range_set_default_attributes(svms, &prange->preferred_loc,
&prange->prefetch_loc,
&prange->granularity, &prange->flags);
@@ -404,6 +405,27 @@ static void svm_range_bo_release(struct kref *kref)
spin_lock(&svm_bo->list_lock);
}
spin_unlock(&svm_bo->list_lock);
+
+ if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
+ struct kfd_process_device *pdd;
+ struct kfd_process *p;
+ struct mm_struct *mm;
+
+ mm = svm_bo->eviction_fence->mm;
+ /*
+ * The forked child process takes svm_bo device pages ref, svm_bo could be
+ * released after parent process is gone.
+ */
+ p = kfd_lookup_process_by_mm(mm);
+ if (p) {
+ pdd = kfd_get_process_device_data(svm_bo->node, p);
+ if (pdd)
+ atomic64_sub(amdgpu_bo_size(svm_bo->bo), &pdd->vram_usage);
+ kfd_unref_process(p);
+ }
+ mmput(mm);
+ }
+
if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base))
/* We're not in the eviction worker. Signal the fence. */
dma_fence_signal(&svm_bo->eviction_fence->base);
@@ -531,6 +553,7 @@ int
svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
bool clear)
{
+ struct kfd_process_device *pdd;
struct amdgpu_bo_param bp;
struct svm_range_bo *svm_bo;
struct amdgpu_bo_user *ubo;
@@ -540,7 +563,8 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
int r;
p = container_of(prange->svms, struct kfd_process, svms);
- pr_debug("pasid: %x svms 0x%p [0x%lx 0x%lx]\n", p->pasid, prange->svms,
+ pr_debug("process pid: %d svms 0x%p [0x%lx 0x%lx]\n",
+ p->lead_thread->pid, prange->svms,
prange->start, prange->last);
if (svm_range_validate_svm_bo(node, prange))
@@ -622,6 +646,10 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
list_add(&prange->svm_bo_list, &svm_bo->range_list);
spin_unlock(&svm_bo->list_lock);
+ pdd = svm_range_get_pdd_by_node(prange, node);
+ if (pdd)
+ atomic64_add(amdgpu_bo_size(bo), &pdd->vram_usage);
+
return 0;
reserve_bo_failed:
@@ -1051,6 +1079,7 @@ svm_range_split_adjust(struct svm_range *new, struct svm_range *old,
new->mapped_to_gpu = old->mapped_to_gpu;
bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
+ atomic_set(&new->queue_refcount, atomic_read(&old->queue_refcount));
return 0;
}
@@ -1142,13 +1171,12 @@ svm_range_split_head(struct svm_range *prange, uint64_t new_start,
}
static void
-svm_range_add_child(struct svm_range *prange, struct mm_struct *mm,
- struct svm_range *pchild, enum svm_work_list_ops op)
+svm_range_add_child(struct svm_range *prange, struct svm_range *pchild, enum svm_work_list_ops op)
{
pr_debug("add child 0x%p [0x%lx 0x%lx] to prange 0x%p child list %d\n",
pchild, pchild->start, pchild->last, prange, op);
- pchild->work_item.mm = mm;
+ pchild->work_item.mm = NULL;
pchild->work_item.op = op;
list_add_tail(&pchild->child_list, &prange->child_list);
}
@@ -1167,17 +1195,17 @@ svm_range_get_pte_flags(struct kfd_node *node,
struct kfd_node *bo_node;
uint32_t flags = prange->flags;
uint32_t mapping_flags = 0;
+ uint32_t gc_ip_version = KFD_GC_VERSION(node);
uint64_t pte_flags;
bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN);
bool coherent = flags & (KFD_IOCTL_SVM_FLAG_COHERENT | KFD_IOCTL_SVM_FLAG_EXT_COHERENT);
bool ext_coherent = flags & KFD_IOCTL_SVM_FLAG_EXT_COHERENT;
- bool uncached = false; /*flags & KFD_IOCTL_SVM_FLAG_UNCACHED;*/
unsigned int mtype_local;
if (domain == SVM_RANGE_VRAM_DOMAIN)
bo_node = prange->svm_bo->node;
- switch (amdgpu_ip_version(node->adev, GC_HWIP, 0)) {
+ switch (gc_ip_version) {
case IP_VERSION(9, 4, 1):
if (domain == SVM_RANGE_VRAM_DOMAIN) {
if (bo_node == node) {
@@ -1213,15 +1241,15 @@ svm_range_get_pte_flags(struct kfd_node *node,
}
break;
case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
if (ext_coherent)
- mtype_local = node->adev->rev_id ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_UC;
+ mtype_local = AMDGPU_VM_MTYPE_CC;
else
mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC :
amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
snoop = true;
- if (uncached) {
- mapping_flags |= AMDGPU_VM_MTYPE_UC;
- } else if (domain == SVM_RANGE_VRAM_DOMAIN) {
+ if (domain == SVM_RANGE_VRAM_DOMAIN) {
/* local HBM region close to partition */
if (bo_node->adev == node->adev &&
(!bo_node->xcp || !node->xcp || bo_node->xcp->mem_id == node->xcp->mem_id))
@@ -1231,9 +1259,13 @@ svm_range_get_pte_flags(struct kfd_node *node,
*/
else if (svm_nodes_in_same_hive(bo_node, node) && !ext_coherent)
mapping_flags |= AMDGPU_VM_MTYPE_NC;
- /* PCIe P2P or extended system scope coherence */
- else
+ /* PCIe P2P on GPUs pre-9.5.0 */
+ else if (gc_ip_version < IP_VERSION(9, 5, 0) &&
+ !svm_nodes_in_same_hive(bo_node, node))
mapping_flags |= AMDGPU_VM_MTYPE_UC;
+ /* Other remote memory */
+ else
+ mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
/* system memory accessed by the APU */
} else if (node->adev->flags & AMD_IS_APU) {
/* On NUMA systems, locality is determined per-page
@@ -1245,9 +1277,16 @@ svm_range_get_pte_flags(struct kfd_node *node,
mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
/* system memory accessed by the dGPU */
} else {
- mapping_flags |= AMDGPU_VM_MTYPE_UC;
+ if (gc_ip_version < IP_VERSION(9, 5, 0) || ext_coherent)
+ mapping_flags |= AMDGPU_VM_MTYPE_UC;
+ else
+ mapping_flags |= AMDGPU_VM_MTYPE_NC;
}
break;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ mapping_flags |= AMDGPU_VM_MTYPE_NC;
+ break;
default:
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
@@ -1263,6 +1302,8 @@ svm_range_get_pte_flags(struct kfd_node *node,
pte_flags = AMDGPU_PTE_VALID;
pte_flags |= (domain == SVM_RANGE_VRAM_DOMAIN) ? 0 : AMDGPU_PTE_SYSTEM;
pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
+ if (gc_ip_version >= IP_VERSION(12, 0, 0))
+ pte_flags |= AMDGPU_PTE_IS_PTE;
pte_flags |= amdgpu_gem_va_map_flags(node->adev, mapping_flags);
return pte_flags;
@@ -1658,7 +1699,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
start = map_start << PAGE_SHIFT;
end = (map_last + 1) << PAGE_SHIFT;
for (addr = start; !r && addr < end; ) {
- struct hmm_range *hmm_range;
+ struct hmm_range *hmm_range = NULL;
unsigned long map_start_vma;
unsigned long map_last_vma;
struct vm_area_struct *vma;
@@ -1678,11 +1719,8 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
readonly, owner, NULL,
&hmm_range);
WRITE_ONCE(p->svms.faulting_task, NULL);
- if (r) {
+ if (r)
pr_debug("failed %d to get svm range pages\n", r);
- if (r == -EBUSY)
- r = -EAGAIN;
- }
} else {
r = -EFAULT;
}
@@ -1696,7 +1734,12 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
}
svm_range_lock(prange);
- if (!r && amdgpu_hmm_range_get_pages_done(hmm_range)) {
+
+ /* Free backing memory of hmm_range if it was initialized
+ * Overrride return value to TRY AGAIN only if prior returns
+ * were successful
+ */
+ if (hmm_range && amdgpu_hmm_range_get_pages_done(hmm_range) && !r) {
pr_debug("hmm update the range, need validate again\n");
r = -EAGAIN;
}
@@ -1980,6 +2023,7 @@ static struct svm_range *svm_range_clone(struct svm_range *old)
new->vram_pages = old->vram_pages;
bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
+ atomic_set(&new->queue_refcount, atomic_read(&old->queue_refcount));
return new;
}
@@ -2248,16 +2292,10 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms)
{
struct kfd_process_device *pdd;
struct kfd_process *p;
- int drain;
uint32_t i;
p = container_of(svms, struct kfd_process, svms);
-restart:
- drain = atomic_read(&svms->drain_pagefaults);
- if (!drain)
- return;
-
for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) {
pdd = p->pdds[i];
if (!pdd)
@@ -2277,8 +2315,6 @@ restart:
pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
}
- if (atomic_cmpxchg(&svms->drain_pagefaults, drain, 0) != drain)
- goto restart;
}
static void svm_range_deferred_list_work(struct work_struct *work)
@@ -2300,17 +2336,8 @@ static void svm_range_deferred_list_work(struct work_struct *work)
prange->start, prange->last, prange->work_item.op);
mm = prange->work_item.mm;
-retry:
- mmap_write_lock(mm);
- /* Checking for the need to drain retry faults must be inside
- * mmap write lock to serialize with munmap notifiers.
- */
- if (unlikely(atomic_read(&svms->drain_pagefaults))) {
- mmap_write_unlock(mm);
- svm_range_drain_retry_fault(svms);
- goto retry;
- }
+ mmap_write_lock(mm);
/* Remove from deferred_list must be inside mmap write lock, for
* two race cases:
@@ -2366,15 +2393,17 @@ svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange,
prange->work_item.op != SVM_OP_UNMAP_RANGE)
prange->work_item.op = op;
} else {
- prange->work_item.op = op;
-
- /* Pairs with mmput in deferred_list_work */
- mmget(mm);
- prange->work_item.mm = mm;
- list_add_tail(&prange->deferred_list,
- &prange->svms->deferred_range_list);
- pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n",
- prange, prange->start, prange->last, op);
+ /* Pairs with mmput in deferred_list_work.
+ * If process is exiting and mm is gone, don't update mmu notifier.
+ */
+ if (mmget_not_zero(mm)) {
+ prange->work_item.mm = mm;
+ prange->work_item.op = op;
+ list_add_tail(&prange->deferred_list,
+ &prange->svms->deferred_range_list);
+ pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n",
+ prange, prange->start, prange->last, op);
+ }
}
spin_unlock(&svms->deferred_list_lock);
}
@@ -2388,8 +2417,7 @@ void schedule_deferred_list_work(struct svm_range_list *svms)
}
static void
-svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent,
- struct svm_range *prange, unsigned long start,
+svm_range_unmap_split(struct svm_range *parent, struct svm_range *prange, unsigned long start,
unsigned long last)
{
struct svm_range *head;
@@ -2410,12 +2438,12 @@ svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent,
svm_range_split(tail, last + 1, tail->last, &head);
if (head != prange && tail != prange) {
- svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE);
- svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE);
+ svm_range_add_child(parent, head, SVM_OP_UNMAP_RANGE);
+ svm_range_add_child(parent, tail, SVM_OP_ADD_RANGE);
} else if (tail != prange) {
- svm_range_add_child(parent, mm, tail, SVM_OP_UNMAP_RANGE);
+ svm_range_add_child(parent, tail, SVM_OP_UNMAP_RANGE);
} else if (head != prange) {
- svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE);
+ svm_range_add_child(parent, head, SVM_OP_UNMAP_RANGE);
} else if (parent != prange) {
prange->work_item.op = SVM_OP_UNMAP_RANGE;
}
@@ -2431,6 +2459,17 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
struct kfd_process *p;
unsigned long s, l;
bool unmap_parent;
+ uint32_t i;
+
+ if (atomic_read(&prange->queue_refcount)) {
+ int r;
+
+ pr_warn("Freeing queue vital buffer 0x%lx, queue evicted\n",
+ prange->start << PAGE_SHIFT);
+ r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_SVM);
+ if (r)
+ pr_debug("failed %d to quiesce KFD queues\n", r);
+ }
p = kfd_lookup_process_by_mm(mm);
if (!p)
@@ -2440,11 +2479,38 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms,
prange, prange->start, prange->last, start, last);
- /* Make sure pending page faults are drained in the deferred worker
- * before the range is freed to avoid straggler interrupts on
- * unmapped memory causing "phantom faults".
+ /* calculate time stamps that are used to decide which page faults need be
+ * dropped or handled before unmap pages from gpu vm
*/
- atomic_inc(&svms->drain_pagefaults);
+ for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) {
+ struct kfd_process_device *pdd;
+ struct amdgpu_device *adev;
+ struct amdgpu_ih_ring *ih;
+ uint32_t checkpoint_wptr;
+
+ pdd = p->pdds[i];
+ if (!pdd)
+ continue;
+
+ adev = pdd->dev->adev;
+
+ /* Check and drain ih1 ring if cam not available */
+ if (adev->irq.ih1.ring_size) {
+ ih = &adev->irq.ih1;
+ checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
+ if (ih->rptr != checkpoint_wptr) {
+ svms->checkpoint_ts[i] =
+ amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1);
+ continue;
+ }
+ }
+
+ /* check if dev->irq.ih_soft is not empty */
+ ih = &adev->irq.ih_soft;
+ checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
+ if (ih->rptr != checkpoint_wptr)
+ svms->checkpoint_ts[i] = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1);
+ }
unmap_parent = start <= prange->start && last >= prange->last;
@@ -2454,14 +2520,14 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
l = min(last, pchild->last);
if (l >= s)
svm_range_unmap_from_gpus(pchild, s, l, trigger);
- svm_range_unmap_split(mm, prange, pchild, start, last);
+ svm_range_unmap_split(prange, pchild, start, last);
mutex_unlock(&pchild->lock);
}
s = max(start, prange->start);
l = min(last, prange->last);
if (l >= s)
svm_range_unmap_from_gpus(prange, s, l, trigger);
- svm_range_unmap_split(mm, prange, prange, start, last);
+ svm_range_unmap_split(prange, prange, start, last);
if (unmap_parent)
svm_range_add_list_work(svms, prange, mm, SVM_OP_UNMAP_RANGE);
@@ -2504,8 +2570,6 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
if (range->event == MMU_NOTIFY_RELEASE)
return true;
- if (!mmget_not_zero(mni->mm))
- return true;
start = mni->interval_tree.start;
last = mni->interval_tree.last;
@@ -2532,7 +2596,6 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
}
svm_range_unlock(prange);
- mmput(mni->mm);
return true;
}
@@ -2619,7 +2682,7 @@ svm_range_best_restore_location(struct svm_range *prange,
return -1;
}
- if (node->adev->gmc.is_app_apu)
+ if (node->adev->apu_prefer_gtt)
return 0;
if (prange->preferred_loc == gpuid ||
@@ -2668,9 +2731,10 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
*is_heap_stack = vma_is_initial_heap(vma) || vma_is_initial_stack(vma);
start_limit = max(vma->vm_start >> PAGE_SHIFT,
- (unsigned long)ALIGN_DOWN(addr, 2UL << 8));
+ (unsigned long)ALIGN_DOWN(addr, 1UL << p->svms.default_granularity));
end_limit = min(vma->vm_end >> PAGE_SHIFT,
- (unsigned long)ALIGN(addr + 1, 2UL << 8));
+ (unsigned long)ALIGN(addr + 1, 1UL << p->svms.default_granularity));
+
/* First range that starts after the fault address */
node = interval_tree_iter_first(&p->svms.objects, addr + 1, ULONG_MAX);
if (node) {
@@ -2885,7 +2949,7 @@ svm_fault_allowed(struct vm_area_struct *vma, bool write_fault)
int
svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
uint32_t vmid, uint32_t node_id,
- uint64_t addr, bool write_fault)
+ uint64_t addr, uint64_t ts, bool write_fault)
{
unsigned long start, last, size;
struct mm_struct *mm = NULL;
@@ -2895,7 +2959,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
ktime_t timestamp = ktime_get_boottime();
struct kfd_node *node;
int32_t best_loc;
- int32_t gpuidx = MAX_GPU_INSTANCE;
+ int32_t gpuid, gpuidx = MAX_GPU_INSTANCE;
bool write_locked = false;
struct vm_area_struct *vma;
bool migration = false;
@@ -2906,7 +2970,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
return -EFAULT;
}
- p = kfd_lookup_process_by_pasid(pasid);
+ p = kfd_lookup_process_by_pasid(pasid, NULL);
if (!p) {
pr_debug("kfd process not founded pasid 0x%x\n", pasid);
return 0;
@@ -2916,11 +2980,25 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr);
if (atomic_read(&svms->drain_pagefaults)) {
- pr_debug("draining retry fault, drop fault 0x%llx\n", addr);
+ pr_debug("page fault handling disabled, drop fault 0x%llx\n", addr);
r = 0;
goto out;
}
+ node = kfd_node_by_irq_ids(adev, node_id, vmid);
+ if (!node) {
+ pr_debug("kfd node does not exist node_id: %d, vmid: %d\n", node_id,
+ vmid);
+ r = -EFAULT;
+ goto out;
+ }
+
+ if (kfd_process_gpuid_from_node(p, node, &gpuid, &gpuidx)) {
+ pr_debug("failed to get gpuid/gpuidex for node_id: %d\n", node_id);
+ r = -EFAULT;
+ goto out;
+ }
+
if (!p->xnack_enabled) {
pr_debug("XNACK not enabled for pasid 0x%x\n", pasid);
r = -EFAULT;
@@ -2937,16 +3015,24 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
goto out;
}
- node = kfd_node_by_irq_ids(adev, node_id, vmid);
- if (!node) {
- pr_debug("kfd node does not exist node_id: %d, vmid: %d\n", node_id,
- vmid);
- r = -EFAULT;
- goto out;
- }
mmap_read_lock(mm);
retry_write_locked:
mutex_lock(&svms->lock);
+
+ /* check if this page fault time stamp is before svms->checkpoint_ts */
+ if (svms->checkpoint_ts[gpuidx] != 0) {
+ if (amdgpu_ih_ts_after_or_equal(ts, svms->checkpoint_ts[gpuidx])) {
+ pr_debug("draining retry fault, drop fault 0x%llx\n", addr);
+ r = -EAGAIN;
+ goto out_unlock_svms;
+ } else {
+ /* ts is after svms->checkpoint_ts now, reset svms->checkpoint_ts
+ * to zero to avoid following ts wrap around give wrong comparing
+ */
+ svms->checkpoint_ts[gpuidx] = 0;
+ }
+ }
+
prange = svm_range_from_addr(svms, addr, NULL);
if (!prange) {
pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
@@ -3028,8 +3114,6 @@ retry_write_locked:
start = max_t(unsigned long, ALIGN_DOWN(addr, size), prange->start);
last = min_t(unsigned long, ALIGN(addr + 1, size) - 1, prange->last);
if (prange->actual_loc != 0 || best_loc != 0) {
- migration = true;
-
if (best_loc) {
r = svm_migrate_to_vram(prange, best_loc, start, last,
mm, KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
@@ -3052,7 +3136,9 @@ retry_write_locked:
if (r) {
pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n",
r, svms, start, last);
- goto out_unlock_range;
+ goto out_migrate_fail;
+ } else {
+ migration = true;
}
}
@@ -3062,6 +3148,7 @@ retry_write_locked:
pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
r, svms, start, last);
+out_migrate_fail:
kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr,
migration);
@@ -3071,7 +3158,8 @@ out_unlock_svms:
mutex_unlock(&svms->lock);
mmap_read_unlock(mm);
- svm_range_count_fault(node, p, gpuidx);
+ if (r != -EAGAIN)
+ svm_range_count_fault(node, p, gpuidx);
mmput(mm);
out:
@@ -3148,7 +3236,8 @@ void svm_range_list_fini(struct kfd_process *p)
struct svm_range *prange;
struct svm_range *next;
- pr_debug("pasid 0x%x svms 0x%p\n", p->pasid, &p->svms);
+ pr_debug("process pid %d svms 0x%p\n", p->lead_thread->pid,
+ &p->svms);
cancel_delayed_work_sync(&p->svms.restore_work);
@@ -3158,8 +3247,9 @@ void svm_range_list_fini(struct kfd_process *p)
/*
* Ensure no retry fault comes in afterwards, as page fault handler will
* not find kfd process and take mm lock to recover fault.
+ * stop kfd page fault handing, then wait pending page faults got drained
*/
- atomic_inc(&p->svms.drain_pagefaults);
+ atomic_set(&p->svms.drain_pagefaults, 1);
svm_range_drain_retry_fault(&p->svms);
list_for_each_entry_safe(prange, next, &p->svms.list, list) {
@@ -3170,7 +3260,8 @@ void svm_range_list_fini(struct kfd_process *p)
mutex_destroy(&p->svms.lock);
- pr_debug("pasid 0x%x svms 0x%p done\n", p->pasid, &p->svms);
+ pr_debug("process pid %d svms 0x%p done\n",
+ p->lead_thread->pid, &p->svms);
}
int svm_range_list_init(struct kfd_process *p)
@@ -3193,6 +3284,12 @@ int svm_range_list_init(struct kfd_process *p)
if (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev->adev))
bitmap_set(svms->bitmap_supported, i, 1);
+ /* Value of default granularity cannot exceed 0x1B, the
+ * number of pages supported by a 4-level paging table
+ */
+ svms->default_granularity = min_t(u8, amdgpu_svm_default_granularity, 0x1B);
+ pr_debug("Default SVM Granularity to use: %d\n", svms->default_granularity);
+
return 0;
}
@@ -3337,7 +3434,7 @@ svm_range_best_prefetch_location(struct svm_range *prange)
goto out;
}
- if (bo_node->adev->gmc.is_app_apu) {
+ if (bo_node->adev->apu_prefer_gtt) {
best_loc = 0;
goto out;
}
@@ -3426,7 +3523,7 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
mm, KFD_MIGRATE_TRIGGER_PREFETCH);
*migrated = !r;
- return r;
+ return 0;
}
int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence)
@@ -3527,8 +3624,8 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
bool flush_tlb;
int r, ret = 0;
- pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
- p->pasid, &p->svms, start, start + size - 1, size);
+ pr_debug("process pid %d svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
+ p->lead_thread->pid, &p->svms, start, start + size - 1, size);
r = svm_range_check_attr(p, nattr, attrs);
if (r)
@@ -3636,8 +3733,8 @@ out_unlock_range:
out:
mutex_unlock(&process_info->lock);
- pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", p->pasid,
- &p->svms, start, start + size - 1, r);
+ pr_debug("process pid %d svms 0x%p [0x%llx 0x%llx] done, r=%d\n",
+ p->lead_thread->pid, &p->svms, start, start + size - 1, r);
return ret ? ret : r;
}
@@ -3720,7 +3817,7 @@ svm_range_get_attr(struct kfd_process *p, struct mm_struct *mm,
node = interval_tree_iter_first(&svms->objects, start, last);
if (!node) {
pr_debug("range attrs not found return default values\n");
- svm_range_set_default_attributes(&location, &prefetch_loc,
+ svm_range_set_default_attributes(svms, &location, &prefetch_loc,
&granularity, &flags_and);
flags_or = flags_and;
if (p->xnack_enabled)
@@ -3975,8 +4072,8 @@ exit:
return ret;
}
-int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
- uint64_t *svm_priv_data_size)
+void svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
+ uint64_t *svm_priv_data_size)
{
uint64_t total_size, accessibility_size, common_attr_size;
int nattr_common = 4, nattr_accessibility = 1;
@@ -3988,8 +4085,6 @@ int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
*svm_priv_data_size = 0;
svms = &p->svms;
- if (!svms)
- return -EINVAL;
mutex_lock(&svms->lock);
list_for_each_entry(prange, &svms->list, list) {
@@ -4031,7 +4126,6 @@ int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
pr_debug("num_svm_ranges %u total_priv_size %llu\n", *num_svm_ranges,
*svm_priv_data_size);
- return 0;
}
int kfd_criu_checkpoint_svm(struct kfd_process *p,
@@ -4048,8 +4142,6 @@ int kfd_criu_checkpoint_svm(struct kfd_process *p,
struct mm_struct *mm;
svms = &p->svms;
- if (!svms)
- return -EINVAL;
mm = get_task_mm(p->lead_thread);
if (!mm) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 026863a0abcd..01c7a4877904 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -137,6 +137,7 @@ struct svm_range {
DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
bool mapped_to_gpu;
+ atomic_t queue_refcount;
};
static inline void svm_range_lock(struct svm_range *prange)
@@ -173,7 +174,7 @@ int svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
bool clear);
void svm_range_vram_node_free(struct svm_range *prange);
int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
- uint32_t vmid, uint32_t node_id, uint64_t addr,
+ uint32_t vmid, uint32_t node_id, uint64_t addr, uint64_t ts,
bool write_fault);
int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence);
void svm_range_add_list_work(struct svm_range_list *svms,
@@ -183,8 +184,8 @@ void schedule_deferred_list_work(struct svm_range_list *svms);
void svm_range_dma_unmap_dev(struct device *dev, dma_addr_t *dma_addr,
unsigned long offset, unsigned long npages);
void svm_range_dma_unmap(struct svm_range *prange);
-int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
- uint64_t *svm_priv_data_size);
+void svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
+ uint64_t *svm_priv_data_size);
int kfd_criu_checkpoint_svm(struct kfd_process *p,
uint8_t __user *user_priv_data,
uint64_t *priv_offset);
@@ -201,7 +202,7 @@ void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_s
* is initialized to not 0 when page migration register device memory.
*/
#define KFD_IS_SVM_API_SUPPORTED(adev) ((adev)->kfd.pgmap.type != 0 ||\
- (adev)->gmc.is_app_apu)
+ ((adev)->apu_prefer_gtt))
void svm_range_bo_unref_async(struct svm_range_bo *svm_bo);
@@ -224,7 +225,7 @@ static inline void svm_range_list_fini(struct kfd_process *p)
static inline int svm_range_restore_pages(struct amdgpu_device *adev,
unsigned int pasid,
uint32_t client_id, uint32_t node_id,
- uint64_t addr, bool write_fault)
+ uint64_t addr, uint64_t ts, bool write_fault)
{
return -EFAULT;
}
@@ -236,13 +237,12 @@ static inline int svm_range_schedule_evict_svm_bo(
return -EINVAL;
}
-static inline int svm_range_get_info(struct kfd_process *p,
- uint32_t *num_svm_ranges,
- uint64_t *svm_priv_data_size)
+static inline void svm_range_get_info(struct kfd_process *p,
+ uint32_t *num_svm_ranges,
+ uint64_t *svm_priv_data_size)
{
*num_svm_ranges = 0;
*svm_priv_data_size = 0;
- return 0;
}
static inline int kfd_criu_checkpoint_svm(struct kfd_process *p,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index c51f131eaa2f..4ec73f33535e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -31,6 +31,7 @@
#include <linux/log2.h>
#include <linux/dmi.h>
#include <linux/atomic.h>
+#include <linux/crc16.h>
#include "kfd_priv.h"
#include "kfd_crat.h"
@@ -107,24 +108,6 @@ struct kfd_node *kfd_device_by_id(uint32_t gpu_id)
return top_dev->gpu;
}
-struct kfd_node *kfd_device_by_pci_dev(const struct pci_dev *pdev)
-{
- struct kfd_topology_device *top_dev;
- struct kfd_node *device = NULL;
-
- down_read(&topology_lock);
-
- list_for_each_entry(top_dev, &topology_device_list, list)
- if (top_dev->gpu && top_dev->gpu->adev->pdev == pdev) {
- device = top_dev->gpu;
- break;
- }
-
- up_read(&topology_lock);
-
- return device;
-}
-
/* Called with write topology_lock acquired */
static void kfd_release_topology_device(struct kfd_topology_device *dev)
{
@@ -291,6 +274,8 @@ static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr,
iolink->max_bandwidth);
sysfs_show_32bit_prop(buffer, offs, "recommended_transfer_size",
iolink->rec_transfer_size);
+ sysfs_show_32bit_prop(buffer, offs, "recommended_sdma_engine_id_mask",
+ iolink->rec_sdma_eng_id_mask);
sysfs_show_32bit_prop(buffer, offs, "flags", iolink->flags);
return offs;
@@ -525,6 +510,10 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
dev->node_props.capability |=
HSA_CAP_AQL_QUEUE_DOUBLE_MAP;
+ if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0) &&
+ (dev->gpu->adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
+ dev->node_props.capability2 |= HSA_CAP2_PER_SDMA_QUEUE_RESET_SUPPORTED;
+
sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_fcompute",
dev->node_props.max_engine_clk_fcompute);
@@ -534,6 +523,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
dev->gpu->kfd->mec_fw_version);
sysfs_show_32bit_prop(buffer, offs, "capability",
dev->node_props.capability);
+ sysfs_show_32bit_prop(buffer, offs, "capability2",
+ dev->node_props.capability2);
sysfs_show_64bit_prop(buffer, offs, "debug_prop",
dev->node_props.debug_prop);
sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version",
@@ -958,32 +949,30 @@ static void kfd_update_system_properties(void)
dev = list_last_entry(&topology_device_list,
struct kfd_topology_device, list);
if (dev) {
- sys_props.platform_id =
- (*((uint64_t *)dev->oem_id)) & CRAT_OEMID_64BIT_MASK;
+ sys_props.platform_id = dev->oem_id64;
sys_props.platform_oem = *((uint64_t *)dev->oem_table_id);
sys_props.platform_rev = dev->oem_revision;
}
up_read(&topology_lock);
}
-static void find_system_memory(const struct dmi_header *dm,
- void *private)
+static void find_system_memory(const struct dmi_header *dm, void *private)
{
+ struct dmi_mem_device *memdev = container_of(dm, struct dmi_mem_device, header);
struct kfd_mem_properties *mem;
- u16 mem_width, mem_clock;
struct kfd_topology_device *kdev =
(struct kfd_topology_device *)private;
- const u8 *dmi_data = (const u8 *)(dm + 1);
-
- if (dm->type == DMI_ENTRY_MEM_DEVICE && dm->length >= 0x15) {
- mem_width = (u16)(*(const u16 *)(dmi_data + 0x6));
- mem_clock = (u16)(*(const u16 *)(dmi_data + 0x11));
- list_for_each_entry(mem, &kdev->mem_props, list) {
- if (mem_width != 0xFFFF && mem_width != 0)
- mem->width = mem_width;
- if (mem_clock != 0)
- mem->mem_clk_max = mem_clock;
- }
+
+ if (memdev->header.type != DMI_ENTRY_MEM_DEVICE)
+ return;
+ if (memdev->header.length < sizeof(struct dmi_mem_device))
+ return;
+
+ list_for_each_entry(mem, &kdev->mem_props, list) {
+ if (memdev->total_width != 0xFFFF && memdev->total_width != 0)
+ mem->width = memdev->total_width;
+ if (memdev->speed != 0)
+ mem->mem_clk_max = memdev->speed;
}
}
@@ -1092,14 +1081,17 @@ void kfd_topology_shutdown(void)
static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu)
{
- uint32_t hashout;
+ uint32_t gpu_id;
uint32_t buf[8];
uint64_t local_mem_size;
- int i;
+ struct kfd_topology_device *dev;
+ bool is_unique;
+ uint8_t *crc_buf;
if (!gpu)
return 0;
+ crc_buf = (uint8_t *)&buf;
local_mem_size = gpu->local_mem_info.local_mem_size_private +
gpu->local_mem_info.local_mem_size_public;
buf[0] = gpu->adev->pdev->devfn;
@@ -1112,10 +1104,34 @@ static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu)
buf[6] = upper_32_bits(local_mem_size);
buf[7] = (ffs(gpu->xcc_mask) - 1) | (NUM_XCC(gpu->xcc_mask) << 16);
- for (i = 0, hashout = 0; i < 8; i++)
- hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH);
+ gpu_id = crc16(0, crc_buf, sizeof(buf)) &
+ ((1 << KFD_GPU_ID_HASH_WIDTH) - 1);
- return hashout;
+ /* There is a very small possibility when generating a
+ * 16 (KFD_GPU_ID_HASH_WIDTH) bit value from 8 word buffer
+ * that the value could be 0 or non-unique. So, check if
+ * it is unique and non-zero. If not unique increment till
+ * unique one is found. In case of overflow, restart from 1
+ */
+
+ down_read(&topology_lock);
+ do {
+ is_unique = true;
+ if (!gpu_id)
+ gpu_id = 1;
+ list_for_each_entry(dev, &topology_device_list, list) {
+ if (dev->gpu && dev->gpu_id == gpu_id) {
+ is_unique = false;
+ break;
+ }
+ }
+ if (unlikely(!is_unique))
+ gpu_id = (gpu_id + 1) &
+ ((1 << KFD_GPU_ID_HASH_WIDTH) - 1);
+ } while (!is_unique);
+ up_read(&topology_lock);
+
+ return gpu_id;
}
/* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If
* the GPU device is not already present in the topology device
@@ -1238,6 +1254,61 @@ static void kfd_set_iolink_non_coherent(struct kfd_topology_device *to_dev,
}
}
+#define REC_SDMA_NUM_GPU 8
+static const int rec_sdma_eng_map[REC_SDMA_NUM_GPU][REC_SDMA_NUM_GPU] = {
+ { -1, 14, 12, 2, 4, 8, 10, 6 },
+ { 14, -1, 2, 10, 8, 4, 6, 12 },
+ { 10, 2, -1, 12, 14, 6, 4, 8 },
+ { 2, 12, 10, -1, 6, 14, 8, 4 },
+ { 4, 8, 14, 6, -1, 10, 12, 2 },
+ { 8, 4, 6, 14, 12, -1, 2, 10 },
+ { 10, 6, 4, 8, 12, 2, -1, 14 },
+ { 6, 12, 8, 4, 2, 10, 14, -1 }};
+
+static void kfd_set_recommended_sdma_engines(struct kfd_topology_device *to_dev,
+ struct kfd_iolink_properties *outbound_link,
+ struct kfd_iolink_properties *inbound_link)
+{
+ struct kfd_node *gpu = outbound_link->gpu;
+ struct amdgpu_device *adev = gpu->adev;
+ unsigned int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes;
+ unsigned int num_xgmi_sdma_engines = kfd_get_num_xgmi_sdma_engines(gpu);
+ unsigned int num_sdma_engines = kfd_get_num_sdma_engines(gpu);
+ uint32_t sdma_eng_id_mask = (1 << num_sdma_engines) - 1;
+ uint32_t xgmi_sdma_eng_id_mask =
+ ((1 << num_xgmi_sdma_engines) - 1) << num_sdma_engines;
+
+ bool support_rec_eng = !amdgpu_sriov_vf(adev) && to_dev->gpu &&
+ adev->aid_mask && num_xgmi_nodes && gpu->kfd->num_nodes == 1 &&
+ num_xgmi_sdma_engines >= 6 && (!(adev->flags & AMD_IS_APU) &&
+ num_xgmi_nodes == 8);
+
+ if (support_rec_eng) {
+ int src_socket_id = adev->gmc.xgmi.physical_node_id;
+ int dst_socket_id = to_dev->gpu->adev->gmc.xgmi.physical_node_id;
+ unsigned int reshift = num_xgmi_sdma_engines == 6 ? 1 : 0;
+
+ outbound_link->rec_sdma_eng_id_mask =
+ 1 << (rec_sdma_eng_map[src_socket_id][dst_socket_id] >> reshift);
+ inbound_link->rec_sdma_eng_id_mask =
+ 1 << (rec_sdma_eng_map[dst_socket_id][src_socket_id] >> reshift);
+
+ /* If recommended engine is out of range, need to reset the mask */
+ if (outbound_link->rec_sdma_eng_id_mask & sdma_eng_id_mask)
+ outbound_link->rec_sdma_eng_id_mask = xgmi_sdma_eng_id_mask;
+ if (inbound_link->rec_sdma_eng_id_mask & sdma_eng_id_mask)
+ inbound_link->rec_sdma_eng_id_mask = xgmi_sdma_eng_id_mask;
+
+ } else {
+ uint32_t engine_mask = (outbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI &&
+ num_xgmi_sdma_engines && to_dev->gpu) ? xgmi_sdma_eng_id_mask :
+ sdma_eng_id_mask;
+
+ outbound_link->rec_sdma_eng_id_mask = engine_mask;
+ inbound_link->rec_sdma_eng_id_mask = engine_mask;
+ }
+}
+
static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
{
struct kfd_iolink_properties *link, *inbound_link;
@@ -1276,6 +1347,7 @@ static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED;
kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link);
kfd_set_iolink_non_coherent(peer_dev, link, inbound_link);
+ kfd_set_recommended_sdma_engines(peer_dev, link, inbound_link);
}
}
@@ -1605,17 +1677,32 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
int cache_type, unsigned int cu_processor_id,
struct kfd_node *knode)
{
- unsigned int cu_sibling_map_mask;
+ unsigned int cu_sibling_map_mask = 0;
int first_active_cu;
int i, j, k, xcc, start, end;
int num_xcc = NUM_XCC(knode->xcc_mask);
struct kfd_cache_properties *pcache = NULL;
enum amdgpu_memory_partition mode;
struct amdgpu_device *adev = knode->adev;
+ bool found = false;
start = ffs(knode->xcc_mask) - 1;
end = start + num_xcc;
- cu_sibling_map_mask = cu_info->bitmap[start][0][0];
+
+ /* To find the bitmap in the first active cu in the first
+ * xcc, it is based on the assumption that evrey xcc must
+ * have at least one active cu.
+ */
+ for (i = 0; i < gfx_info->max_shader_engines && !found; i++) {
+ for (j = 0; j < gfx_info->max_sh_per_se && !found; j++) {
+ if (cu_info->bitmap[start][i % 4][j % 4]) {
+ cu_sibling_map_mask =
+ cu_info->bitmap[start][i % 4][j % 4];
+ found = true;
+ }
+ }
+ }
+
cu_sibling_map_mask &=
((1 << pcache_info[cache_type].num_cu_shared) - 1);
first_active_cu = ffs(cu_sibling_map_mask);
@@ -1635,7 +1722,9 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
pcache->cache_level = pcache_info[cache_type].cache_level;
pcache->cacheline_size = pcache_info[cache_type].cache_line_size;
- if (KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 3))
+ if (KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(knode) == IP_VERSION(9, 5, 0))
mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
else
mode = UNKNOWN_MEMORY_PARTITION_MODE;
@@ -1697,7 +1786,7 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
struct amdgpu_cu_info *cu_info = &kdev->adev->gfx.cu_info;
struct amdgpu_gfx_config *gfx_info = &kdev->adev->gfx.config;
int gpu_processor_id;
- struct kfd_cache_properties *props_ext;
+ struct kfd_cache_properties *props_ext = NULL;
int num_of_entries = 0;
int num_of_cache_types = 0;
struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES];
@@ -1772,7 +1861,7 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
pr_debug("Added [%d] GPU cache entries\n", num_of_entries);
}
-static int kfd_topology_add_device_locked(struct kfd_node *gpu, uint32_t gpu_id,
+static int kfd_topology_add_device_locked(struct kfd_node *gpu,
struct kfd_topology_device **dev)
{
int proximity_domain = ++topology_crat_proximity_domain;
@@ -1785,8 +1874,7 @@ static int kfd_topology_add_device_locked(struct kfd_node *gpu, uint32_t gpu_id,
COMPUTE_UNIT_GPU, gpu,
proximity_domain);
if (res) {
- pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n",
- gpu_id);
+ dev_err(gpu->adev->dev, "Error creating VCRAT\n");
topology_crat_proximity_domain--;
goto err;
}
@@ -1797,8 +1885,7 @@ static int kfd_topology_add_device_locked(struct kfd_node *gpu, uint32_t gpu_id,
&temp_topology_device_list,
proximity_domain);
if (res) {
- pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n",
- gpu_id);
+ dev_err(gpu->adev->dev, "Error parsing VCRAT\n");
topology_crat_proximity_domain--;
goto err;
}
@@ -1824,8 +1911,8 @@ static int kfd_topology_add_device_locked(struct kfd_node *gpu, uint32_t gpu_id,
if (!res)
sys_props.generation_count++;
else
- pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n",
- gpu_id, res);
+ dev_err(gpu->adev->dev, "Failed to update GPU to sysfs topology. res=%d\n",
+ res);
err:
kfd_destroy_crat_image(crat_image);
@@ -1908,7 +1995,8 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) {
- if (KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 3))
+ if (KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 4))
dev->node_props.debug_prop |=
HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9_4_3 |
HSA_DBG_WATCH_ADDR_MASK_HI_BIT_GFX9_4_3;
@@ -1920,13 +2008,17 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 4, 2))
dev->node_props.capability |=
HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
+
+ if (!amdgpu_sriov_vf(dev->gpu->adev))
+ dev->node_props.capability |= HSA_CAP_PER_QUEUE_RESET_SUPPORTED;
+
} else {
dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 |
HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
- if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0))
+ if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(12, 0, 0))
dev->node_props.capability |=
- HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
+ HSA_CAP_TRAP_DEBUG_PRECISE_ALU_OPERATIONS_SUPPORTED;
}
kfd_topology_set_dbg_firmware_support(dev);
@@ -1942,14 +2034,12 @@ int kfd_topology_add_device(struct kfd_node *gpu)
struct amdgpu_gfx_config *gfx_info = &gpu->adev->gfx.config;
struct amdgpu_cu_info *cu_info = &gpu->adev->gfx.cu_info;
- gpu_id = kfd_generate_gpu_id(gpu);
if (gpu->xcp && !gpu->xcp->ddev) {
dev_warn(gpu->adev->dev,
- "Won't add GPU (ID: 0x%x) to topology since it has no drm node assigned.",
- gpu_id);
+ "Won't add GPU to topology since it has no drm node assigned.");
return 0;
} else {
- pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
+ dev_dbg(gpu->adev->dev, "Adding new GPU to topology\n");
}
/* Check to see if this gpu device exists in the topology_device_list.
@@ -1961,11 +2051,12 @@ int kfd_topology_add_device(struct kfd_node *gpu)
down_write(&topology_lock);
dev = kfd_assign_gpu(gpu);
if (!dev)
- res = kfd_topology_add_device_locked(gpu, gpu_id, &dev);
+ res = kfd_topology_add_device_locked(gpu, &dev);
up_write(&topology_lock);
if (res)
return res;
+ gpu_id = kfd_generate_gpu_id(gpu);
dev->gpu_id = gpu_id;
gpu->id = gpu_id;
@@ -1997,9 +2088,8 @@ int kfd_topology_add_device(struct kfd_node *gpu)
HSA_CAP_ASIC_REVISION_MASK);
dev->node_props.location_id = pci_dev_id(gpu->adev->pdev);
- /* On multi-partition nodes, node id = location_id[31:28] */
if (gpu->kfd->num_nodes > 1)
- dev->node_props.location_id |= (dev->gpu->node_id << 28);
+ dev->node_props.location_id |= dev->gpu->node_id;
dev->node_props.domain = pci_domain_nr(gpu->adev->pdev->bus);
dev->node_props.max_engine_clk_fcompute =
@@ -2091,6 +2181,8 @@ int kfd_topology_add_device(struct kfd_node *gpu)
dev->gpu->adev->gmc.xgmi.connected_to_cpu)
dev->node_props.capability |= HSA_CAP_FLAGS_COHERENTHOSTACCESS;
+ kfd_queue_ctx_save_restore_size(dev);
+
kfd_debug_print_topology();
kfd_notify_gpu_change(gpu_id, 1);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 27386ce9a021..3de8ec0043bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -24,6 +24,7 @@
#ifndef __KFD_TOPOLOGY_H__
#define __KFD_TOPOLOGY_H__
+#include <linux/dmi.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/kfd_sysfs.h>
@@ -50,6 +51,7 @@ struct kfd_node_properties {
uint32_t cpu_core_id_base;
uint32_t simd_id_base;
uint32_t capability;
+ uint32_t capability2;
uint64_t debug_prop;
uint32_t max_waves_per_simd;
uint32_t lds_size_in_kb;
@@ -74,6 +76,10 @@ struct kfd_node_properties {
uint32_t num_sdma_xgmi_engines;
uint32_t num_sdma_queues_per_engine;
uint32_t num_cp_queues;
+ uint32_t cwsr_size;
+ uint32_t ctl_stack_size;
+ uint32_t eop_buffer_size;
+ uint32_t debug_memory_size;
char name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
};
@@ -121,6 +127,7 @@ struct kfd_iolink_properties {
uint32_t min_bandwidth;
uint32_t max_bandwidth;
uint32_t rec_transfer_size;
+ uint32_t rec_sdma_eng_id_mask;
uint32_t flags;
struct kfd_node *gpu;
struct kobject *kobj;
@@ -154,7 +161,10 @@ struct kfd_topology_device {
struct attribute attr_gpuid;
struct attribute attr_name;
struct attribute attr_props;
- uint8_t oem_id[CRAT_OEMID_LENGTH];
+ union {
+ uint8_t oem_id[CRAT_OEMID_LENGTH];
+ uint64_t oem_id64;
+ };
uint8_t oem_table_id[CRAT_OEMTABLEID_LENGTH];
uint32_t oem_revision;
};
@@ -171,6 +181,22 @@ struct kfd_system_properties {
struct attribute attr_props;
};
+struct dmi_mem_device {
+ struct dmi_header header;
+ u16 physical_handle;
+ u16 error_handle;
+ u16 total_width;
+ u16 data_width;
+ u16 size;
+ u8 form_factor;
+ u8 device_set;
+ u8 device_locator;
+ u8 bank_locator;
+ u8 memory_type;
+ u16 type_detail;
+ u16 speed;
+} __packed;
+
struct kfd_topology_device *kfd_create_topology_device(
struct list_head *device_list);
void kfd_release_topology_device_list(struct list_head *device_list);
diff --git a/drivers/gpu/drm/amd/amdkfd/soc15_int.h b/drivers/gpu/drm/amd/amdkfd/soc15_int.h
index 10138676f27f..e5c0205f2618 100644
--- a/drivers/gpu/drm/amd/amdkfd/soc15_int.h
+++ b/drivers/gpu/drm/amd/amdkfd/soc15_int.h
@@ -29,6 +29,7 @@
#define SOC15_INTSRC_CP_BAD_OPCODE 183
#define SOC15_INTSRC_SQ_INTERRUPT_MSG 239
#define SOC15_INTSRC_VMC_FAULT 0
+#define SOC15_INTSRC_VMC_UTCL2_POISON 1
#define SOC15_INTSRC_SDMA_TRAP 224
#define SOC15_INTSRC_SDMA_ECC 220
#define SOC21_INTSRC_SDMA_TRAP 49