diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/mes_v11_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 636 |
1 files changed, 524 insertions, 112 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 63f281a9984d..c9eba537de09 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -26,6 +26,7 @@ #include "amdgpu.h" #include "soc15_common.h" #include "soc21.h" +#include "gfx_v11_0.h" #include "gc/gc_11_0_0_offset.h" #include "gc/gc_11_0_0_sh_mask.h" #include "gc/gc_11_0_0_default.h" @@ -51,14 +52,19 @@ MODULE_FIRMWARE("amdgpu/gc_11_5_0_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_0_mes1.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_1_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_1_mes1.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_2_mes_2.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_2_mes1.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_3_mes_2.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_3_mes1.bin"); - -static int mes_v11_0_hw_fini(void *handle); +static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block); +static int mes_v11_0_hw_fini(struct amdgpu_ip_block *ip_block); static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev); static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev); #define MES_EOP_SIZE 2048 #define GFX_MES_DRAM_SIZE 0x80000 +#define MES11_HW_RESOURCE_1_SIZE (128 * AMDGPU_GPU_PAGE_SIZE) static void mes_v11_0_ring_set_wptr(struct amdgpu_ring *ring) { @@ -100,18 +106,79 @@ static const struct amdgpu_ring_funcs mes_v11_0_ring_funcs = { .insert_nop = amdgpu_ring_insert_nop, }; +static const char *mes_v11_0_opcodes[] = { + "SET_HW_RSRC", + "SET_SCHEDULING_CONFIG", + "ADD_QUEUE", + "REMOVE_QUEUE", + "PERFORM_YIELD", + "SET_GANG_PRIORITY_LEVEL", + "SUSPEND", + "RESUME", + "RESET", + "SET_LOG_BUFFER", + "CHANGE_GANG_PRORITY", + "QUERY_SCHEDULER_STATUS", + "PROGRAM_GDS", + "SET_DEBUG_VMID", + "MISC", + "UPDATE_ROOT_PAGE_TABLE", + "AMD_LOG", + "unused", + "unused", + "SET_HW_RSRC_1", +}; + +static const char *mes_v11_0_misc_opcodes[] = { + "WRITE_REG", + "INV_GART", + "QUERY_STATUS", + "READ_REG", + "WAIT_REG_MEM", + "SET_SHADER_DEBUGGER", +}; + +static const char *mes_v11_0_get_op_string(union MESAPI__MISC *x_pkt) +{ + const char *op_str = NULL; + + if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes)) + op_str = mes_v11_0_opcodes[x_pkt->header.opcode]; + + return op_str; +} + +static const char *mes_v11_0_get_misc_op_string(union MESAPI__MISC *x_pkt) +{ + const char *op_str = NULL; + + if ((x_pkt->header.opcode == MES_SCH_API_MISC) && + (x_pkt->opcode < ARRAY_SIZE(mes_v11_0_misc_opcodes))) + op_str = mes_v11_0_misc_opcodes[x_pkt->opcode]; + + return op_str; +} + static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, void *pkt, int size, int api_status_off) { - int ndw = size / 4; - signed long r; - union MESAPI__ADD_QUEUE *x_pkt = pkt; - struct MES_API_STATUS *api_status; + union MESAPI__QUERY_MES_STATUS mes_status_pkt; + signed long timeout = 2100000; /* 2100 ms */ struct amdgpu_device *adev = mes->adev; - struct amdgpu_ring *ring = &mes->ring; + struct amdgpu_ring *ring = &mes->ring[0]; + struct MES_API_STATUS *api_status; + union MESAPI__MISC *x_pkt = pkt; + const char *op_str, *misc_op_str; unsigned long flags; - signed long timeout = adev->usec_timeout; + u64 status_gpu_addr; + u32 seq, status_offset; + u64 *status_ptr; + signed long r; + int ret; + + if (x_pkt->header.opcode >= MES_SCH_API_MAX) + return -EINVAL; if (amdgpu_emu_mode) { timeout *= 100; @@ -119,37 +186,92 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, /* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */ timeout = 15 * 600 * 1000; } - BUG_ON(size % 4 != 0); - spin_lock_irqsave(&mes->ring_lock, flags); - if (amdgpu_ring_alloc(ring, ndw)) { - spin_unlock_irqrestore(&mes->ring_lock, flags); - return -ENOMEM; - } + ret = amdgpu_device_wb_get(adev, &status_offset); + if (ret) + return ret; + + status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4); + status_ptr = (u64 *)&adev->wb.wb[status_offset]; + *status_ptr = 0; + + spin_lock_irqsave(&mes->ring_lock[0], flags); + r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); + if (r) + goto error_unlock_free; + + seq = ++ring->fence_drv.sync_seq; + r = amdgpu_fence_wait_polling(ring, + seq - ring->fence_drv.num_fences_mask, + timeout); + if (r < 1) + goto error_undo; api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); - api_status->api_completion_fence_addr = mes->ring.fence_drv.gpu_addr; - api_status->api_completion_fence_value = ++mes->ring.fence_drv.sync_seq; + api_status->api_completion_fence_addr = status_gpu_addr; + api_status->api_completion_fence_value = 1; + + amdgpu_ring_write_multiple(ring, pkt, size / 4); + + memset(&mes_status_pkt, 0, sizeof(mes_status_pkt)); + mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; + mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + mes_status_pkt.api_status.api_completion_fence_addr = + ring->fence_drv.gpu_addr; + mes_status_pkt.api_status.api_completion_fence_value = seq; + + amdgpu_ring_write_multiple(ring, &mes_status_pkt, + sizeof(mes_status_pkt) / 4); - amdgpu_ring_write_multiple(ring, pkt, ndw); amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(&mes->ring_lock[0], flags); - DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode); + op_str = mes_v11_0_get_op_string(x_pkt); + misc_op_str = mes_v11_0_get_misc_op_string(x_pkt); - r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, - timeout); - if (r < 1) { - DRM_ERROR("MES failed to response msg=%d\n", - x_pkt->header.opcode); + if (misc_op_str) + dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str, + misc_op_str); + else if (op_str) + dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str); + else + dev_dbg(adev->dev, "MES msg=%d was emitted\n", + x_pkt->header.opcode); + + r = amdgpu_fence_wait_polling(ring, seq, timeout); + if (r < 1 || !*status_ptr) { + + if (misc_op_str) + dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n", + op_str, misc_op_str); + else if (op_str) + dev_err(adev->dev, "MES failed to respond to msg=%s\n", + op_str); + else + dev_err(adev->dev, "MES failed to respond to msg=%d\n", + x_pkt->header.opcode); while (halt_if_hws_hang) schedule(); - return -ETIMEDOUT; + r = -ETIMEDOUT; + goto error_wb_free; } + amdgpu_device_wb_free(adev, status_offset); return 0; + +error_undo: + dev_err(adev->dev, "MES ring buffer is full.\n"); + amdgpu_ring_undo(ring); + +error_unlock_free: + spin_unlock_irqrestore(&mes->ring_lock[0], flags); + +error_wb_free: + amdgpu_device_wb_free(adev, status_offset); + return r; } static int convert_to_mes_queue_type(int queue_type) @@ -165,6 +287,23 @@ static int convert_to_mes_queue_type(int queue_type) return -1; } +static int convert_to_mes_priority_level(int priority_level) +{ + switch (priority_level) { + case AMDGPU_MES_PRIORITY_LEVEL_LOW: + return AMD_PRIORITY_LEVEL_LOW; + case AMDGPU_MES_PRIORITY_LEVEL_NORMAL: + default: + return AMD_PRIORITY_LEVEL_NORMAL; + case AMDGPU_MES_PRIORITY_LEVEL_MEDIUM: + return AMD_PRIORITY_LEVEL_MEDIUM; + case AMDGPU_MES_PRIORITY_LEVEL_HIGH: + return AMD_PRIORITY_LEVEL_HIGH; + case AMDGPU_MES_PRIORITY_LEVEL_REALTIME: + return AMD_PRIORITY_LEVEL_REALTIME; + } +} + static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes, struct mes_add_queue_input *input) { @@ -188,9 +327,9 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.gang_quantum = input->gang_quantum; mes_add_queue_pkt.gang_context_addr = input->gang_context_addr; mes_add_queue_pkt.inprocess_gang_priority = - input->inprocess_gang_priority; + convert_to_mes_priority_level(input->inprocess_gang_priority); mes_add_queue_pkt.gang_global_priority_level = - input->gang_global_priority_level; + convert_to_mes_priority_level(input->gang_global_priority_level); mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; mes_add_queue_pkt.mqd_addr = input->mqd_addr; @@ -242,6 +381,125 @@ static int mes_v11_0_remove_hw_queue(struct amdgpu_mes *mes, offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } +static int mes_v11_0_reset_queue_mmio(struct amdgpu_mes *mes, uint32_t queue_type, + uint32_t me_id, uint32_t pipe_id, + uint32_t queue_id, uint32_t vmid) +{ + struct amdgpu_device *adev = mes->adev; + uint32_t value, reg; + int i, r = 0; + + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); + + if (queue_type == AMDGPU_RING_TYPE_GFX) { + dev_info(adev->dev, "reset gfx queue (%d:%d:%d: vmid:%d)\n", + me_id, pipe_id, queue_id, vmid); + + mutex_lock(&adev->gfx.reset_sem_mutex); + gfx_v11_0_request_gfx_index_mutex(adev, true); + /* all se allow writes */ + WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, + (uint32_t)(0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT)); + value = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); + if (pipe_id == 0) + value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE0_QUEUES, 1 << queue_id); + else + value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE1_QUEUES, 1 << queue_id); + WREG32_SOC15(GC, 0, regCP_VMID_RESET, value); + gfx_v11_0_request_gfx_index_mutex(adev, false); + mutex_unlock(&adev->gfx.reset_sem_mutex); + + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0); + /* wait till dequeue take effects */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) { + dev_err(adev->dev, "failed to wait on gfx hqd deactivate\n"); + r = -ETIMEDOUT; + } + + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { + dev_info(adev->dev, "reset compute queue (%d:%d:%d)\n", + me_id, pipe_id, queue_id); + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0); + WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); + WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); + + /* wait till dequeue take effects */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) { + dev_err(adev->dev, "failed to wait on hqd deactivate\n"); + r = -ETIMEDOUT; + } + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } else if (queue_type == AMDGPU_RING_TYPE_SDMA) { + dev_info(adev->dev, "reset sdma queue (%d:%d:%d)\n", + me_id, pipe_id, queue_id); + switch (me_id) { + case 1: + reg = SOC15_REG_OFFSET(GC, 0, regSDMA1_QUEUE_RESET_REQ); + break; + case 0: + default: + reg = SOC15_REG_OFFSET(GC, 0, regSDMA0_QUEUE_RESET_REQ); + break; + } + + value = 1 << queue_id; + WREG32(reg, value); + /* wait for queue reset done */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32(reg) & value)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) { + dev_err(adev->dev, "failed to wait on sdma queue reset done\n"); + r = -ETIMEDOUT; + } + } + + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + return r; +} + +static int mes_v11_0_map_legacy_queue(struct amdgpu_mes *mes, + struct mes_map_legacy_queue_input *input) +{ + union MESAPI__ADD_QUEUE mes_add_queue_pkt; + + memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); + + mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE; + mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_add_queue_pkt.pipe_id = input->pipe_id; + mes_add_queue_pkt.queue_id = input->queue_id; + mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; + mes_add_queue_pkt.mqd_addr = input->mqd_addr; + mes_add_queue_pkt.wptr_addr = input->wptr_addr; + mes_add_queue_pkt.queue_type = + convert_to_mes_queue_type(input->queue_type); + mes_add_queue_pkt.map_legacy_kq = 1; + + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), + offsetof(union MESAPI__ADD_QUEUE, api_status)); +} + static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes, struct mes_unmap_legacy_queue_input *input) { @@ -278,13 +536,41 @@ static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes, static int mes_v11_0_suspend_gang(struct amdgpu_mes *mes, struct mes_suspend_gang_input *input) { - return 0; + union MESAPI__SUSPEND mes_suspend_gang_pkt; + + memset(&mes_suspend_gang_pkt, 0, sizeof(mes_suspend_gang_pkt)); + + mes_suspend_gang_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_suspend_gang_pkt.header.opcode = MES_SCH_API_SUSPEND; + mes_suspend_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_suspend_gang_pkt.suspend_all_gangs = input->suspend_all_gangs; + mes_suspend_gang_pkt.gang_context_addr = input->gang_context_addr; + mes_suspend_gang_pkt.suspend_fence_addr = input->suspend_fence_addr; + mes_suspend_gang_pkt.suspend_fence_value = input->suspend_fence_value; + + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_suspend_gang_pkt, sizeof(mes_suspend_gang_pkt), + offsetof(union MESAPI__SUSPEND, api_status)); } static int mes_v11_0_resume_gang(struct amdgpu_mes *mes, struct mes_resume_gang_input *input) { - return 0; + union MESAPI__RESUME mes_resume_gang_pkt; + + memset(&mes_resume_gang_pkt, 0, sizeof(mes_resume_gang_pkt)); + + mes_resume_gang_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_resume_gang_pkt.header.opcode = MES_SCH_API_RESUME; + mes_resume_gang_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_resume_gang_pkt.resume_all_gangs = input->resume_all_gangs; + mes_resume_gang_pkt.gang_context_addr = input->gang_context_addr; + + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_resume_gang_pkt, sizeof(mes_resume_gang_pkt), + offsetof(union MESAPI__RESUME, api_status)); } static int mes_v11_0_query_sched_status(struct amdgpu_mes *mes) @@ -353,6 +639,18 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes, sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl)); misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en; break; + case MES_MISC_OP_CHANGE_CONFIG: + if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) < 0x63) { + dev_err(mes->adev->dev, "MES FW version must be larger than 0x63 to support limit single process feature.\n"); + return -EINVAL; + } + misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG; + misc_pkt.change_config.opcode = + MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS; + misc_pkt.change_config.option.bits.limit_single_process = + input->change_config.option.limit_single_process; + break; + default: DRM_ERROR("unsupported misc op (%d) \n", input->op); return -EINVAL; @@ -379,16 +677,17 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; mes_set_hw_res_pkt.paging_vmid = 0; - mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr; + mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr[0]; mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = - mes->query_status_fence_gpu_addr; + mes->query_status_fence_gpu_addr[0]; for (i = 0; i < MAX_COMPUTE_PIPES; i++) mes_set_hw_res_pkt.compute_hqd_mask[i] = mes->compute_hqd_mask[i]; for (i = 0; i < MAX_GFX_PIPES; i++) - mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i]; + mes_set_hw_res_pkt.gfx_hqd_mask[i] = + mes->gfx_hqd_mask[i]; for (i = 0; i < MAX_SDMA_PIPES; i++) mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i]; @@ -417,22 +716,86 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes->event_log_gpu_addr; } + if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE) + mes_set_hw_res_pkt.limit_single_process = 1; + return mes_v11_0_submit_pkt_and_poll_completion(mes, &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); } +static int mes_v11_0_set_hw_resources_1(struct amdgpu_mes *mes) +{ + union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_pkt; + memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt)); + + mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1; + mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + mes_set_hw_res_pkt.enable_mes_info_ctx = 1; + + mes_set_hw_res_pkt.cleaner_shader_fence_mc_addr = mes->resource_1_gpu_addr[0]; + if (amdgpu_sriov_is_mes_info_enable(mes->adev)) { + mes_set_hw_res_pkt.mes_info_ctx_mc_addr = + mes->resource_1_gpu_addr[0] + AMDGPU_GPU_PAGE_SIZE; + mes_set_hw_res_pkt.mes_info_ctx_size = MES11_HW_RESOURCE_1_SIZE; + } + + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), + offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); +} + +static int mes_v11_0_reset_hw_queue(struct amdgpu_mes *mes, + struct mes_reset_queue_input *input) +{ + union MESAPI__RESET mes_reset_queue_pkt; + + if (input->use_mmio) + return mes_v11_0_reset_queue_mmio(mes, input->queue_type, + input->me_id, input->pipe_id, + input->queue_id, input->vmid); + + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); + + mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET; + mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + mes_reset_queue_pkt.queue_type = + convert_to_mes_queue_type(input->queue_type); + + if (input->legacy_gfx) { + mes_reset_queue_pkt.reset_legacy_gfx = 1; + mes_reset_queue_pkt.pipe_id_lp = input->pipe_id; + mes_reset_queue_pkt.queue_id_lp = input->queue_id; + mes_reset_queue_pkt.mqd_mc_addr_lp = input->mqd_addr; + mes_reset_queue_pkt.doorbell_offset_lp = input->doorbell_offset; + mes_reset_queue_pkt.wptr_addr_lp = input->wptr_addr; + mes_reset_queue_pkt.vmid_id_lp = input->vmid; + } else { + mes_reset_queue_pkt.reset_queue_only = 1; + mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset; + } + + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt), + offsetof(union MESAPI__RESET, api_status)); +} + static const struct amdgpu_mes_funcs mes_v11_0_funcs = { .add_hw_queue = mes_v11_0_add_hw_queue, .remove_hw_queue = mes_v11_0_remove_hw_queue, + .map_legacy_queue = mes_v11_0_map_legacy_queue, .unmap_legacy_queue = mes_v11_0_unmap_legacy_queue, .suspend_gang = mes_v11_0_suspend_gang, .resume_gang = mes_v11_0_resume_gang, .misc_op = mes_v11_0_misc_op, + .reset_hw_queue = mes_v11_0_reset_hw_queue, }; static int mes_v11_0_allocate_ucode_buffer(struct amdgpu_device *adev, - enum admgpu_mes_pipe pipe) + enum amdgpu_mes_pipe pipe) { int r; const struct mes_firmware_header_v1_0 *mes_hdr; @@ -467,7 +830,7 @@ static int mes_v11_0_allocate_ucode_buffer(struct amdgpu_device *adev, } static int mes_v11_0_allocate_ucode_data_buffer(struct amdgpu_device *adev, - enum admgpu_mes_pipe pipe) + enum amdgpu_mes_pipe pipe) { int r; const struct mes_firmware_header_v1_0 *mes_hdr; @@ -508,7 +871,7 @@ static int mes_v11_0_allocate_ucode_data_buffer(struct amdgpu_device *adev, } static void mes_v11_0_free_ucode_buffers(struct amdgpu_device *adev, - enum admgpu_mes_pipe pipe) + enum amdgpu_mes_pipe pipe) { amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[pipe], &adev->mes.data_fw_gpu_addr[pipe], @@ -519,12 +882,48 @@ static void mes_v11_0_free_ucode_buffers(struct amdgpu_device *adev, (void **)&adev->mes.ucode_fw_ptr[pipe]); } +static void mes_v11_0_get_fw_version(struct amdgpu_device *adev) +{ + int pipe; + + /* return early if we have already fetched these */ + if (adev->mes.sched_version && adev->mes.kiq_version) + return; + + /* get MES scheduler/KIQ versions */ + mutex_lock(&adev->srbm_mutex); + + for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { + soc21_grbm_select(adev, 3, pipe, 0, 0); + + if (pipe == AMDGPU_MES_SCHED_PIPE) + adev->mes.sched_version = + RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); + else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq) + adev->mes.kiq_version = + RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); + } + + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); +} + static void mes_v11_0_enable(struct amdgpu_device *adev, bool enable) { uint64_t ucode_addr; uint32_t pipe, data = 0; if (enable) { + if (amdgpu_mes_log_enable) { + WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO, + lower_32_bits(adev->mes.event_log_gpu_addr + AMDGPU_MES_LOG_BUFFER_SIZE)); + WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI, + upper_32_bits(adev->mes.event_log_gpu_addr + AMDGPU_MES_LOG_BUFFER_SIZE)); + dev_info(adev->dev, "Setup CP MES MSCRATCH address : 0x%x. 0x%x\n", + RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI), + RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO)); + } + data = RREG32_SOC15(GC, 0, regCP_MES_CNTL); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); data = REG_SET_FIELD(data, CP_MES_CNTL, @@ -557,7 +956,7 @@ static void mes_v11_0_enable(struct amdgpu_device *adev, bool enable) if (amdgpu_emu_mode) msleep(100); else - udelay(50); + udelay(500); } else { data = RREG32_SOC15(GC, 0, regCP_MES_CNTL); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0); @@ -574,7 +973,7 @@ static void mes_v11_0_enable(struct amdgpu_device *adev, bool enable) /* This function is for backdoor MES firmware */ static int mes_v11_0_load_microcode(struct amdgpu_device *adev, - enum admgpu_mes_pipe pipe, bool prime_icache) + enum amdgpu_mes_pipe pipe, bool prime_icache) { int r; uint32_t data; @@ -646,7 +1045,7 @@ static int mes_v11_0_load_microcode(struct amdgpu_device *adev, } static int mes_v11_0_allocate_eop_buf(struct amdgpu_device *adev, - enum admgpu_mes_pipe pipe) + enum amdgpu_mes_pipe pipe) { int r; u32 *eop; @@ -851,13 +1250,13 @@ static int mes_v11_0_kiq_enable_queue(struct amdgpu_device *adev) return r; } - kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring); + kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]); return amdgpu_ring_test_helper(kiq_ring); } static int mes_v11_0_queue_init(struct amdgpu_device *adev, - enum admgpu_mes_pipe pipe) + enum amdgpu_mes_pipe pipe) { struct amdgpu_ring *ring; int r; @@ -865,7 +1264,7 @@ static int mes_v11_0_queue_init(struct amdgpu_device *adev, if (pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; else BUG(); @@ -888,18 +1287,6 @@ static int mes_v11_0_queue_init(struct amdgpu_device *adev, mes_v11_0_queue_init_register(ring); } - /* get MES scheduler/KIQ versions */ - mutex_lock(&adev->srbm_mutex); - soc21_grbm_select(adev, 3, pipe, 0, 0); - - if (pipe == AMDGPU_MES_SCHED_PIPE) - adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); - else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq) - adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); - - soc21_grbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - return 0; } @@ -907,7 +1294,7 @@ static int mes_v11_0_ring_init(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; ring->funcs = &mes_v11_0_ring_funcs; @@ -952,7 +1339,7 @@ static int mes_v11_0_kiq_ring_init(struct amdgpu_device *adev) } static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev, - enum admgpu_mes_pipe pipe) + enum amdgpu_mes_pipe pipe) { int r, mqd_size = sizeof(struct v11_compute_mqd); struct amdgpu_ring *ring; @@ -960,7 +1347,7 @@ static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev, if (pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; else BUG(); @@ -990,15 +1377,17 @@ static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev, return 0; } -static int mes_v11_0_sw_init(void *handle) +static int mes_v11_0_sw_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int pipe, r; + struct amdgpu_device *adev = ip_block->adev; + int pipe, r, bo_size; adev->mes.funcs = &mes_v11_0_funcs; adev->mes.kiq_hw_init = &mes_v11_0_kiq_hw_init; adev->mes.kiq_hw_fini = &mes_v11_0_kiq_hw_fini; + adev->mes.event_log_size = AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE; + r = amdgpu_mes_init(adev); if (r) return r; @@ -1026,16 +1415,33 @@ static int mes_v11_0_sw_init(void *handle) if (r) return r; + bo_size = AMDGPU_GPU_PAGE_SIZE; + if (amdgpu_sriov_is_mes_info_enable(adev)) + bo_size += MES11_HW_RESOURCE_1_SIZE; + + /* Only needed for AMDGPU_MES_SCHED_PIPE on MES 11*/ + r = amdgpu_bo_create_kernel(adev, + bo_size, + PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->mes.resource_1[0], + &adev->mes.resource_1_gpu_addr[0], + &adev->mes.resource_1_addr[0]); + if (r) { + dev_err(adev->dev, "(%d) failed to create mes resource_1 bo\n", r); + return r; + } + return 0; } -static int mes_v11_0_sw_fini(void *handle) +static int mes_v11_0_sw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int pipe; - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); + amdgpu_bo_free_kernel(&adev->mes.resource_1[0], &adev->mes.resource_1_gpu_addr[0], + &adev->mes.resource_1_addr[0]); for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { kfree(adev->mes.mqd_backup[pipe]); @@ -1050,12 +1456,12 @@ static int mes_v11_0_sw_fini(void *handle) &adev->gfx.kiq[0].ring.mqd_gpu_addr, &adev->gfx.kiq[0].ring.mqd_ptr); - amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj, - &adev->mes.ring.mqd_gpu_addr, - &adev->mes.ring.mqd_ptr); + amdgpu_bo_free_kernel(&adev->mes.ring[0].mqd_obj, + &adev->mes.ring[0].mqd_gpu_addr, + &adev->mes.ring[0].mqd_ptr); amdgpu_ring_fini(&adev->gfx.kiq[0].ring); - amdgpu_ring_fini(&adev->mes.ring); + amdgpu_ring_fini(&adev->mes.ring[0]); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { mes_v11_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE); @@ -1110,9 +1516,7 @@ static void mes_v11_0_kiq_setting(struct amdgpu_ring *ring) tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); - tmp |= 0x80; - WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80); } static void mes_v11_0_kiq_clear(struct amdgpu_device *adev) @@ -1128,6 +1532,7 @@ static void mes_v11_0_kiq_clear(struct amdgpu_device *adev) static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev) { int r = 0; + struct amdgpu_ip_block *ip_block; if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { @@ -1147,24 +1552,43 @@ static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev) mes_v11_0_enable(adev, true); + mes_v11_0_get_fw_version(adev); + mes_v11_0_kiq_setting(&adev->gfx.kiq[0].ring); + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_MES); + if (unlikely(!ip_block)) { + dev_err(adev->dev, "Failed to get MES handle\n"); + return -EINVAL; + } + r = mes_v11_0_queue_init(adev, AMDGPU_MES_KIQ_PIPE); if (r) goto failure; + if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x47) + adev->mes.enable_legacy_queue_map = true; + else + adev->mes.enable_legacy_queue_map = false; + + if (adev->mes.enable_legacy_queue_map) { + r = mes_v11_0_hw_init(ip_block); + if (r) + goto failure; + } + return r; failure: - mes_v11_0_hw_fini(adev); + mes_v11_0_hw_fini(ip_block); return r; } static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev) { - if (adev->mes.ring.sched.ready) { - mes_v11_0_kiq_dequeue(&adev->mes.ring); - adev->mes.ring.sched.ready = false; + if (adev->mes.ring[0].sched.ready) { + mes_v11_0_kiq_dequeue(&adev->mes.ring[0]); + adev->mes.ring[0].sched.ready = false; } if (amdgpu_sriov_vf(adev)) { @@ -1177,10 +1601,13 @@ static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev) return 0; } -static int mes_v11_0_hw_init(void *handle) +static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; + + if (adev->mes.ring[0].sched.ready) + goto out; if (!adev->enable_mes_kiq) { if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { @@ -1203,59 +1630,56 @@ static int mes_v11_0_hw_init(void *handle) if (r) goto failure; + r = mes_v11_0_set_hw_resources_1(&adev->mes); + if (r) { + DRM_ERROR("failed mes_v11_0_set_hw_resources_1, r=%d\n", r); + goto failure; + } + r = mes_v11_0_query_sched_status(&adev->mes); if (r) { DRM_ERROR("MES is busy\n"); goto failure; } + r = amdgpu_mes_update_enforce_isolation(adev); + if (r) + goto failure; + +out: /* * Disable KIQ ring usage from the driver once MES is enabled. * MES uses KIQ ring exclusively so driver cannot access KIQ ring * with MES enabled. */ adev->gfx.kiq[0].ring.sched.ready = false; - adev->mes.ring.sched.ready = true; + adev->mes.ring[0].sched.ready = true; return 0; failure: - mes_v11_0_hw_fini(adev); + mes_v11_0_hw_fini(ip_block); return r; } -static int mes_v11_0_hw_fini(void *handle) +static int mes_v11_0_hw_fini(struct amdgpu_ip_block *ip_block) { return 0; } -static int mes_v11_0_suspend(void *handle) +static int mes_v11_0_suspend(struct amdgpu_ip_block *ip_block) { - int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - r = amdgpu_mes_suspend(adev); - if (r) - return r; - - return mes_v11_0_hw_fini(adev); + return mes_v11_0_hw_fini(ip_block); } -static int mes_v11_0_resume(void *handle) +static int mes_v11_0_resume(struct amdgpu_ip_block *ip_block) { - int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - r = mes_v11_0_hw_init(adev); - if (r) - return r; - - return amdgpu_mes_resume(adev); + return mes_v11_0_hw_init(ip_block); } -static int mes_v11_0_early_init(void *handle) +static int mes_v11_0_early_init(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int pipe, r; for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { @@ -1269,22 +1693,10 @@ static int mes_v11_0_early_init(void *handle) return 0; } -static int mes_v11_0_late_init(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - /* it's only intended for use in mes_self_test case, not for s0ix and reset */ - if (!amdgpu_in_reset(adev) && !adev->in_s0ix && !adev->in_suspend && - (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(11, 0, 3))) - amdgpu_mes_self_test(adev); - - return 0; -} - static const struct amd_ip_funcs mes_v11_0_ip_funcs = { .name = "mes_v11_0", .early_init = mes_v11_0_early_init, - .late_init = mes_v11_0_late_init, + .late_init = NULL, .sw_init = mes_v11_0_sw_init, .sw_fini = mes_v11_0_sw_fini, .hw_init = mes_v11_0_hw_init, |