From e77a541f5dea0a2ff9d6a40dcda9b284e1e736fe Mon Sep 17 00:00:00 2001
From: Graham Sider
Date: Wed, 11 May 2022 12:33:35 -0400
Subject: drm/amdkfd: Enable GFX11 usermode queue oversubscription
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Starting with GFX11, MES requires wptr BOs to be GTT allocated/mapped to
GART for usermode queues in order to support oversubscription. In the
case that work is submitted to an unmapped queue, MES must have a GART
wptr address to determine whether the queue should be mapped.

This change is accompanied by changes in MES and is applicable for
MES_API_VERSION >= 2.

v3:
- Use amdgpu_vm_bo_lookup_mapping for wptr_bo mapping lookup
- Move wptr_bo refcount increment to amdgpu_amdkfd_map_gtt_bo_to_gart
- Remove list_del_init from amdgpu_amdkfd_map_gtt_bo_to_gart
- Cleanup/fix create_queue wptr_bo error handling

v4:
- Add MES version shift/mask defines to amdgpu_mes.h
- Change version check from MES_VERSION to MES_API_VERSION
- Add check in kfd_ioctl_create_queue before wptr bo pin/GART map to
  ensure bo is a single page.

Signed-off-by: Graham Sider Acked-by: Alex Deucher Acked-by: Christian König Reviewed-by: Philip Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 213246a5b4e4..299927a4959b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -177,6 +177,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, struct kfd_process_device *pdd = qpd_to_pdd(qpd); struct mes_add_queue_input queue_input; int r, queue_type; + uint64_t wptr_addr_off; if (dqm->is_hws_hang) return -EIO; @@ -196,7 +197,13 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, AMDGPU_MES_PRIORITY_LEVEL_NORMAL; queue_input.doorbell_offset = q->properties.doorbell_off; queue_input.mqd_addr = q->gart_mqd_addr; - queue_input.wptr_addr = (uint64_t)q->properties.write_ptr; + + if (q->wptr_bo) { + wptr_addr_off = (uint64_t)q->properties.write_ptr - (uint64_t)q->wptr_bo->kfd_bo->va; + queue_input.wptr_addr = ((uint64_t)q->wptr_bo->tbo.resource->start << PAGE_SHIFT) + wptr_addr_off; + } else + queue_input.wptr_addr = (uint64_t)q->properties.write_ptr; + queue_input.paging = false; queue_input.tba_addr = qpd->tba_addr; queue_input.tma_addr = qpd->tma_addr; -- cgit