diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 240 |
1 files changed, 175 insertions, 65 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index adb5e4bdc0b2..7fbb5c274ccc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -28,6 +28,7 @@ #include "kfd_priv.h" #include "kfd_kernel_queue.h" #include "amdgpu_amdkfd.h" +#include "amdgpu_reset.h" static inline struct process_queue_node *get_queue_by_qid( struct process_queue_manager *pqm, unsigned int qid) @@ -68,8 +69,8 @@ static int find_available_queue_slot(struct process_queue_manager *pqm, pr_debug("The new slot id %lu\n", found); if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) { - pr_info("Cannot open more queues for process with pasid 0x%x\n", - pqm->process->pasid); + pr_info("Cannot open more queues for process with pid %d\n", + pqm->process->lead_thread->pid); return -ENOMEM; } @@ -85,14 +86,24 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd) if (pdd->already_dequeued) return; - + /* The MES context flush needs to filter out the case which the + * KFD process is created without setting up the MES context and + * queue for creating a compute queue. + */ dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd); + if (dev->kfd->shared_resources.enable_mes && !!pdd->proc_ctx_gpu_addr && + down_read_trylock(&dev->adev->reset_domain->sem)) { + amdgpu_mes_flush_shader_debugger(dev->adev, + pdd->proc_ctx_gpu_addr); + up_read(&dev->adev->reset_domain->sem); + } pdd->already_dequeued = true; } int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, void *gws) { + struct mqd_update_info minfo = {0}; struct kfd_node *dev = NULL; struct process_queue_node *pqn; struct kfd_process_device *pdd; @@ -123,7 +134,10 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, if (!gws && pdd->qpd.num_gws == 0) return -EINVAL; - if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) && !dev->kfd->shared_resources.enable_mes) { + if ((KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) && + KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) && + KFD_GC_VERSION(dev) != IP_VERSION(9, 5, 0)) && + !dev->kfd->shared_resources.enable_mes) { if (gws) ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info, gws, &mem); @@ -144,9 +158,10 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, } pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0; + minfo.update_flag = gws ? UPDATE_FLAG_IS_GWS : 0; return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, - pqn->q, NULL); + pqn->q, &minfo); } void kfd_process_dequeue_from_all_devices(struct kfd_process *p) @@ -169,16 +184,53 @@ int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p) return 0; } +static void pqm_clean_queue_resource(struct process_queue_manager *pqm, + struct process_queue_node *pqn) +{ + struct kfd_node *dev; + struct kfd_process_device *pdd; + + dev = pqn->q->device; + + pdd = kfd_get_process_device_data(dev, pqm->process); + if (!pdd) { + pr_err("Process device data doesn't exist\n"); + return; + } + + if (pqn->q->gws) { + if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) && + KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 4) && + KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 5, 0) && + !dev->kfd->shared_resources.enable_mes) + amdgpu_amdkfd_remove_gws_from_process( + pqm->process->kgd_process_info, pqn->q->gws); + pdd->qpd.num_gws = 0; + } + + if (dev->kfd->shared_resources.enable_mes) { + amdgpu_amdkfd_free_gtt_mem(dev->adev, &pqn->q->gang_ctx_bo); + amdgpu_amdkfd_free_gtt_mem(dev->adev, (void **)&pqn->q->wptr_bo_gart); + } +} + void pqm_uninit(struct process_queue_manager *pqm) { struct process_queue_node *pqn, *next; list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) { - if (pqn->q && pqn->q->gws && - KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) && - !pqn->q->device->kfd->shared_resources.enable_mes) - amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info, - pqn->q->gws); + if (pqn->q) { + struct kfd_process_device *pdd = kfd_get_process_device_data(pqn->q->device, + pqm->process); + if (pdd) { + kfd_queue_unref_bo_vas(pdd, &pqn->q->properties); + kfd_queue_release_buffers(pdd, &pqn->q->properties); + } else { + WARN_ON(!pdd); + } + pqm_clean_queue_resource(pqm, pqn); + } + kfd_procfs_del_queue(pqn->q); uninit_queue(pqn->q); list_del(&pqn->process_queue_list); @@ -192,7 +244,6 @@ void pqm_uninit(struct process_queue_manager *pqm) static int init_user_queue(struct process_queue_manager *pqm, struct kfd_node *dev, struct queue **q, struct queue_properties *q_properties, - struct file *f, struct amdgpu_bo *wptr_bo, unsigned int qid) { int retval; @@ -224,12 +275,29 @@ static int init_user_queue(struct process_queue_manager *pqm, goto cleanup; } memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE); - (*q)->wptr_bo = wptr_bo; + + /* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work + * on unmapped queues for usermode queue oversubscription (no aggregated doorbell) + */ + if (dev->adev != amdgpu_ttm_adev(q_properties->wptr_bo->tbo.bdev)) { + pr_err("Queue memory allocated to wrong device\n"); + retval = -EINVAL; + goto free_gang_ctx_bo; + } + + retval = amdgpu_amdkfd_map_gtt_bo_to_gart(q_properties->wptr_bo, + &(*q)->wptr_bo_gart); + if (retval) { + pr_err("Failed to map wptr bo to GART\n"); + goto free_gang_ctx_bo; + } } pr_debug("PQM After init queue"); return 0; +free_gang_ctx_bo: + amdgpu_amdkfd_free_gtt_mem(dev->adev, &(*q)->gang_ctx_bo); cleanup: uninit_queue(*q); *q = NULL; @@ -238,10 +306,8 @@ cleanup: int pqm_create_queue(struct process_queue_manager *pqm, struct kfd_node *dev, - struct file *f, struct queue_properties *properties, unsigned int *qid, - struct amdgpu_bo *wptr_bo, const struct kfd_criu_queue_priv_data *q_data, const void *restore_mqd, const void *restore_ctl_stack, @@ -256,10 +322,12 @@ int pqm_create_queue(struct process_queue_manager *pqm, unsigned int max_queues = 127; /* HWS limit */ /* - * On GFX 9.4.3, increase the number of queues that - * can be created to 255. No HWS limit on GFX 9.4.3. + * On GFX 9.4.3/9.5.0, increase the number of queues that + * can be created to 255. No HWS limit on GFX 9.4.3/9.5.0. */ - if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3)) + if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) || + KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) || + KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) max_queues = 255; q = NULL; @@ -292,10 +360,26 @@ int pqm_create_queue(struct process_queue_manager *pqm, if (retval != 0) return retval; + /* Register process if this is the first queue */ if (list_empty(&pdd->qpd.queues_list) && list_empty(&pdd->qpd.priv_queue_list)) dev->dqm->ops.register_process(dev->dqm, &pdd->qpd); + /* Allocate proc_ctx_bo only if MES is enabled and this is the first queue */ + if (!pdd->proc_ctx_cpu_ptr && dev->kfd->shared_resources.enable_mes) { + retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, + AMDGPU_MES_PROC_CTX_SIZE, + &pdd->proc_ctx_bo, + &pdd->proc_ctx_gpu_addr, + &pdd->proc_ctx_cpu_ptr, + false); + if (retval) { + dev_err(dev->adev->dev, "failed to allocate process context bo\n"); + return retval; + } + memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE); + } + pqn = kzalloc(sizeof(*pqn), GFP_KERNEL); if (!pqn) { retval = -ENOMEM; @@ -305,13 +389,14 @@ int pqm_create_queue(struct process_queue_manager *pqm, switch (type) { case KFD_QUEUE_TYPE_SDMA: case KFD_QUEUE_TYPE_SDMA_XGMI: + case KFD_QUEUE_TYPE_SDMA_BY_ENG_ID: /* SDMA queues are always allocated statically no matter * which scheduler mode is used. We also do not need to * check whether a SDMA queue can be allocated here, because * allocate_sdma_queue() in create_queue() has the * corresponding check logic. */ - retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid); + retval = init_user_queue(pqm, dev, &q, properties, *qid); if (retval != 0) goto err_create_queue; pqn->q = q; @@ -332,7 +417,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, goto err_create_queue; } - retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid); + retval = init_user_queue(pqm, dev, &q, properties, *qid); if (retval != 0) goto err_create_queue; pqn->q = q; @@ -363,8 +448,15 @@ int pqm_create_queue(struct process_queue_manager *pqm, } if (retval != 0) { - pr_err("Pasid 0x%x DQM create queue type %d failed. ret %d\n", - pqm->process->pasid, type, retval); + if ((type == KFD_QUEUE_TYPE_SDMA || + type == KFD_QUEUE_TYPE_SDMA_XGMI || + type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) && + retval == -ENOMEM) + pr_warn("process pid %d DQM create queue type %d failed. ret %d\n", + pqm->process->lead_thread->pid, type, retval); + else + pr_err("process pid %d DQM create queue type %d failed. ret %d\n", + pqm->process->lead_thread->pid, type, retval); goto err_create_queue; } @@ -377,7 +469,8 @@ int pqm_create_queue(struct process_queue_manager *pqm, */ uint32_t first_db_index = amdgpu_doorbell_index_on_bar(pdd->dev->adev, pdd->qpd.proc_doorbells, - 0); + 0, + pdd->dev->kfd->device_info.doorbell_size); *p_doorbell_offset_in_process = (q->properties.doorbell_off - first_db_index) * sizeof(uint32_t); @@ -398,7 +491,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, err_create_queue: uninit_queue(q); if (kq) - kernel_queue_uninit(kq, false); + kernel_queue_uninit(kq); kfree(pqn); err_allocate_pqn: /* check if queues list is empty unregister process from device */ @@ -445,37 +538,26 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) /* destroy kernel queue (DIQ) */ dqm = pqn->kq->dev->dqm; dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd); - kernel_queue_uninit(pqn->kq, false); + kernel_queue_uninit(pqn->kq); } if (pqn->q) { - kfd_procfs_del_queue(pqn->q); + retval = kfd_queue_unref_bo_vas(pdd, &pqn->q->properties); + if (retval) + goto err_destroy_queue; + dqm = pqn->q->device->dqm; retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q); if (retval) { pr_err("Pasid 0x%x destroy queue %d failed, ret %d\n", - pqm->process->pasid, + pdd->pasid, pqn->q->properties.queue_id, retval); - if (retval != -ETIME) + if (retval != -ETIME && retval != -EIO) goto err_destroy_queue; } - - if (pqn->q->gws) { - if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) && - !dev->kfd->shared_resources.enable_mes) - amdgpu_amdkfd_remove_gws_from_process( - pqm->process->kgd_process_info, - pqn->q->gws); - pdd->qpd.num_gws = 0; - } - - if (dev->kfd->shared_resources.enable_mes) { - amdgpu_amdkfd_free_gtt_mem(dev->adev, - pqn->q->gang_ctx_bo); - if (pqn->q->wptr_bo) - amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo); - - } + kfd_procfs_del_queue(pqn->q); + kfd_queue_release_buffers(pdd, &pqn->q->properties); + pqm_clean_queue_resource(pqm, pqn); uninit_queue(pqn->q); } @@ -498,11 +580,42 @@ int pqm_update_queue_properties(struct process_queue_manager *pqm, struct process_queue_node *pqn; pqn = get_queue_by_qid(pqm, qid); - if (!pqn) { + if (!pqn || !pqn->q) { pr_debug("No queue %d exists for update operation\n", qid); return -EFAULT; } + /* + * Update with NULL ring address is used to disable the queue + */ + if (p->queue_address && p->queue_size) { + struct kfd_process_device *pdd; + struct amdgpu_vm *vm; + struct queue *q = pqn->q; + int err; + + pdd = kfd_get_process_device_data(q->device, q->process); + if (!pdd) + return -ENODEV; + vm = drm_priv_to_vm(pdd->drm_priv); + err = amdgpu_bo_reserve(vm->root.bo, false); + if (err) + return err; + + if (kfd_queue_buffer_get(vm, (void *)p->queue_address, &p->ring_bo, + p->queue_size)) { + pr_debug("ring buf 0x%llx size 0x%llx not mapped on GPU\n", + p->queue_address, p->queue_size); + return -EFAULT; + } + + kfd_queue_unref_bo_va(vm, &pqn->q->properties.ring_bo); + kfd_queue_buffer_put(&pqn->q->properties.ring_bo); + amdgpu_bo_unreserve(vm->root.bo); + + pqn->q->properties.ring_bo = p->ring_bo; + } + pqn->q->properties.queue_address = p->queue_address; pqn->q->properties.queue_size = p->queue_size; pqn->q->properties.queue_percent = p->queue_percent; @@ -559,19 +672,6 @@ int pqm_update_mqd(struct process_queue_manager *pqm, return 0; } -struct kernel_queue *pqm_get_kernel_queue( - struct process_queue_manager *pqm, - unsigned int qid) -{ - struct process_queue_node *pqn; - - pqn = get_queue_by_qid(pqm, qid); - if (pqn && pqn->kq) - return pqn->kq; - - return NULL; -} - struct queue *pqm_get_user_queue(struct process_queue_manager *pqm, unsigned int qid) { @@ -814,7 +914,10 @@ static int criu_checkpoint_queues_device(struct kfd_process_device *pdd, q_data = (struct kfd_criu_queue_priv_data *)q_private_data; - /* data stored in this order: priv_data, mqd, ctl_stack */ + /* + * data stored in this order: + * priv_data, mqd[xcc0], mqd[xcc1],..., ctl_stack[xcc0], ctl_stack[xcc1]... + */ q_data->mqd_size = mqd_size; q_data->ctl_stack_size = ctl_stack_size; @@ -863,7 +966,7 @@ int kfd_criu_checkpoint_queues(struct kfd_process *p, } static void set_queue_properties_from_criu(struct queue_properties *qp, - struct kfd_criu_queue_priv_data *q_data) + struct kfd_criu_queue_priv_data *q_data, uint32_t num_xcc) { qp->is_interop = false; qp->queue_percent = q_data->q_percent; @@ -876,7 +979,11 @@ static void set_queue_properties_from_criu(struct queue_properties *qp, qp->eop_ring_buffer_size = q_data->eop_ring_buffer_size; qp->ctx_save_restore_area_address = q_data->ctx_save_restore_area_address; qp->ctx_save_restore_area_size = q_data->ctx_save_restore_area_size; - qp->ctl_stack_size = q_data->ctl_stack_size; + if (q_data->type == KFD_QUEUE_TYPE_COMPUTE) + qp->ctl_stack_size = q_data->ctl_stack_size / num_xcc; + else + qp->ctl_stack_size = q_data->ctl_stack_size; + qp->type = q_data->type; qp->format = q_data->format; } @@ -936,17 +1043,19 @@ int kfd_criu_restore_queue(struct kfd_process *p, goto exit; } - /* data stored in this order: mqd, ctl_stack */ + /* + * data stored in this order: + * mqd[xcc0], mqd[xcc1],..., ctl_stack[xcc0], ctl_stack[xcc1]... + */ mqd = q_extra_data; ctl_stack = mqd + q_data->mqd_size; memset(&qp, 0, sizeof(qp)); - set_queue_properties_from_criu(&qp, q_data); + set_queue_properties_from_criu(&qp, q_data, NUM_XCC(pdd->dev->adev->gfx.xcc_mask)); print_queue_properties(&qp); - ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, NULL, q_data, mqd, ctl_stack, - NULL); + ret = pqm_create_queue(&p->pqm, pdd->dev, &qp, &queue_id, q_data, mqd, ctl_stack, NULL); if (ret) { pr_err("Failed to create new queue err:%d\n", ret); goto exit; @@ -962,6 +1071,7 @@ exit: pr_debug("Queue id %d was restored successfully\n", queue_id); kfree(q_data); + kfree(q_extra_data); return ret; } |
