summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c240
1 files changed, 175 insertions, 65 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index adb5e4bdc0b2..7fbb5c274ccc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -28,6 +28,7 @@
#include "kfd_priv.h"
#include "kfd_kernel_queue.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_reset.h"
static inline struct process_queue_node *get_queue_by_qid(
struct process_queue_manager *pqm, unsigned int qid)
@@ -68,8 +69,8 @@ static int find_available_queue_slot(struct process_queue_manager *pqm,
pr_debug("The new slot id %lu\n", found);
if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
- pr_info("Cannot open more queues for process with pasid 0x%x\n",
- pqm->process->pasid);
+ pr_info("Cannot open more queues for process with pid %d\n",
+ pqm->process->lead_thread->pid);
return -ENOMEM;
}
@@ -85,14 +86,24 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
if (pdd->already_dequeued)
return;
-
+ /* The MES context flush needs to filter out the case which the
+ * KFD process is created without setting up the MES context and
+ * queue for creating a compute queue.
+ */
dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
+ if (dev->kfd->shared_resources.enable_mes && !!pdd->proc_ctx_gpu_addr &&
+ down_read_trylock(&dev->adev->reset_domain->sem)) {
+ amdgpu_mes_flush_shader_debugger(dev->adev,
+ pdd->proc_ctx_gpu_addr);
+ up_read(&dev->adev->reset_domain->sem);
+ }
pdd->already_dequeued = true;
}
int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
void *gws)
{
+ struct mqd_update_info minfo = {0};
struct kfd_node *dev = NULL;
struct process_queue_node *pqn;
struct kfd_process_device *pdd;
@@ -123,7 +134,10 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
if (!gws && pdd->qpd.num_gws == 0)
return -EINVAL;
- if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) && !dev->kfd->shared_resources.enable_mes) {
+ if ((KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 5, 0)) &&
+ !dev->kfd->shared_resources.enable_mes) {
if (gws)
ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
gws, &mem);
@@ -144,9 +158,10 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
}
pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0;
+ minfo.update_flag = gws ? UPDATE_FLAG_IS_GWS : 0;
return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
- pqn->q, NULL);
+ pqn->q, &minfo);
}
void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
@@ -169,16 +184,53 @@ int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
return 0;
}
+static void pqm_clean_queue_resource(struct process_queue_manager *pqm,
+ struct process_queue_node *pqn)
+{
+ struct kfd_node *dev;
+ struct kfd_process_device *pdd;
+
+ dev = pqn->q->device;
+
+ pdd = kfd_get_process_device_data(dev, pqm->process);
+ if (!pdd) {
+ pr_err("Process device data doesn't exist\n");
+ return;
+ }
+
+ if (pqn->q->gws) {
+ if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
+ KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 5, 0) &&
+ !dev->kfd->shared_resources.enable_mes)
+ amdgpu_amdkfd_remove_gws_from_process(
+ pqm->process->kgd_process_info, pqn->q->gws);
+ pdd->qpd.num_gws = 0;
+ }
+
+ if (dev->kfd->shared_resources.enable_mes) {
+ amdgpu_amdkfd_free_gtt_mem(dev->adev, &pqn->q->gang_ctx_bo);
+ amdgpu_amdkfd_free_gtt_mem(dev->adev, (void **)&pqn->q->wptr_bo_gart);
+ }
+}
+
void pqm_uninit(struct process_queue_manager *pqm)
{
struct process_queue_node *pqn, *next;
list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
- if (pqn->q && pqn->q->gws &&
- KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
- !pqn->q->device->kfd->shared_resources.enable_mes)
- amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
- pqn->q->gws);
+ if (pqn->q) {
+ struct kfd_process_device *pdd = kfd_get_process_device_data(pqn->q->device,
+ pqm->process);
+ if (pdd) {
+ kfd_queue_unref_bo_vas(pdd, &pqn->q->properties);
+ kfd_queue_release_buffers(pdd, &pqn->q->properties);
+ } else {
+ WARN_ON(!pdd);
+ }
+ pqm_clean_queue_resource(pqm, pqn);
+ }
+
kfd_procfs_del_queue(pqn->q);
uninit_queue(pqn->q);
list_del(&pqn->process_queue_list);
@@ -192,7 +244,6 @@ void pqm_uninit(struct process_queue_manager *pqm)
static int init_user_queue(struct process_queue_manager *pqm,
struct kfd_node *dev, struct queue **q,
struct queue_properties *q_properties,
- struct file *f, struct amdgpu_bo *wptr_bo,
unsigned int qid)
{
int retval;
@@ -224,12 +275,29 @@ static int init_user_queue(struct process_queue_manager *pqm,
goto cleanup;
}
memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
- (*q)->wptr_bo = wptr_bo;
+
+ /* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work
+ * on unmapped queues for usermode queue oversubscription (no aggregated doorbell)
+ */
+ if (dev->adev != amdgpu_ttm_adev(q_properties->wptr_bo->tbo.bdev)) {
+ pr_err("Queue memory allocated to wrong device\n");
+ retval = -EINVAL;
+ goto free_gang_ctx_bo;
+ }
+
+ retval = amdgpu_amdkfd_map_gtt_bo_to_gart(q_properties->wptr_bo,
+ &(*q)->wptr_bo_gart);
+ if (retval) {
+ pr_err("Failed to map wptr bo to GART\n");
+ goto free_gang_ctx_bo;
+ }
}
pr_debug("PQM After init queue");
return 0;
+free_gang_ctx_bo:
+ amdgpu_amdkfd_free_gtt_mem(dev->adev, &(*q)->gang_ctx_bo);
cleanup:
uninit_queue(*q);
*q = NULL;
@@ -238,10 +306,8 @@ cleanup:
int pqm_create_queue(struct process_queue_manager *pqm,
struct kfd_node *dev,
- struct file *f,
struct queue_properties *properties,
unsigned int *qid,
- struct amdgpu_bo *wptr_bo,
const struct kfd_criu_queue_priv_data *q_data,
const void *restore_mqd,
const void *restore_ctl_stack,
@@ -256,10 +322,12 @@ int pqm_create_queue(struct process_queue_manager *pqm,
unsigned int max_queues = 127; /* HWS limit */
/*
- * On GFX 9.4.3, increase the number of queues that
- * can be created to 255. No HWS limit on GFX 9.4.3.
+ * On GFX 9.4.3/9.5.0, increase the number of queues that
+ * can be created to 255. No HWS limit on GFX 9.4.3/9.5.0.
*/
- if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3))
+ if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0))
max_queues = 255;
q = NULL;
@@ -292,10 +360,26 @@ int pqm_create_queue(struct process_queue_manager *pqm,
if (retval != 0)
return retval;
+ /* Register process if this is the first queue */
if (list_empty(&pdd->qpd.queues_list) &&
list_empty(&pdd->qpd.priv_queue_list))
dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);
+ /* Allocate proc_ctx_bo only if MES is enabled and this is the first queue */
+ if (!pdd->proc_ctx_cpu_ptr && dev->kfd->shared_resources.enable_mes) {
+ retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
+ AMDGPU_MES_PROC_CTX_SIZE,
+ &pdd->proc_ctx_bo,
+ &pdd->proc_ctx_gpu_addr,
+ &pdd->proc_ctx_cpu_ptr,
+ false);
+ if (retval) {
+ dev_err(dev->adev->dev, "failed to allocate process context bo\n");
+ return retval;
+ }
+ memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+ }
+
pqn = kzalloc(sizeof(*pqn), GFP_KERNEL);
if (!pqn) {
retval = -ENOMEM;
@@ -305,13 +389,14 @@ int pqm_create_queue(struct process_queue_manager *pqm,
switch (type) {
case KFD_QUEUE_TYPE_SDMA:
case KFD_QUEUE_TYPE_SDMA_XGMI:
+ case KFD_QUEUE_TYPE_SDMA_BY_ENG_ID:
/* SDMA queues are always allocated statically no matter
* which scheduler mode is used. We also do not need to
* check whether a SDMA queue can be allocated here, because
* allocate_sdma_queue() in create_queue() has the
* corresponding check logic.
*/
- retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
+ retval = init_user_queue(pqm, dev, &q, properties, *qid);
if (retval != 0)
goto err_create_queue;
pqn->q = q;
@@ -332,7 +417,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
}
- retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
+ retval = init_user_queue(pqm, dev, &q, properties, *qid);
if (retval != 0)
goto err_create_queue;
pqn->q = q;
@@ -363,8 +448,15 @@ int pqm_create_queue(struct process_queue_manager *pqm,
}
if (retval != 0) {
- pr_err("Pasid 0x%x DQM create queue type %d failed. ret %d\n",
- pqm->process->pasid, type, retval);
+ if ((type == KFD_QUEUE_TYPE_SDMA ||
+ type == KFD_QUEUE_TYPE_SDMA_XGMI ||
+ type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) &&
+ retval == -ENOMEM)
+ pr_warn("process pid %d DQM create queue type %d failed. ret %d\n",
+ pqm->process->lead_thread->pid, type, retval);
+ else
+ pr_err("process pid %d DQM create queue type %d failed. ret %d\n",
+ pqm->process->lead_thread->pid, type, retval);
goto err_create_queue;
}
@@ -377,7 +469,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
*/
uint32_t first_db_index = amdgpu_doorbell_index_on_bar(pdd->dev->adev,
pdd->qpd.proc_doorbells,
- 0);
+ 0,
+ pdd->dev->kfd->device_info.doorbell_size);
*p_doorbell_offset_in_process = (q->properties.doorbell_off
- first_db_index) * sizeof(uint32_t);
@@ -398,7 +491,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
err_create_queue:
uninit_queue(q);
if (kq)
- kernel_queue_uninit(kq, false);
+ kernel_queue_uninit(kq);
kfree(pqn);
err_allocate_pqn:
/* check if queues list is empty unregister process from device */
@@ -445,37 +538,26 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
/* destroy kernel queue (DIQ) */
dqm = pqn->kq->dev->dqm;
dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd);
- kernel_queue_uninit(pqn->kq, false);
+ kernel_queue_uninit(pqn->kq);
}
if (pqn->q) {
- kfd_procfs_del_queue(pqn->q);
+ retval = kfd_queue_unref_bo_vas(pdd, &pqn->q->properties);
+ if (retval)
+ goto err_destroy_queue;
+
dqm = pqn->q->device->dqm;
retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
if (retval) {
pr_err("Pasid 0x%x destroy queue %d failed, ret %d\n",
- pqm->process->pasid,
+ pdd->pasid,
pqn->q->properties.queue_id, retval);
- if (retval != -ETIME)
+ if (retval != -ETIME && retval != -EIO)
goto err_destroy_queue;
}
-
- if (pqn->q->gws) {
- if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
- !dev->kfd->shared_resources.enable_mes)
- amdgpu_amdkfd_remove_gws_from_process(
- pqm->process->kgd_process_info,
- pqn->q->gws);
- pdd->qpd.num_gws = 0;
- }
-
- if (dev->kfd->shared_resources.enable_mes) {
- amdgpu_amdkfd_free_gtt_mem(dev->adev,
- pqn->q->gang_ctx_bo);
- if (pqn->q->wptr_bo)
- amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo);
-
- }
+ kfd_procfs_del_queue(pqn->q);
+ kfd_queue_release_buffers(pdd, &pqn->q->properties);
+ pqm_clean_queue_resource(pqm, pqn);
uninit_queue(pqn->q);
}
@@ -498,11 +580,42 @@ int pqm_update_queue_properties(struct process_queue_manager *pqm,
struct process_queue_node *pqn;
pqn = get_queue_by_qid(pqm, qid);
- if (!pqn) {
+ if (!pqn || !pqn->q) {
pr_debug("No queue %d exists for update operation\n", qid);
return -EFAULT;
}
+ /*
+ * Update with NULL ring address is used to disable the queue
+ */
+ if (p->queue_address && p->queue_size) {
+ struct kfd_process_device *pdd;
+ struct amdgpu_vm *vm;
+ struct queue *q = pqn->q;
+ int err;
+
+ pdd = kfd_get_process_device_data(q->device, q->process);
+ if (!pdd)
+ return -ENODEV;
+ vm = drm_priv_to_vm(pdd->drm_priv);
+ err = amdgpu_bo_reserve(vm->root.bo, false);
+ if (err)
+ return err;
+
+ if (kfd_queue_buffer_get(vm, (void *)p->queue_address, &p->ring_bo,
+ p->queue_size)) {
+ pr_debug("ring buf 0x%llx size 0x%llx not mapped on GPU\n",
+ p->queue_address, p->queue_size);
+ return -EFAULT;
+ }
+
+ kfd_queue_unref_bo_va(vm, &pqn->q->properties.ring_bo);
+ kfd_queue_buffer_put(&pqn->q->properties.ring_bo);
+ amdgpu_bo_unreserve(vm->root.bo);
+
+ pqn->q->properties.ring_bo = p->ring_bo;
+ }
+
pqn->q->properties.queue_address = p->queue_address;
pqn->q->properties.queue_size = p->queue_size;
pqn->q->properties.queue_percent = p->queue_percent;
@@ -559,19 +672,6 @@ int pqm_update_mqd(struct process_queue_manager *pqm,
return 0;
}
-struct kernel_queue *pqm_get_kernel_queue(
- struct process_queue_manager *pqm,
- unsigned int qid)
-{
- struct process_queue_node *pqn;
-
- pqn = get_queue_by_qid(pqm, qid);
- if (pqn && pqn->kq)
- return pqn->kq;
-
- return NULL;
-}
-
struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
unsigned int qid)
{
@@ -814,7 +914,10 @@ static int criu_checkpoint_queues_device(struct kfd_process_device *pdd,
q_data = (struct kfd_criu_queue_priv_data *)q_private_data;
- /* data stored in this order: priv_data, mqd, ctl_stack */
+ /*
+ * data stored in this order:
+ * priv_data, mqd[xcc0], mqd[xcc1],..., ctl_stack[xcc0], ctl_stack[xcc1]...
+ */
q_data->mqd_size = mqd_size;
q_data->ctl_stack_size = ctl_stack_size;
@@ -863,7 +966,7 @@ int kfd_criu_checkpoint_queues(struct kfd_process *p,
}
static void set_queue_properties_from_criu(struct queue_properties *qp,
- struct kfd_criu_queue_priv_data *q_data)
+ struct kfd_criu_queue_priv_data *q_data, uint32_t num_xcc)
{
qp->is_interop = false;
qp->queue_percent = q_data->q_percent;
@@ -876,7 +979,11 @@ static void set_queue_properties_from_criu(struct queue_properties *qp,
qp->eop_ring_buffer_size = q_data->eop_ring_buffer_size;
qp->ctx_save_restore_area_address = q_data->ctx_save_restore_area_address;
qp->ctx_save_restore_area_size = q_data->ctx_save_restore_area_size;
- qp->ctl_stack_size = q_data->ctl_stack_size;
+ if (q_data->type == KFD_QUEUE_TYPE_COMPUTE)
+ qp->ctl_stack_size = q_data->ctl_stack_size / num_xcc;
+ else
+ qp->ctl_stack_size = q_data->ctl_stack_size;
+
qp->type = q_data->type;
qp->format = q_data->format;
}
@@ -936,17 +1043,19 @@ int kfd_criu_restore_queue(struct kfd_process *p,
goto exit;
}
- /* data stored in this order: mqd, ctl_stack */
+ /*
+ * data stored in this order:
+ * mqd[xcc0], mqd[xcc1],..., ctl_stack[xcc0], ctl_stack[xcc1]...
+ */
mqd = q_extra_data;
ctl_stack = mqd + q_data->mqd_size;
memset(&qp, 0, sizeof(qp));
- set_queue_properties_from_criu(&qp, q_data);
+ set_queue_properties_from_criu(&qp, q_data, NUM_XCC(pdd->dev->adev->gfx.xcc_mask));
print_queue_properties(&qp);
- ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, NULL, q_data, mqd, ctl_stack,
- NULL);
+ ret = pqm_create_queue(&p->pqm, pdd->dev, &qp, &queue_id, q_data, mqd, ctl_stack, NULL);
if (ret) {
pr_err("Failed to create new queue err:%d\n", ret);
goto exit;
@@ -962,6 +1071,7 @@ exit:
pr_debug("Queue id %d was restored successfully\n", queue_id);
kfree(q_data);
+ kfree(q_extra_data);
return ret;
}