summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c372
1 files changed, 281 insertions, 91 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 5137476ec18e..7fbb5c274ccc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -28,6 +28,7 @@
#include "kfd_priv.h"
#include "kfd_kernel_queue.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_reset.h"
static inline struct process_queue_node *get_queue_by_qid(
struct process_queue_manager *pqm, unsigned int qid)
@@ -68,8 +69,8 @@ static int find_available_queue_slot(struct process_queue_manager *pqm,
pr_debug("The new slot id %lu\n", found);
if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
- pr_info("Cannot open more queues for process with pasid 0x%x\n",
- pqm->process->pasid);
+ pr_info("Cannot open more queues for process with pid %d\n",
+ pqm->process->lead_thread->pid);
return -ENOMEM;
}
@@ -81,19 +82,29 @@ static int find_available_queue_slot(struct process_queue_manager *pqm,
void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
{
- struct kfd_dev *dev = pdd->dev;
+ struct kfd_node *dev = pdd->dev;
if (pdd->already_dequeued)
return;
-
+ /* The MES context flush needs to filter out the case which the
+ * KFD process is created without setting up the MES context and
+ * queue for creating a compute queue.
+ */
dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
+ if (dev->kfd->shared_resources.enable_mes && !!pdd->proc_ctx_gpu_addr &&
+ down_read_trylock(&dev->adev->reset_domain->sem)) {
+ amdgpu_mes_flush_shader_debugger(dev->adev,
+ pdd->proc_ctx_gpu_addr);
+ up_read(&dev->adev->reset_domain->sem);
+ }
pdd->already_dequeued = true;
}
int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
void *gws)
{
- struct kfd_dev *dev = NULL;
+ struct mqd_update_info minfo = {0};
+ struct kfd_node *dev = NULL;
struct process_queue_node *pqn;
struct kfd_process_device *pdd;
struct kgd_mem *mem = NULL;
@@ -123,20 +134,34 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
if (!gws && pdd->qpd.num_gws == 0)
return -EINVAL;
- if (gws)
- ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
- gws, &mem);
- else
- ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info,
- pqn->q->gws);
- if (unlikely(ret))
- return ret;
+ if ((KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 5, 0)) &&
+ !dev->kfd->shared_resources.enable_mes) {
+ if (gws)
+ ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
+ gws, &mem);
+ else
+ ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info,
+ pqn->q->gws);
+ if (unlikely(ret))
+ return ret;
+ pqn->q->gws = mem;
+ } else {
+ /*
+ * Intentionally set GWS to a non-NULL value
+ * for devices that do not use GWS for global wave
+ * synchronization but require the formality
+ * of setting GWS for cooperative groups.
+ */
+ pqn->q->gws = gws ? ERR_PTR(-ENOMEM) : NULL;
+ }
- pqn->q->gws = mem;
pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0;
+ minfo.update_flag = gws ? UPDATE_FLAG_IS_GWS : 0;
return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
- pqn->q, NULL);
+ pqn->q, &minfo);
}
void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
@@ -159,14 +184,53 @@ int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
return 0;
}
+static void pqm_clean_queue_resource(struct process_queue_manager *pqm,
+ struct process_queue_node *pqn)
+{
+ struct kfd_node *dev;
+ struct kfd_process_device *pdd;
+
+ dev = pqn->q->device;
+
+ pdd = kfd_get_process_device_data(dev, pqm->process);
+ if (!pdd) {
+ pr_err("Process device data doesn't exist\n");
+ return;
+ }
+
+ if (pqn->q->gws) {
+ if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
+ KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 5, 0) &&
+ !dev->kfd->shared_resources.enable_mes)
+ amdgpu_amdkfd_remove_gws_from_process(
+ pqm->process->kgd_process_info, pqn->q->gws);
+ pdd->qpd.num_gws = 0;
+ }
+
+ if (dev->kfd->shared_resources.enable_mes) {
+ amdgpu_amdkfd_free_gtt_mem(dev->adev, &pqn->q->gang_ctx_bo);
+ amdgpu_amdkfd_free_gtt_mem(dev->adev, (void **)&pqn->q->wptr_bo_gart);
+ }
+}
+
void pqm_uninit(struct process_queue_manager *pqm)
{
struct process_queue_node *pqn, *next;
list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
- if (pqn->q && pqn->q->gws)
- amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
- pqn->q->gws);
+ if (pqn->q) {
+ struct kfd_process_device *pdd = kfd_get_process_device_data(pqn->q->device,
+ pqm->process);
+ if (pdd) {
+ kfd_queue_unref_bo_vas(pdd, &pqn->q->properties);
+ kfd_queue_release_buffers(pdd, &pqn->q->properties);
+ } else {
+ WARN_ON(!pdd);
+ }
+ pqm_clean_queue_resource(pqm, pqn);
+ }
+
kfd_procfs_del_queue(pqn->q);
uninit_queue(pqn->q);
list_del(&pqn->process_queue_list);
@@ -178,15 +242,15 @@ void pqm_uninit(struct process_queue_manager *pqm)
}
static int init_user_queue(struct process_queue_manager *pqm,
- struct kfd_dev *dev, struct queue **q,
+ struct kfd_node *dev, struct queue **q,
struct queue_properties *q_properties,
- struct file *f, struct amdgpu_bo *wptr_bo,
unsigned int qid)
{
int retval;
/* Doorbell initialized in user space*/
q_properties->doorbell_ptr = NULL;
+ q_properties->exception_status = KFD_EC_MASK(EC_QUEUE_NEW);
/* let DQM handle it*/
q_properties->vmid = 0;
@@ -199,7 +263,7 @@ static int init_user_queue(struct process_queue_manager *pqm,
(*q)->device = dev;
(*q)->process = pqm->process;
- if (dev->shared_resources.enable_mes) {
+ if (dev->kfd->shared_resources.enable_mes) {
retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
AMDGPU_MES_GANG_CTX_SIZE,
&(*q)->gang_ctx_bo,
@@ -211,24 +275,39 @@ static int init_user_queue(struct process_queue_manager *pqm,
goto cleanup;
}
memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
- (*q)->wptr_bo = wptr_bo;
+
+ /* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work
+ * on unmapped queues for usermode queue oversubscription (no aggregated doorbell)
+ */
+ if (dev->adev != amdgpu_ttm_adev(q_properties->wptr_bo->tbo.bdev)) {
+ pr_err("Queue memory allocated to wrong device\n");
+ retval = -EINVAL;
+ goto free_gang_ctx_bo;
+ }
+
+ retval = amdgpu_amdkfd_map_gtt_bo_to_gart(q_properties->wptr_bo,
+ &(*q)->wptr_bo_gart);
+ if (retval) {
+ pr_err("Failed to map wptr bo to GART\n");
+ goto free_gang_ctx_bo;
+ }
}
pr_debug("PQM After init queue");
return 0;
+free_gang_ctx_bo:
+ amdgpu_amdkfd_free_gtt_mem(dev->adev, &(*q)->gang_ctx_bo);
cleanup:
- if (dev->shared_resources.enable_mes)
- uninit_queue(*q);
+ uninit_queue(*q);
+ *q = NULL;
return retval;
}
int pqm_create_queue(struct process_queue_manager *pqm,
- struct kfd_dev *dev,
- struct file *f,
+ struct kfd_node *dev,
struct queue_properties *properties,
unsigned int *qid,
- struct amdgpu_bo *wptr_bo,
const struct kfd_criu_queue_priv_data *q_data,
const void *restore_mqd,
const void *restore_ctl_stack,
@@ -242,6 +321,15 @@ int pqm_create_queue(struct process_queue_manager *pqm,
enum kfd_queue_type type = properties->type;
unsigned int max_queues = 127; /* HWS limit */
+ /*
+ * On GFX 9.4.3/9.5.0, increase the number of queues that
+ * can be created to 255. No HWS limit on GFX 9.4.3/9.5.0.
+ */
+ if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0))
+ max_queues = 255;
+
q = NULL;
kq = NULL;
@@ -258,7 +346,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
* Hence we also check the type as well
*/
if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ))
- max_queues = dev->device_info.max_no_of_hqd/2;
+ max_queues = dev->kfd->device_info.max_no_of_hqd/2;
if (pdd->qpd.queue_count >= max_queues)
return -ENOSPC;
@@ -272,10 +360,26 @@ int pqm_create_queue(struct process_queue_manager *pqm,
if (retval != 0)
return retval;
+ /* Register process if this is the first queue */
if (list_empty(&pdd->qpd.queues_list) &&
list_empty(&pdd->qpd.priv_queue_list))
dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);
+ /* Allocate proc_ctx_bo only if MES is enabled and this is the first queue */
+ if (!pdd->proc_ctx_cpu_ptr && dev->kfd->shared_resources.enable_mes) {
+ retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
+ AMDGPU_MES_PROC_CTX_SIZE,
+ &pdd->proc_ctx_bo,
+ &pdd->proc_ctx_gpu_addr,
+ &pdd->proc_ctx_cpu_ptr,
+ false);
+ if (retval) {
+ dev_err(dev->adev->dev, "failed to allocate process context bo\n");
+ return retval;
+ }
+ memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+ }
+
pqn = kzalloc(sizeof(*pqn), GFP_KERNEL);
if (!pqn) {
retval = -ENOMEM;
@@ -285,13 +389,14 @@ int pqm_create_queue(struct process_queue_manager *pqm,
switch (type) {
case KFD_QUEUE_TYPE_SDMA:
case KFD_QUEUE_TYPE_SDMA_XGMI:
+ case KFD_QUEUE_TYPE_SDMA_BY_ENG_ID:
/* SDMA queues are always allocated statically no matter
* which scheduler mode is used. We also do not need to
* check whether a SDMA queue can be allocated here, because
* allocate_sdma_queue() in create_queue() has the
* corresponding check logic.
*/
- retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
+ retval = init_user_queue(pqm, dev, &q, properties, *qid);
if (retval != 0)
goto err_create_queue;
pqn->q = q;
@@ -312,7 +417,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
}
- retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
+ retval = init_user_queue(pqm, dev, &q, properties, *qid);
if (retval != 0)
goto err_create_queue;
pqn->q = q;
@@ -330,6 +435,10 @@ int pqm_create_queue(struct process_queue_manager *pqm,
kq->queue->properties.queue_id = *qid;
pqn->kq = kq;
pqn->q = NULL;
+ retval = kfd_process_drain_interrupts(pdd);
+ if (retval)
+ break;
+
retval = dev->dqm->ops.create_kernel_queue(dev->dqm,
kq, &pdd->qpd);
break;
@@ -339,22 +448,33 @@ int pqm_create_queue(struct process_queue_manager *pqm,
}
if (retval != 0) {
- pr_err("Pasid 0x%x DQM create queue type %d failed. ret %d\n",
- pqm->process->pasid, type, retval);
+ if ((type == KFD_QUEUE_TYPE_SDMA ||
+ type == KFD_QUEUE_TYPE_SDMA_XGMI ||
+ type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) &&
+ retval == -ENOMEM)
+ pr_warn("process pid %d DQM create queue type %d failed. ret %d\n",
+ pqm->process->lead_thread->pid, type, retval);
+ else
+ pr_err("process pid %d DQM create queue type %d failed. ret %d\n",
+ pqm->process->lead_thread->pid, type, retval);
goto err_create_queue;
}
- if (q && p_doorbell_offset_in_process)
+ if (q && p_doorbell_offset_in_process) {
/* Return the doorbell offset within the doorbell page
* to the caller so it can be passed up to user mode
* (in bytes).
- * There are always 1024 doorbells per process, so in case
- * of 8-byte doorbells, there are two doorbell pages per
- * process.
+ * relative doorbell index = Absolute doorbell index -
+ * absolute index of first doorbell in the page.
*/
- *p_doorbell_offset_in_process =
- (q->properties.doorbell_off * sizeof(uint32_t)) &
- (kfd_doorbell_process_slice(dev) - 1);
+ uint32_t first_db_index = amdgpu_doorbell_index_on_bar(pdd->dev->adev,
+ pdd->qpd.proc_doorbells,
+ 0,
+ pdd->dev->kfd->device_info.doorbell_size);
+
+ *p_doorbell_offset_in_process = (q->properties.doorbell_off
+ - first_db_index) * sizeof(uint32_t);
+ }
pr_debug("PQM After DQM create queue\n");
@@ -371,7 +491,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
err_create_queue:
uninit_queue(q);
if (kq)
- kernel_queue_uninit(kq, false);
+ kernel_queue_uninit(kq);
kfree(pqn);
err_allocate_pqn:
/* check if queues list is empty unregister process from device */
@@ -387,7 +507,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
struct process_queue_node *pqn;
struct kfd_process_device *pdd;
struct device_queue_manager *dqm;
- struct kfd_dev *dev;
+ struct kfd_node *dev;
int retval;
dqm = NULL;
@@ -418,34 +538,26 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
/* destroy kernel queue (DIQ) */
dqm = pqn->kq->dev->dqm;
dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd);
- kernel_queue_uninit(pqn->kq, false);
+ kernel_queue_uninit(pqn->kq);
}
if (pqn->q) {
- kfd_procfs_del_queue(pqn->q);
+ retval = kfd_queue_unref_bo_vas(pdd, &pqn->q->properties);
+ if (retval)
+ goto err_destroy_queue;
+
dqm = pqn->q->device->dqm;
retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
if (retval) {
pr_err("Pasid 0x%x destroy queue %d failed, ret %d\n",
- pqm->process->pasid,
+ pdd->pasid,
pqn->q->properties.queue_id, retval);
- if (retval != -ETIME)
+ if (retval != -ETIME && retval != -EIO)
goto err_destroy_queue;
}
-
- if (pqn->q->gws) {
- amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
- pqn->q->gws);
- pdd->qpd.num_gws = 0;
- }
-
- if (dev->shared_resources.enable_mes) {
- amdgpu_amdkfd_free_gtt_mem(dev->adev,
- pqn->q->gang_ctx_bo);
- if (pqn->q->wptr_bo)
- amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo);
-
- }
+ kfd_procfs_del_queue(pqn->q);
+ kfd_queue_release_buffers(pdd, &pqn->q->properties);
+ pqm_clean_queue_resource(pqm, pqn);
uninit_queue(pqn->q);
}
@@ -468,15 +580,47 @@ int pqm_update_queue_properties(struct process_queue_manager *pqm,
struct process_queue_node *pqn;
pqn = get_queue_by_qid(pqm, qid);
- if (!pqn) {
+ if (!pqn || !pqn->q) {
pr_debug("No queue %d exists for update operation\n", qid);
return -EFAULT;
}
+ /*
+ * Update with NULL ring address is used to disable the queue
+ */
+ if (p->queue_address && p->queue_size) {
+ struct kfd_process_device *pdd;
+ struct amdgpu_vm *vm;
+ struct queue *q = pqn->q;
+ int err;
+
+ pdd = kfd_get_process_device_data(q->device, q->process);
+ if (!pdd)
+ return -ENODEV;
+ vm = drm_priv_to_vm(pdd->drm_priv);
+ err = amdgpu_bo_reserve(vm->root.bo, false);
+ if (err)
+ return err;
+
+ if (kfd_queue_buffer_get(vm, (void *)p->queue_address, &p->ring_bo,
+ p->queue_size)) {
+ pr_debug("ring buf 0x%llx size 0x%llx not mapped on GPU\n",
+ p->queue_address, p->queue_size);
+ return -EFAULT;
+ }
+
+ kfd_queue_unref_bo_va(vm, &pqn->q->properties.ring_bo);
+ kfd_queue_buffer_put(&pqn->q->properties.ring_bo);
+ amdgpu_bo_unreserve(vm->root.bo);
+
+ pqn->q->properties.ring_bo = p->ring_bo;
+ }
+
pqn->q->properties.queue_address = p->queue_address;
pqn->q->properties.queue_size = p->queue_size;
pqn->q->properties.queue_percent = p->queue_percent;
pqn->q->properties.priority = p->priority;
+ pqn->q->properties.pm4_target_xcc = p->pm4_target_xcc;
retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
pqn->q, NULL);
@@ -498,8 +642,12 @@ int pqm_update_mqd(struct process_queue_manager *pqm,
return -EFAULT;
}
+ /* CUs are masked for debugger requirements so deny user mask */
+ if (pqn->q->properties.is_dbg_wa && minfo && minfo->cu_mask.ptr)
+ return -EBUSY;
+
/* ASICs that have WGPs must enforce pairwise enabled mask checks. */
- if (minfo && minfo->update_flag == UPDATE_FLAG_CU_MASK && minfo->cu_mask.ptr &&
+ if (minfo && minfo->cu_mask.ptr &&
KFD_GC_VERSION(pqn->q->device) >= IP_VERSION(10, 0, 0)) {
int i;
@@ -518,20 +666,10 @@ int pqm_update_mqd(struct process_queue_manager *pqm,
if (retval != 0)
return retval;
- return 0;
-}
-
-struct kernel_queue *pqm_get_kernel_queue(
- struct process_queue_manager *pqm,
- unsigned int qid)
-{
- struct process_queue_node *pqn;
+ if (minfo && minfo->cu_mask.ptr)
+ pqn->q->properties.is_user_cu_masked = true;
- pqn = get_queue_by_qid(pqm, qid);
- if (pqn && pqn->kq)
- return pqn->kq;
-
- return NULL;
+ return 0;
}
struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
@@ -565,6 +703,46 @@ int pqm_get_wave_state(struct process_queue_manager *pqm,
save_area_used_size);
}
+int pqm_get_queue_snapshot(struct process_queue_manager *pqm,
+ uint64_t exception_clear_mask,
+ void __user *buf,
+ int *num_qss_entries,
+ uint32_t *entry_size)
+{
+ struct process_queue_node *pqn;
+ struct kfd_queue_snapshot_entry src;
+ uint32_t tmp_entry_size = *entry_size, tmp_qss_entries = *num_qss_entries;
+ int r = 0;
+
+ *num_qss_entries = 0;
+ if (!(*entry_size))
+ return -EINVAL;
+
+ *entry_size = min_t(size_t, *entry_size, sizeof(struct kfd_queue_snapshot_entry));
+ mutex_lock(&pqm->process->event_mutex);
+
+ memset(&src, 0, sizeof(src));
+
+ list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
+ if (!pqn->q)
+ continue;
+
+ if (*num_qss_entries < tmp_qss_entries) {
+ set_queue_snapshot_entry(pqn->q, exception_clear_mask, &src);
+
+ if (copy_to_user(buf, &src, *entry_size)) {
+ r = -EFAULT;
+ break;
+ }
+ buf += tmp_entry_size;
+ }
+ *num_qss_entries += 1;
+ }
+
+ mutex_unlock(&pqm->process->event_mutex);
+ return r;
+}
+
static int get_queue_data_sizes(struct kfd_process_device *pdd,
struct queue *q,
uint32_t *mqd_size,
@@ -736,7 +914,10 @@ static int criu_checkpoint_queues_device(struct kfd_process_device *pdd,
q_data = (struct kfd_criu_queue_priv_data *)q_private_data;
- /* data stored in this order: priv_data, mqd, ctl_stack */
+ /*
+ * data stored in this order:
+ * priv_data, mqd[xcc0], mqd[xcc1],..., ctl_stack[xcc0], ctl_stack[xcc1]...
+ */
q_data->mqd_size = mqd_size;
q_data->ctl_stack_size = ctl_stack_size;
@@ -785,7 +966,7 @@ int kfd_criu_checkpoint_queues(struct kfd_process *p,
}
static void set_queue_properties_from_criu(struct queue_properties *qp,
- struct kfd_criu_queue_priv_data *q_data)
+ struct kfd_criu_queue_priv_data *q_data, uint32_t num_xcc)
{
qp->is_interop = false;
qp->queue_percent = q_data->q_percent;
@@ -798,7 +979,11 @@ static void set_queue_properties_from_criu(struct queue_properties *qp,
qp->eop_ring_buffer_size = q_data->eop_ring_buffer_size;
qp->ctx_save_restore_area_address = q_data->ctx_save_restore_area_address;
qp->ctx_save_restore_area_size = q_data->ctx_save_restore_area_size;
- qp->ctl_stack_size = q_data->ctl_stack_size;
+ if (q_data->type == KFD_QUEUE_TYPE_COMPUTE)
+ qp->ctl_stack_size = q_data->ctl_stack_size / num_xcc;
+ else
+ qp->ctl_stack_size = q_data->ctl_stack_size;
+
qp->type = q_data->type;
qp->format = q_data->format;
}
@@ -858,23 +1043,19 @@ int kfd_criu_restore_queue(struct kfd_process *p,
goto exit;
}
- if (!pdd->doorbell_index &&
- kfd_alloc_process_doorbells(pdd->dev, &pdd->doorbell_index) < 0) {
- ret = -ENOMEM;
- goto exit;
- }
-
- /* data stored in this order: mqd, ctl_stack */
+ /*
+ * data stored in this order:
+ * mqd[xcc0], mqd[xcc1],..., ctl_stack[xcc0], ctl_stack[xcc1]...
+ */
mqd = q_extra_data;
ctl_stack = mqd + q_data->mqd_size;
memset(&qp, 0, sizeof(qp));
- set_queue_properties_from_criu(&qp, q_data);
+ set_queue_properties_from_criu(&qp, q_data, NUM_XCC(pdd->dev->adev->gfx.xcc_mask));
print_queue_properties(&qp);
- ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, NULL, q_data, mqd, ctl_stack,
- NULL);
+ ret = pqm_create_queue(&p->pqm, pdd->dev, &qp, &queue_id, q_data, mqd, ctl_stack, NULL);
if (ret) {
pr_err("Failed to create new queue err:%d\n", ret);
goto exit;
@@ -890,6 +1071,7 @@ exit:
pr_debug("Queue id %d was restored successfully\n", queue_id);
kfree(q_data);
+ kfree(q_extra_data);
return ret;
}
@@ -927,7 +1109,9 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
struct queue *q;
enum KFD_MQD_TYPE mqd_type;
struct mqd_manager *mqd_mgr;
- int r = 0;
+ int r = 0, xcc, num_xccs = 1;
+ void *mqd;
+ uint64_t size = 0;
list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
if (pqn->q) {
@@ -943,6 +1127,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
seq_printf(m, " Compute queue on device %x\n",
q->device->id);
mqd_type = KFD_MQD_TYPE_CP;
+ num_xccs = NUM_XCC(q->device->xcc_mask);
break;
default:
seq_printf(m,
@@ -951,6 +1136,8 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
continue;
}
mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type];
+ size = mqd_mgr->mqd_stride(mqd_mgr,
+ &q->properties);
} else if (pqn->kq) {
q = pqn->kq->queue;
mqd_mgr = pqn->kq->mqd_mgr;
@@ -972,9 +1159,12 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
continue;
}
- r = mqd_mgr->debugfs_show_mqd(m, q->mqd);
- if (r != 0)
- break;
+ for (xcc = 0; xcc < num_xccs; xcc++) {
+ mqd = q->mqd + size * xcc;
+ r = mqd_mgr->debugfs_show_mqd(m, mqd);
+ if (r != 0)
+ break;
+ }
}
return r;