summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c447
1 files changed, 443 insertions, 4 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 44d87943e40a..bc9e81293165 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -952,6 +952,92 @@ out_unlock:
return retval;
}
+/* suspend_single_queue does not lock the dqm like the
+ * evict_process_queues_cpsch or evict_process_queues_nocpsch. You should
+ * lock the dqm before calling, and unlock after calling.
+ *
+ * The reason we don't lock the dqm is because this function may be
+ * called on multiple queues in a loop, so rather than locking/unlocking
+ * multiple times, we will just keep the dqm locked for all of the calls.
+ */
+static int suspend_single_queue(struct device_queue_manager *dqm,
+ struct kfd_process_device *pdd,
+ struct queue *q)
+{
+ bool is_new;
+
+ if (q->properties.is_suspended)
+ return 0;
+
+ pr_debug("Suspending PASID %u queue [%i]\n",
+ pdd->process->pasid,
+ q->properties.queue_id);
+
+ is_new = q->properties.exception_status & KFD_EC_MASK(EC_QUEUE_NEW);
+
+ if (is_new || q->properties.is_being_destroyed) {
+ pr_debug("Suspend: skip %s queue id %i\n",
+ is_new ? "new" : "destroyed",
+ q->properties.queue_id);
+ return -EBUSY;
+ }
+
+ q->properties.is_suspended = true;
+ if (q->properties.is_active) {
+ if (dqm->dev->kfd->shared_resources.enable_mes) {
+ int r = remove_queue_mes(dqm, q, &pdd->qpd);
+
+ if (r)
+ return r;
+ }
+
+ decrement_queue_count(dqm, &pdd->qpd, q);
+ q->properties.is_active = false;
+ }
+
+ return 0;
+}
+
+/* resume_single_queue does not lock the dqm like the functions
+ * restore_process_queues_cpsch or restore_process_queues_nocpsch. You should
+ * lock the dqm before calling, and unlock after calling.
+ *
+ * The reason we don't lock the dqm is because this function may be
+ * called on multiple queues in a loop, so rather than locking/unlocking
+ * multiple times, we will just keep the dqm locked for all of the calls.
+ */
+static int resume_single_queue(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
+ struct queue *q)
+{
+ struct kfd_process_device *pdd;
+
+ if (!q->properties.is_suspended)
+ return 0;
+
+ pdd = qpd_to_pdd(qpd);
+
+ pr_debug("Restoring from suspend PASID %u queue [%i]\n",
+ pdd->process->pasid,
+ q->properties.queue_id);
+
+ q->properties.is_suspended = false;
+
+ if (QUEUE_IS_ACTIVE(q->properties)) {
+ if (dqm->dev->kfd->shared_resources.enable_mes) {
+ int r = add_queue_mes(dqm, q, &pdd->qpd);
+
+ if (r)
+ return r;
+ }
+
+ q->properties.is_active = true;
+ increment_queue_count(dqm, qpd, q);
+ }
+
+ return 0;
+}
+
static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
@@ -1926,6 +2012,31 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm,
return map_queues_cpsch(dqm);
}
+static int wait_on_destroy_queue(struct device_queue_manager *dqm,
+ struct queue *q)
+{
+ struct kfd_process_device *pdd = kfd_get_process_device_data(q->device,
+ q->process);
+ int ret = 0;
+
+ if (pdd->qpd.is_debug)
+ return ret;
+
+ q->properties.is_being_destroyed = true;
+
+ if (pdd->process->debug_trap_enabled && q->properties.is_suspended) {
+ dqm_unlock(dqm);
+ mutex_unlock(&q->process->mutex);
+ ret = wait_event_interruptible(dqm->destroy_wait,
+ !q->properties.is_suspended);
+
+ mutex_lock(&q->process->mutex);
+ dqm_lock(dqm);
+ }
+
+ return ret;
+}
+
static int destroy_queue_cpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
struct queue *q)
@@ -1945,11 +2056,16 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
q->properties.queue_id);
}
- retval = 0;
-
/* remove queue from list to prevent rescheduling after preemption */
dqm_lock(dqm);
+ retval = wait_on_destroy_queue(dqm, q);
+
+ if (retval) {
+ dqm_unlock(dqm);
+ return retval;
+ }
+
if (qpd->is_debug) {
/*
* error, currently we do not allow to destroy a queue
@@ -1996,7 +2112,14 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
dqm_unlock(dqm);
- /* Do free_mqd after dqm_unlock(dqm) to avoid circular locking */
+ /*
+ * Do free_mqd and raise delete event after dqm_unlock(dqm) to avoid
+ * circular locking
+ */
+ kfd_dbg_ev_raise(KFD_EC_MASK(EC_DEVICE_QUEUE_DELETE),
+ qpd->pqm->process, q->device,
+ -1, false, NULL, 0);
+
mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
return retval;
@@ -2461,8 +2584,10 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
goto out_free;
}
- if (!dqm->ops.initialize(dqm))
+ if (!dqm->ops.initialize(dqm)) {
+ init_waitqueue_head(&dqm->destroy_wait);
return dqm;
+ }
out_free:
kfree(dqm);
@@ -2602,6 +2727,320 @@ out_unlock:
return r;
}
+#define QUEUE_NOT_FOUND -1
+/* invalidate queue operation in array */
+static void q_array_invalidate(uint32_t num_queues, uint32_t *queue_ids)
+{
+ int i;
+
+ for (i = 0; i < num_queues; i++)
+ queue_ids[i] |= KFD_DBG_QUEUE_INVALID_MASK;
+}
+
+/* find queue index in array */
+static int q_array_get_index(unsigned int queue_id,
+ uint32_t num_queues,
+ uint32_t *queue_ids)
+{
+ int i;
+
+ for (i = 0; i < num_queues; i++)
+ if (queue_id == (queue_ids[i] & ~KFD_DBG_QUEUE_INVALID_MASK))
+ return i;
+
+ return QUEUE_NOT_FOUND;
+}
+
+struct copy_context_work_handler_workarea {
+ struct work_struct copy_context_work;
+ struct kfd_process *p;
+};
+
+static void copy_context_work_handler (struct work_struct *work)
+{
+ struct copy_context_work_handler_workarea *workarea;
+ struct mqd_manager *mqd_mgr;
+ struct queue *q;
+ struct mm_struct *mm;
+ struct kfd_process *p;
+ uint32_t tmp_ctl_stack_used_size, tmp_save_area_used_size;
+ int i;
+
+ workarea = container_of(work,
+ struct copy_context_work_handler_workarea,
+ copy_context_work);
+
+ p = workarea->p;
+ mm = get_task_mm(p->lead_thread);
+
+ if (!mm)
+ return;
+
+ kthread_use_mm(mm);
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+ struct device_queue_manager *dqm = pdd->dev->dqm;
+ struct qcm_process_device *qpd = &pdd->qpd;
+
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
+
+ /* We ignore the return value from get_wave_state
+ * because
+ * i) right now, it always returns 0, and
+ * ii) if we hit an error, we would continue to the
+ * next queue anyway.
+ */
+ mqd_mgr->get_wave_state(mqd_mgr,
+ q->mqd,
+ &q->properties,
+ (void __user *) q->properties.ctx_save_restore_area_address,
+ &tmp_ctl_stack_used_size,
+ &tmp_save_area_used_size);
+ }
+ }
+ kthread_unuse_mm(mm);
+ mmput(mm);
+}
+
+static uint32_t *get_queue_ids(uint32_t num_queues, uint32_t *usr_queue_id_array)
+{
+ size_t array_size = num_queues * sizeof(uint32_t);
+ uint32_t *queue_ids = NULL;
+
+ if (!usr_queue_id_array)
+ return NULL;
+
+ queue_ids = kzalloc(array_size, GFP_KERNEL);
+ if (!queue_ids)
+ return ERR_PTR(-ENOMEM);
+
+ if (copy_from_user(queue_ids, usr_queue_id_array, array_size))
+ return ERR_PTR(-EFAULT);
+
+ return queue_ids;
+}
+
+int resume_queues(struct kfd_process *p,
+ uint32_t num_queues,
+ uint32_t *usr_queue_id_array)
+{
+ uint32_t *queue_ids = NULL;
+ int total_resumed = 0;
+ int i;
+
+ if (usr_queue_id_array) {
+ queue_ids = get_queue_ids(num_queues, usr_queue_id_array);
+
+ if (IS_ERR(queue_ids))
+ return PTR_ERR(queue_ids);
+
+ /* mask all queues as invalid. unmask per successful request */
+ q_array_invalidate(num_queues, queue_ids);
+ }
+
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+ struct device_queue_manager *dqm = pdd->dev->dqm;
+ struct qcm_process_device *qpd = &pdd->qpd;
+ struct queue *q;
+ int r, per_device_resumed = 0;
+
+ dqm_lock(dqm);
+
+ /* unmask queues that resume or already resumed as valid */
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ int q_idx = QUEUE_NOT_FOUND;
+
+ if (queue_ids)
+ q_idx = q_array_get_index(
+ q->properties.queue_id,
+ num_queues,
+ queue_ids);
+
+ if (!queue_ids || q_idx != QUEUE_NOT_FOUND) {
+ int err = resume_single_queue(dqm, &pdd->qpd, q);
+
+ if (queue_ids) {
+ if (!err) {
+ queue_ids[q_idx] &=
+ ~KFD_DBG_QUEUE_INVALID_MASK;
+ } else {
+ queue_ids[q_idx] |=
+ KFD_DBG_QUEUE_ERROR_MASK;
+ break;
+ }
+ }
+
+ if (dqm->dev->kfd->shared_resources.enable_mes) {
+ wake_up_all(&dqm->destroy_wait);
+ if (!err)
+ total_resumed++;
+ } else {
+ per_device_resumed++;
+ }
+ }
+ }
+
+ if (!per_device_resumed) {
+ dqm_unlock(dqm);
+ continue;
+ }
+
+ r = execute_queues_cpsch(dqm,
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES,
+ 0,
+ USE_DEFAULT_GRACE_PERIOD);
+ if (r) {
+ pr_err("Failed to resume process queues\n");
+ if (queue_ids) {
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ int q_idx = q_array_get_index(
+ q->properties.queue_id,
+ num_queues,
+ queue_ids);
+
+ /* mask queue as error on resume fail */
+ if (q_idx != QUEUE_NOT_FOUND)
+ queue_ids[q_idx] |=
+ KFD_DBG_QUEUE_ERROR_MASK;
+ }
+ }
+ } else {
+ wake_up_all(&dqm->destroy_wait);
+ total_resumed += per_device_resumed;
+ }
+
+ dqm_unlock(dqm);
+ }
+
+ if (queue_ids) {
+ if (copy_to_user((void __user *)usr_queue_id_array, queue_ids,
+ num_queues * sizeof(uint32_t)))
+ pr_err("copy_to_user failed on queue resume\n");
+
+ kfree(queue_ids);
+ }
+
+ return total_resumed;
+}
+
+int suspend_queues(struct kfd_process *p,
+ uint32_t num_queues,
+ uint32_t grace_period,
+ uint64_t exception_clear_mask,
+ uint32_t *usr_queue_id_array)
+{
+ uint32_t *queue_ids = get_queue_ids(num_queues, usr_queue_id_array);
+ int total_suspended = 0;
+ int i;
+
+ if (IS_ERR(queue_ids))
+ return PTR_ERR(queue_ids);
+
+ /* mask all queues as invalid. umask on successful request */
+ q_array_invalidate(num_queues, queue_ids);
+
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
+ struct device_queue_manager *dqm = pdd->dev->dqm;
+ struct qcm_process_device *qpd = &pdd->qpd;
+ struct queue *q;
+ int r, per_device_suspended = 0;
+
+ mutex_lock(&p->event_mutex);
+ dqm_lock(dqm);
+
+ /* unmask queues that suspend or already suspended */
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ int q_idx = q_array_get_index(q->properties.queue_id,
+ num_queues,
+ queue_ids);
+
+ if (q_idx != QUEUE_NOT_FOUND) {
+ int err = suspend_single_queue(dqm, pdd, q);
+ bool is_mes = dqm->dev->kfd->shared_resources.enable_mes;
+
+ if (!err) {
+ queue_ids[q_idx] &= ~KFD_DBG_QUEUE_INVALID_MASK;
+ if (exception_clear_mask && is_mes)
+ q->properties.exception_status &=
+ ~exception_clear_mask;
+
+ if (is_mes)
+ total_suspended++;
+ else
+ per_device_suspended++;
+ } else if (err != -EBUSY) {
+ r = err;
+ queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK;
+ break;
+ }
+ }
+ }
+
+ if (!per_device_suspended) {
+ dqm_unlock(dqm);
+ mutex_unlock(&p->event_mutex);
+ if (total_suspended)
+ amdgpu_amdkfd_debug_mem_fence(dqm->dev->adev);
+ continue;
+ }
+
+ r = execute_queues_cpsch(dqm,
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
+ grace_period);
+
+ if (r)
+ pr_err("Failed to suspend process queues.\n");
+ else
+ total_suspended += per_device_suspended;
+
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ int q_idx = q_array_get_index(q->properties.queue_id,
+ num_queues, queue_ids);
+
+ if (q_idx == QUEUE_NOT_FOUND)
+ continue;
+
+ /* mask queue as error on suspend fail */
+ if (r)
+ queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK;
+ else if (exception_clear_mask)
+ q->properties.exception_status &=
+ ~exception_clear_mask;
+ }
+
+ dqm_unlock(dqm);
+ mutex_unlock(&p->event_mutex);
+ amdgpu_device_flush_hdp(dqm->dev->adev, NULL);
+ }
+
+ if (total_suspended) {
+ struct copy_context_work_handler_workarea copy_context_worker;
+
+ INIT_WORK_ONSTACK(
+ &copy_context_worker.copy_context_work,
+ copy_context_work_handler);
+
+ copy_context_worker.p = p;
+
+ schedule_work(&copy_context_worker.copy_context_work);
+
+
+ flush_work(&copy_context_worker.copy_context_work);
+ destroy_work_on_stack(&copy_context_worker.copy_context_work);
+ }
+
+ if (copy_to_user((void __user *)usr_queue_id_array, queue_ids,
+ num_queues * sizeof(uint32_t)))
+ pr_err("copy_to_user failed on queue suspend\n");
+
+ kfree(queue_ids);
+
+ return total_suspended;
+}
+
int debug_lock_and_unmap(struct device_queue_manager *dqm)
{
int r;