diff options
Diffstat (limited to 'drivers/gpu/drm/scheduler/sched_entity.c')
| -rw-r--r-- | drivers/gpu/drm/scheduler/sched_entity.c | 300 |
1 files changed, 197 insertions, 103 deletions
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 15d04a0ec623..fe174a4857be 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -21,17 +21,16 @@ * */ -#include <linux/kthread.h> +#include <linux/export.h> #include <linux/slab.h> #include <linux/completion.h> #include <drm/drm_print.h> #include <drm/gpu_scheduler.h> -#include "gpu_scheduler_trace.h" +#include "sched_internal.h" -#define to_drm_sched_job(sched_job) \ - container_of((sched_job), struct drm_sched_job, queue_node) +#include "gpu_scheduler_trace.h" /** * drm_sched_entity_init - Init a context entity used by scheduler when @@ -51,7 +50,7 @@ * drm_sched_entity_set_priority(). For changing the set of schedulers * @sched_list at runtime see drm_sched_entity_modify_sched(). * - * An entity is cleaned up by callind drm_sched_entity_fini(). See also + * An entity is cleaned up by calling drm_sched_entity_fini(). See also * drm_sched_entity_destroy(). * * Returns 0 on success or a negative error code on failure. @@ -71,19 +70,42 @@ int drm_sched_entity_init(struct drm_sched_entity *entity, entity->guilty = guilty; entity->num_sched_list = num_sched_list; entity->priority = priority; + entity->last_user = current->group_leader; + /* + * It's perfectly valid to initialize an entity without having a valid + * scheduler attached. It's just not valid to use the scheduler before it + * is initialized itself. + */ entity->sched_list = num_sched_list > 1 ? sched_list : NULL; - entity->last_scheduled = NULL; + RCU_INIT_POINTER(entity->last_scheduled, NULL); RB_CLEAR_NODE(&entity->rb_tree_node); - if(num_sched_list) - entity->rq = &sched_list[0]->sched_rq[entity->priority]; + if (num_sched_list && !sched_list[0]->sched_rq) { + /* Since every entry covered by num_sched_list + * should be non-NULL and therefore we warn drivers + * not to do this and to fix their DRM calling order. + */ + pr_warn("%s: called with uninitialized scheduler\n", __func__); + } else if (num_sched_list) { + /* The "priority" of an entity cannot exceed the number of run-queues of a + * scheduler. Protect against num_rqs being 0, by converting to signed. Choose + * the lowest priority available. + */ + if (entity->priority >= sched_list[0]->num_rqs) { + dev_err(sched_list[0]->dev, "entity has out-of-bounds priority: %u. num_rqs: %u\n", + entity->priority, sched_list[0]->num_rqs); + entity->priority = max_t(s32, (s32) sched_list[0]->num_rqs - 1, + (s32) DRM_SCHED_PRIORITY_KERNEL); + } + entity->rq = sched_list[0]->sched_rq[entity->priority]; + } init_completion(&entity->entity_idle); /* We start in an idle state. */ complete_all(&entity->entity_idle); - spin_lock_init(&entity->rq_lock); + spin_lock_init(&entity->lock); spsc_queue_init(&entity->job_queue); atomic_set(&entity->fence_seq, 0); @@ -111,8 +133,10 @@ void drm_sched_entity_modify_sched(struct drm_sched_entity *entity, { WARN_ON(!num_sched_list || !sched_list); + spin_lock(&entity->lock); entity->sched_list = sched_list; entity->num_sched_list = num_sched_list; + spin_unlock(&entity->lock); } EXPORT_SYMBOL(drm_sched_entity_modify_sched); @@ -128,23 +152,65 @@ static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity) return false; } -/* Return true if entity could provide a job. */ -bool drm_sched_entity_is_ready(struct drm_sched_entity *entity) +/** + * drm_sched_entity_error - return error of last scheduled job + * @entity: scheduler entity to check + * + * Opportunistically return the error of the last scheduled job. Result can + * change any time when new jobs are pushed to the hw. + */ +int drm_sched_entity_error(struct drm_sched_entity *entity) { - if (spsc_queue_peek(&entity->job_queue) == NULL) - return false; + struct dma_fence *fence; + int r; - if (READ_ONCE(entity->dependency)) - return false; + rcu_read_lock(); + fence = rcu_dereference(entity->last_scheduled); + r = fence ? fence->error : 0; + rcu_read_unlock(); - return true; + return r; } +EXPORT_SYMBOL(drm_sched_entity_error); + +static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, + struct dma_fence_cb *cb); static void drm_sched_entity_kill_jobs_work(struct work_struct *wrk) { struct drm_sched_job *job = container_of(wrk, typeof(*job), work); + struct dma_fence *f; + unsigned long index; - drm_sched_fence_finished(job->s_fence); + /* Wait for all dependencies to avoid data corruptions */ + xa_for_each(&job->dependencies, index, f) { + struct drm_sched_fence *s_fence = to_drm_sched_fence(f); + + if (s_fence && f == &s_fence->scheduled) { + /* The dependencies array had a reference on the scheduled + * fence, and the finished fence refcount might have + * dropped to zero. Use dma_fence_get_rcu() so we get + * a NULL fence in that case. + */ + f = dma_fence_get_rcu(&s_fence->finished); + + /* Now that we have a reference on the finished fence, + * we can release the reference the dependencies array + * had on the scheduled fence. + */ + dma_fence_put(&s_fence->scheduled); + } + + xa_erase(&job->dependencies, index); + if (f && !dma_fence_add_callback(f, &job->finish_cb, + drm_sched_entity_kill_jobs_cb)) + return; + + dma_fence_put(f); + } + + drm_sched_fence_scheduled(job->s_fence, NULL); + drm_sched_fence_finished(job->s_fence, -ESRCH); WARN_ON(job->s_fence->parent); job->sched->ops->free_job(job); } @@ -155,21 +221,9 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, { struct drm_sched_job *job = container_of(cb, struct drm_sched_job, finish_cb); - int r; dma_fence_put(f); - /* Wait for all dependencies to avoid data corruptions */ - while (!xa_empty(&job->dependencies)) { - f = xa_erase(&job->dependencies, job->last_dependency++); - r = dma_fence_add_callback(f, &job->finish_cb, - drm_sched_entity_kill_jobs_cb); - if (!r) - return; - - dma_fence_put(f); - } - INIT_WORK(&job->work, drm_sched_entity_kill_jobs_work); schedule_work(&job->work); } @@ -183,24 +237,31 @@ static void drm_sched_entity_kill(struct drm_sched_entity *entity) if (!entity->rq) return; - spin_lock(&entity->rq_lock); + spin_lock(&entity->lock); entity->stopped = true; drm_sched_rq_remove_entity(entity->rq, entity); - spin_unlock(&entity->rq_lock); + spin_unlock(&entity->lock); /* Make sure this entity is not used by the scheduler at the moment */ wait_for_completion(&entity->entity_idle); - prev = dma_fence_get(entity->last_scheduled); - while ((job = to_drm_sched_job(spsc_queue_pop(&entity->job_queue)))) { + /* The entity is guaranteed to not be used by the scheduler */ + prev = rcu_dereference_check(entity->last_scheduled, true); + dma_fence_get(prev); + while ((job = drm_sched_entity_queue_pop(entity))) { struct drm_sched_fence *s_fence = job->s_fence; - dma_fence_set_error(&s_fence->finished, -ESRCH); - dma_fence_get(&s_fence->finished); - if (!prev || dma_fence_add_callback(prev, &job->finish_cb, - drm_sched_entity_kill_jobs_cb)) + if (!prev || + dma_fence_add_callback(prev, &job->finish_cb, + drm_sched_entity_kill_jobs_cb)) { + /* + * Adding callback above failed. + * dma_fence_put() checks for NULL. + */ + dma_fence_put(prev); drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb); + } prev = &s_fence->finished; } @@ -229,9 +290,9 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout) return 0; sched = entity->rq->sched; - /** - * The client will not queue more IBs during this fini, consume existing - * queued IBs or discard them on SIGKILL + /* + * The client will not queue more jobs during this fini - consume + * existing queued ones, or discard them on SIGKILL. */ if (current->flags & PF_EXITING) { if (timeout) @@ -244,9 +305,9 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout) drm_sched_entity_is_idle(entity)); } - /* For killed process disable any more IBs enqueue right now */ + /* For a killed process disallow further enqueueing of jobs. */ last_user = cmpxchg(&entity->last_user, current->group_leader, NULL); - if ((!last_user || last_user == current->group_leader) && + if (last_user == current->group_leader && (current->flags & PF_EXITING) && (current->exit_code == SIGKILL)) drm_sched_entity_kill(entity); @@ -268,9 +329,9 @@ EXPORT_SYMBOL(drm_sched_entity_flush); void drm_sched_entity_fini(struct drm_sched_entity *entity) { /* - * If consumption of existing IBs wasn't completed. Forcefully remove - * them here. Also makes sure that the scheduler won't touch this entity - * any more. + * If consumption of existing jobs wasn't completed forcefully remove + * them. Also makes sure that the scheduler won't touch this entity any + * more. */ drm_sched_entity_kill(entity); @@ -280,8 +341,8 @@ void drm_sched_entity_fini(struct drm_sched_entity *entity) entity->dependency = NULL; } - dma_fence_put(entity->last_scheduled); - entity->last_scheduled = NULL; + dma_fence_put(rcu_dereference_check(entity->last_scheduled, true)); + RCU_INIT_POINTER(entity->last_scheduled, NULL); } EXPORT_SYMBOL(drm_sched_entity_fini); @@ -299,20 +360,9 @@ void drm_sched_entity_destroy(struct drm_sched_entity *entity) } EXPORT_SYMBOL(drm_sched_entity_destroy); -/* drm_sched_entity_clear_dep - callback to clear the entities dependency */ -static void drm_sched_entity_clear_dep(struct dma_fence *f, - struct dma_fence_cb *cb) -{ - struct drm_sched_entity *entity = - container_of(cb, struct drm_sched_entity, cb); - - entity->dependency = NULL; - dma_fence_put(f); -} - /* - * drm_sched_entity_clear_dep - callback to clear the entities dependency and - * wake up scheduler + * drm_sched_entity_wakeup - callback to clear the entity's dependency and + * wake up the scheduler */ static void drm_sched_entity_wakeup(struct dma_fence *f, struct dma_fence_cb *cb) @@ -320,7 +370,8 @@ static void drm_sched_entity_wakeup(struct dma_fence *f, struct drm_sched_entity *entity = container_of(cb, struct drm_sched_entity, cb); - drm_sched_entity_clear_dep(f, cb); + entity->dependency = NULL; + dma_fence_put(f); drm_sched_wakeup(entity->rq->sched); } @@ -330,14 +381,14 @@ static void drm_sched_entity_wakeup(struct dma_fence *f, * @entity: scheduler entity * @priority: scheduler priority * - * Update the priority of runqueus used for the entity. + * Update the priority of runqueues used for the entity. */ void drm_sched_entity_set_priority(struct drm_sched_entity *entity, enum drm_sched_priority priority) { - spin_lock(&entity->rq_lock); + spin_lock(&entity->lock); entity->priority = priority; - spin_unlock(&entity->rq_lock); + spin_unlock(&entity->lock); } EXPORT_SYMBOL(drm_sched_entity_set_priority); @@ -345,7 +396,8 @@ EXPORT_SYMBOL(drm_sched_entity_set_priority); * Add a callback to the current dependency of the entity to wake up the * scheduler when the entity becomes available. */ -static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity) +static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity, + struct drm_sched_job *sched_job) { struct drm_gpu_scheduler *sched = entity->rq->sched; struct dma_fence *fence = entity->dependency; @@ -363,7 +415,7 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity) } s_fence = to_drm_sched_fence(fence); - if (s_fence && s_fence->sched == sched && + if (!fence->error && s_fence && s_fence->sched == sched && !test_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &fence->flags)) { /* @@ -373,15 +425,12 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity) fence = dma_fence_get(&s_fence->scheduled); dma_fence_put(entity->dependency); entity->dependency = fence; - if (!dma_fence_add_callback(fence, &entity->cb, - drm_sched_entity_clear_dep)) - return true; - - /* Ignore it when it is already scheduled */ - dma_fence_put(fence); - return false; } + if (trace_drm_sched_job_unschedulable_enabled() && + !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &entity->dependency->flags)) + trace_drm_sched_job_unschedulable(sched_job, entity->dependency); + if (!dma_fence_add_callback(entity->dependency, &entity->cb, drm_sched_entity_wakeup)) return true; @@ -394,8 +443,17 @@ static struct dma_fence * drm_sched_job_dependency(struct drm_sched_job *job, struct drm_sched_entity *entity) { - if (!xa_empty(&job->dependencies)) - return xa_erase(&job->dependencies, job->last_dependency++); + struct dma_fence *f; + + /* We keep the fence around, so we can iterate over all dependencies + * in drm_sched_entity_kill_jobs_cb() to ensure all deps are signaled + * before killing the job. + */ + f = xa_load(&job->dependencies, job->last_dependency); + if (f) { + job->last_dependency++; + return dma_fence_get(f); + } if (job->sched->ops->prepare_job) return job->sched->ops->prepare_job(job, entity); @@ -407,15 +465,13 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity) { struct drm_sched_job *sched_job; - sched_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue)); + sched_job = drm_sched_entity_queue_peek(entity); if (!sched_job) return NULL; while ((entity->dependency = drm_sched_job_dependency(sched_job, entity))) { - trace_drm_sched_job_wait_dep(sched_job, entity->dependency); - - if (drm_sched_entity_add_dependency_cb(entity)) + if (drm_sched_entity_add_dependency_cb(entity, sched_job)) return NULL; } @@ -423,9 +479,9 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity) if (entity->guilty && atomic_read(entity->guilty)) dma_fence_set_error(&sched_job->s_fence->finished, -ECANCELED); - dma_fence_put(entity->last_scheduled); - - entity->last_scheduled = dma_fence_get(&sched_job->s_fence->finished); + dma_fence_put(rcu_dereference_check(entity->last_scheduled, true)); + rcu_assign_pointer(entity->last_scheduled, + dma_fence_get(&sched_job->s_fence->finished)); /* * If the queue is empty we allow drm_sched_entity_select_rq() to @@ -443,11 +499,26 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity) if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) { struct drm_sched_job *next; - next = to_drm_sched_job(spsc_queue_peek(&entity->job_queue)); - if (next) - drm_sched_rq_update_fifo(entity, next->submit_ts); + next = drm_sched_entity_queue_peek(entity); + if (next) { + struct drm_sched_rq *rq; + + spin_lock(&entity->lock); + rq = entity->rq; + spin_lock(&rq->lock); + drm_sched_rq_update_fifo_locked(entity, rq, + next->submit_ts); + spin_unlock(&rq->lock); + spin_unlock(&entity->lock); + } } + /* Jobs and entities might have different lifecycles. Since we're + * removing the job from the entities queue, set the jobs entity pointer + * to NULL to prevent any future access of the entity through this job. + */ + sched_job->entity = NULL; + return sched_job; } @@ -466,30 +537,31 @@ void drm_sched_entity_select_rq(struct drm_sched_entity *entity) return; /* - * Only when the queue is empty are we guaranteed that the scheduler - * thread cannot change ->last_scheduled. To enforce ordering we need - * a read barrier here. See drm_sched_entity_pop_job() for the other - * side. + * Only when the queue is empty are we guaranteed that + * drm_sched_run_job_work() cannot change entity->last_scheduled. To + * enforce ordering we need a read barrier here. See + * drm_sched_entity_pop_job() for the other side. */ smp_rmb(); - fence = entity->last_scheduled; + fence = rcu_dereference_check(entity->last_scheduled, true); /* stay on the same engine if the previous job hasn't finished */ if (fence && !dma_fence_is_signaled(fence)) return; - spin_lock(&entity->rq_lock); + spin_lock(&entity->lock); sched = drm_sched_pick_best(entity->sched_list, entity->num_sched_list); - rq = sched ? &sched->sched_rq[entity->priority] : NULL; + rq = sched ? sched->sched_rq[entity->priority] : NULL; if (rq != entity->rq) { drm_sched_rq_remove_entity(entity->rq, entity); entity->rq = rq; } - spin_unlock(&entity->rq_lock); if (entity->num_sched_list == 1) entity->sched_list = NULL; + + spin_unlock(&entity->lock); } /** @@ -500,38 +572,60 @@ void drm_sched_entity_select_rq(struct drm_sched_entity *entity) * fence sequence number this function should be called with drm_sched_job_arm() * under common lock for the struct drm_sched_entity that was set up for * @sched_job in drm_sched_job_init(). - * - * Returns 0 for success, negative error code otherwise. */ void drm_sched_entity_push_job(struct drm_sched_job *sched_job) { struct drm_sched_entity *entity = sched_job->entity; bool first; + ktime_t submit_ts; + + trace_drm_sched_job_queue(sched_job, entity); - trace_drm_sched_job(sched_job, entity); + if (trace_drm_sched_job_add_dep_enabled()) { + struct dma_fence *entry; + unsigned long index; + + xa_for_each(&sched_job->dependencies, index, entry) + trace_drm_sched_job_add_dep(sched_job, entry); + } atomic_inc(entity->rq->sched->score); WRITE_ONCE(entity->last_user, current->group_leader); + + /* + * After the sched_job is pushed into the entity queue, it may be + * completed and freed up at any time. We can no longer access it. + * Make sure to set the submit_ts first, to avoid a race. + */ + sched_job->submit_ts = submit_ts = ktime_get(); first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node); - sched_job->submit_ts = ktime_get(); /* first job wakes up scheduler */ if (first) { + struct drm_gpu_scheduler *sched; + struct drm_sched_rq *rq; + /* Add the entity to the run queue */ - spin_lock(&entity->rq_lock); + spin_lock(&entity->lock); if (entity->stopped) { - spin_unlock(&entity->rq_lock); + spin_unlock(&entity->lock); DRM_ERROR("Trying to push to a killed entity\n"); return; } - drm_sched_rq_add_entity(entity->rq, entity); - spin_unlock(&entity->rq_lock); + rq = entity->rq; + sched = rq->sched; + + spin_lock(&rq->lock); + drm_sched_rq_add_entity(rq, entity); if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) - drm_sched_rq_update_fifo(entity, sched_job->submit_ts); + drm_sched_rq_update_fifo_locked(entity, rq, submit_ts); + + spin_unlock(&rq->lock); + spin_unlock(&entity->lock); - drm_sched_wakeup(entity->rq->sched); + drm_sched_wakeup(sched); } } EXPORT_SYMBOL(drm_sched_entity_push_job); |
