summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/scheduler
diff options
context:
space:
mode:
authorMaxime Ripard <mripard@kernel.org>2023-07-11 09:23:20 +0200
committerMaxime Ripard <mripard@kernel.org>2023-07-11 09:23:20 +0200
commit2f98e686ef59b5d19af5847d755798e2031bee3a (patch)
tree7b47919242853d088decf898ca79d6cda0d49381 /drivers/gpu/drm/scheduler
parenta2848d08742c8e8494675892c02c0d22acbe3cf8 (diff)
parent06c2afb862f9da8dc5efa4b6076a0e48c3fbaaa5 (diff)
Merge v6.5-rc1 into drm-misc-fixes
Boris needs 6.5-rc1 in drm-misc-fixes to prevent a conflict. Signed-off-by: Maxime Ripard <mripard@kernel.org>
Diffstat (limited to 'drivers/gpu/drm/scheduler')
-rw-r--r--drivers/gpu/drm/scheduler/sched_entity.c55
-rw-r--r--drivers/gpu/drm/scheduler/sched_fence.c4
-rw-r--r--drivers/gpu/drm/scheduler/sched_main.c37
3 files changed, 63 insertions, 33 deletions
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index 42021d1f7e01..a42763e1429d 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -72,7 +72,7 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
entity->num_sched_list = num_sched_list;
entity->priority = priority;
entity->sched_list = num_sched_list > 1 ? sched_list : NULL;
- entity->last_scheduled = NULL;
+ RCU_INIT_POINTER(entity->last_scheduled, NULL);
RB_CLEAR_NODE(&entity->rb_tree_node);
if(num_sched_list)
@@ -140,11 +140,32 @@ bool drm_sched_entity_is_ready(struct drm_sched_entity *entity)
return true;
}
+/**
+ * drm_sched_entity_error - return error of last scheduled job
+ * @entity: scheduler entity to check
+ *
+ * Opportunistically return the error of the last scheduled job. Result can
+ * change any time when new jobs are pushed to the hw.
+ */
+int drm_sched_entity_error(struct drm_sched_entity *entity)
+{
+ struct dma_fence *fence;
+ int r;
+
+ rcu_read_lock();
+ fence = rcu_dereference(entity->last_scheduled);
+ r = fence ? fence->error : 0;
+ rcu_read_unlock();
+
+ return r;
+}
+EXPORT_SYMBOL(drm_sched_entity_error);
+
static void drm_sched_entity_kill_jobs_work(struct work_struct *wrk)
{
struct drm_sched_job *job = container_of(wrk, typeof(*job), work);
- drm_sched_fence_finished(job->s_fence);
+ drm_sched_fence_finished(job->s_fence, -ESRCH);
WARN_ON(job->s_fence->parent);
job->sched->ops->free_job(job);
}
@@ -207,12 +228,12 @@ static void drm_sched_entity_kill(struct drm_sched_entity *entity)
/* Make sure this entity is not used by the scheduler at the moment */
wait_for_completion(&entity->entity_idle);
- prev = dma_fence_get(entity->last_scheduled);
+ /* The entity is guaranteed to not be used by the scheduler */
+ prev = rcu_dereference_check(entity->last_scheduled, true);
+ dma_fence_get(prev);
while ((job = to_drm_sched_job(spsc_queue_pop(&entity->job_queue)))) {
struct drm_sched_fence *s_fence = job->s_fence;
- dma_fence_set_error(&s_fence->finished, -ESRCH);
-
dma_fence_get(&s_fence->finished);
if (!prev || dma_fence_add_callback(prev, &job->finish_cb,
drm_sched_entity_kill_jobs_cb))
@@ -296,8 +317,8 @@ void drm_sched_entity_fini(struct drm_sched_entity *entity)
entity->dependency = NULL;
}
- dma_fence_put(entity->last_scheduled);
- entity->last_scheduled = NULL;
+ dma_fence_put(rcu_dereference_check(entity->last_scheduled, true));
+ RCU_INIT_POINTER(entity->last_scheduled, NULL);
}
EXPORT_SYMBOL(drm_sched_entity_fini);
@@ -337,7 +358,7 @@ static void drm_sched_entity_wakeup(struct dma_fence *f,
container_of(cb, struct drm_sched_entity, cb);
drm_sched_entity_clear_dep(f, cb);
- drm_sched_wakeup(entity->rq->sched);
+ drm_sched_wakeup_if_can_queue(entity->rq->sched);
}
/**
@@ -379,7 +400,7 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity)
}
s_fence = to_drm_sched_fence(fence);
- if (s_fence && s_fence->sched == sched &&
+ if (!fence->error && s_fence && s_fence->sched == sched &&
!test_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &fence->flags)) {
/*
@@ -448,9 +469,9 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
if (entity->guilty && atomic_read(entity->guilty))
dma_fence_set_error(&sched_job->s_fence->finished, -ECANCELED);
- dma_fence_put(entity->last_scheduled);
-
- entity->last_scheduled = dma_fence_get(&sched_job->s_fence->finished);
+ dma_fence_put(rcu_dereference_check(entity->last_scheduled, true));
+ rcu_assign_pointer(entity->last_scheduled,
+ dma_fence_get(&sched_job->s_fence->finished));
/*
* If the queue is empty we allow drm_sched_entity_select_rq() to
@@ -473,6 +494,12 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
drm_sched_rq_update_fifo(entity, next->submit_ts);
}
+ /* Jobs and entities might have different lifecycles. Since we're
+ * removing the job from the entities queue, set the jobs entity pointer
+ * to NULL to prevent any future access of the entity through this job.
+ */
+ sched_job->entity = NULL;
+
return sched_job;
}
@@ -498,7 +525,7 @@ void drm_sched_entity_select_rq(struct drm_sched_entity *entity)
*/
smp_rmb();
- fence = entity->last_scheduled;
+ fence = rcu_dereference_check(entity->last_scheduled, true);
/* stay on the same engine if the previous job hasn't finished */
if (fence && !dma_fence_is_signaled(fence))
@@ -563,7 +590,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job)
if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
drm_sched_rq_update_fifo(entity, submit_ts);
- drm_sched_wakeup(entity->rq->sched);
+ drm_sched_wakeup_if_can_queue(entity->rq->sched);
}
}
EXPORT_SYMBOL(drm_sched_entity_push_job);
diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
index b6e70ddb4ee5..06cedfe4b486 100644
--- a/drivers/gpu/drm/scheduler/sched_fence.c
+++ b/drivers/gpu/drm/scheduler/sched_fence.c
@@ -77,8 +77,10 @@ void drm_sched_fence_scheduled(struct drm_sched_fence *fence,
dma_fence_signal(&fence->scheduled);
}
-void drm_sched_fence_finished(struct drm_sched_fence *fence)
+void drm_sched_fence_finished(struct drm_sched_fence *fence, int result)
{
+ if (result)
+ dma_fence_set_error(&fence->finished, result);
dma_fence_signal(&fence->finished);
}
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index eff0a7f42f69..506371c42745 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -42,6 +42,10 @@
* the hardware.
*
* The jobs in a entity are always scheduled in the order that they were pushed.
+ *
+ * Note that once a job was taken from the entities queue and pushed to the
+ * hardware, i.e. the pending queue, the entity must not be referenced anymore
+ * through the jobs entity pointer.
*/
#include <linux/kthread.h>
@@ -258,7 +262,7 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
*
* Finish the job's fence and wake up the worker thread.
*/
-static void drm_sched_job_done(struct drm_sched_job *s_job)
+static void drm_sched_job_done(struct drm_sched_job *s_job, int result)
{
struct drm_sched_fence *s_fence = s_job->s_fence;
struct drm_gpu_scheduler *sched = s_fence->sched;
@@ -269,7 +273,7 @@ static void drm_sched_job_done(struct drm_sched_job *s_job)
trace_drm_sched_process_job(s_fence);
dma_fence_get(&s_fence->finished);
- drm_sched_fence_finished(s_fence);
+ drm_sched_fence_finished(s_fence, result);
dma_fence_put(&s_fence->finished);
wake_up_interruptible(&sched->wake_up_worker);
}
@@ -283,7 +287,7 @@ static void drm_sched_job_done_cb(struct dma_fence *f, struct dma_fence_cb *cb)
{
struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);
- drm_sched_job_done(s_job);
+ drm_sched_job_done(s_job, f->error);
}
/**
@@ -534,12 +538,12 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
r = dma_fence_add_callback(fence, &s_job->cb,
drm_sched_job_done_cb);
if (r == -ENOENT)
- drm_sched_job_done(s_job);
+ drm_sched_job_done(s_job, fence->error);
else if (r)
DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n",
r);
} else
- drm_sched_job_done(s_job);
+ drm_sched_job_done(s_job, -ECANCELED);
}
if (full_recovery) {
@@ -844,27 +848,26 @@ void drm_sched_job_cleanup(struct drm_sched_job *job)
EXPORT_SYMBOL(drm_sched_job_cleanup);
/**
- * drm_sched_ready - is the scheduler ready
- *
+ * drm_sched_can_queue -- Can we queue more to the hardware?
* @sched: scheduler instance
*
* Return true if we can push more jobs to the hw, otherwise false.
*/
-static bool drm_sched_ready(struct drm_gpu_scheduler *sched)
+static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched)
{
return atomic_read(&sched->hw_rq_count) <
sched->hw_submission_limit;
}
/**
- * drm_sched_wakeup - Wake up the scheduler when it is ready
- *
+ * drm_sched_wakeup_if_can_queue - Wake up the scheduler
* @sched: scheduler instance
*
+ * Wake up the scheduler if we can queue jobs.
*/
-void drm_sched_wakeup(struct drm_gpu_scheduler *sched)
+void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched)
{
- if (drm_sched_ready(sched))
+ if (drm_sched_can_queue(sched))
wake_up_interruptible(&sched->wake_up_worker);
}
@@ -881,7 +884,7 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched)
struct drm_sched_entity *entity;
int i;
- if (!drm_sched_ready(sched))
+ if (!drm_sched_can_queue(sched))
return NULL;
/* Kernel run queue has higher priority than normal run queue*/
@@ -1049,15 +1052,13 @@ static int drm_sched_main(void *param)
r = dma_fence_add_callback(fence, &sched_job->cb,
drm_sched_job_done_cb);
if (r == -ENOENT)
- drm_sched_job_done(sched_job);
+ drm_sched_job_done(sched_job, fence->error);
else if (r)
DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n",
r);
} else {
- if (IS_ERR(fence))
- dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
-
- drm_sched_job_done(sched_job);
+ drm_sched_job_done(sched_job, IS_ERR(fence) ?
+ PTR_ERR(fence) : 0);
}
wake_up(&sched->job_scheduled);