summaryrefslogtreecommitdiff
path: root/include/drm/gpu_scheduler.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/drm/gpu_scheduler.h')
-rw-r--r--include/drm/gpu_scheduler.h23
1 files changed, 22 insertions, 1 deletions
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index aa90ed1f1b2b..88ae7f331bb1 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -253,6 +253,24 @@ struct drm_sched_backend_ops {
* 5. Restart the scheduler using drm_sched_start(). At that point, new
* jobs can be queued, and the scheduler thread is unblocked
*
+ * Note that some GPUs have distinct hardware queues but need to reset
+ * the GPU globally, which requires extra synchronization between the
+ * timeout handler of the different &drm_gpu_scheduler. One way to
+ * achieve this synchronization is to create an ordered workqueue
+ * (using alloc_ordered_workqueue()) at the driver level, and pass this
+ * queue to drm_sched_init(), to guarantee that timeout handlers are
+ * executed sequentially. The above workflow needs to be slightly
+ * adjusted in that case:
+ *
+ * 1. Stop all schedulers impacted by the reset using drm_sched_stop()
+ * 2. Try to gracefully stop non-faulty jobs on all queues impacted by
+ * the reset (optional)
+ * 3. Issue a GPU reset on all faulty queues (driver-specific)
+ * 4. Re-submit jobs on all schedulers impacted by the reset using
+ * drm_sched_resubmit_jobs()
+ * 5. Restart all schedulers that were stopped in step #1 using
+ * drm_sched_start()
+ *
* Return DRM_GPU_SCHED_STAT_NOMINAL, when all is normal,
* and the underlying driver has started or completed recovery.
*
@@ -283,6 +301,7 @@ struct drm_sched_backend_ops {
* finished.
* @hw_rq_count: the number of jobs currently in the hardware queue.
* @job_id_count: used to assign unique id to the each job.
+ * @timeout_wq: workqueue used to queue @work_tdr
* @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the
* timeout interval is over.
* @thread: the kthread on which the scheduler which run.
@@ -307,6 +326,7 @@ struct drm_gpu_scheduler {
wait_queue_head_t job_scheduled;
atomic_t hw_rq_count;
atomic64_t job_id_count;
+ struct workqueue_struct *timeout_wq;
struct delayed_work work_tdr;
struct task_struct *thread;
struct list_head pending_list;
@@ -320,7 +340,8 @@ struct drm_gpu_scheduler {
int drm_sched_init(struct drm_gpu_scheduler *sched,
const struct drm_sched_backend_ops *ops,
- uint32_t hw_submission, unsigned hang_limit, long timeout,
+ uint32_t hw_submission, unsigned hang_limit,
+ long timeout, struct workqueue_struct *timeout_wq,
atomic_t *score, const char *name);
void drm_sched_fini(struct drm_gpu_scheduler *sched);