-rw-r--r--  drivers/gpu/drm/i915/i915_debugfs.c         |   7
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c             |   3
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_request.c     |   5
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_request.h     |   8
-rw-r--r--  drivers/gpu/drm/i915/i915_guc_submission.c  |   1
-rw-r--r--  drivers/gpu/drm/i915/intel_engine_cs.c      |   3
-rw-r--r--  drivers/gpu/drm/i915/intel_lrc.c            | 151
-rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.h     |   3
8 files changed, 165 insertions(+), 16 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 5832758fccac..93e9a0e8c0f5 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -631,8 +631,9 @@ static void print_request(struct seq_file *m,
struct drm_i915_gem_request *rq,
const char *prefix)
{
- seq_printf(m, "%s%x [%x:%x] @ %d: %s\n", prefix,
+ seq_printf(m, "%s%x [%x:%x] prio=%d @ %dms: %s\n", prefix,
rq->global_seqno, rq->ctx->hw_id, rq->fence.seqno,
+ rq->priotree.priority,
jiffies_to_msecs(jiffies - rq->emitted_jiffies),
rq->timeline->common->name);
}
@@ -3218,6 +3219,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
if (i915.enable_execlists) {
u32 ptr, read, write;
+ struct rb_node *rb;
seq_printf(m, "\tExeclist status: 0x%08x %08x\n",
I915_READ(RING_EXECLIST_STATUS_LO(engine)),
@@ -3257,7 +3259,8 @@ static int i915_engine_info(struct seq_file *m, void *unused)
rcu_read_unlock();
spin_lock_irq(&engine->timeline->lock);
- list_for_each_entry(rq, &engine->execlist_queue, execlist_link) {
+ for (rb = engine->execlist_first; rb; rb = rb_next(rb)) {
+ rq = rb_entry(rb, typeof(*rq), priotree.node);
print_request(m, rq, "\t\tQ ");
}
spin_unlock_irq(&engine->timeline->lock);
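For reference, a queued request printed through the updated format string would look roughly like the line below. A minimal userspace sketch, with hypothetical values standing in for rq->global_seqno, ctx->hw_id, fence.seqno, the new priotree.priority field, and the request's age in milliseconds:

#include <stdio.h>

int main(void)
{
	/* Hypothetical stand-ins for the request fields printed above. */
	printf("%s%x [%x:%x] prio=%d @ %dms: %s\n",
	       "Q ", 0x1f3, 5, 0x1f2, 0, 124, "example-timeline");
	return 0;
}

/* Output: Q 1f3 [5:1f2] prio=0 @ 124ms: example-timeline */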
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index faecce3c4d21..7a43f2a73552 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2721,10 +2721,11 @@ static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
spin_lock_irqsave(&engine->timeline->lock, flags);
- INIT_LIST_HEAD(&engine->execlist_queue);
i915_gem_request_put(engine->execlist_port[0].request);
i915_gem_request_put(engine->execlist_port[1].request);
memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
+ engine->execlist_queue = RB_ROOT;
+ engine->execlist_first = NULL;
spin_unlock_irqrestore(&engine->timeline->lock, flags);
}
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 78c87d94d205..13574a1e29b1 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -132,6 +132,7 @@ __i915_priotree_add_dependency(struct i915_priotree *pt,
struct i915_dependency *dep,
unsigned long flags)
{
+ INIT_LIST_HEAD(&dep->dfs_link);
list_add(&dep->wait_link, &signal->waiters_list);
list_add(&dep->signal_link, &pt->signalers_list);
dep->signaler = signal;
@@ -158,6 +159,8 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt)
{
struct i915_dependency *dep, *next;
+ GEM_BUG_ON(!RB_EMPTY_NODE(&pt->node));
+
/* Everyone we depended upon (the fences we wait to be signaled)
* should retire before us and remove themselves from our list.
* However, retirement is run independently on each timeline and
@@ -182,6 +185,8 @@ i915_priotree_init(struct i915_priotree *pt)
{
INIT_LIST_HEAD(&pt->signalers_list);
INIT_LIST_HEAD(&pt->waiters_list);
+ RB_CLEAR_NODE(&pt->node);
+ pt->priority = INT_MIN;
}
void i915_gem_retire_noop(struct i915_gem_active *active,
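The GEM_BUG_ON(!RB_EMPTY_NODE(...)) in i915_priotree_fini() leans on the rbtree convention that a node outside any tree points its parent word at itself, a state that rb_insert_color()/rb_erase() never produce. A minimal userspace sketch of that convention, with the struct layout approximated from <linux/rbtree.h>:

#include <assert.h>
#include <stdio.h>

/* Userspace approximation of the kernel's rb_node sentinel trick:
 * a node that is not in any tree points its parent field at itself.
 */
struct rb_node {
	unsigned long __rb_parent_color;
	struct rb_node *rb_right, *rb_left;
};

#define RB_CLEAR_NODE(n)  ((n)->__rb_parent_color = (unsigned long)(n))
#define RB_EMPTY_NODE(n)  ((n)->__rb_parent_color == (unsigned long)(n))

int main(void)
{
	struct rb_node node;

	RB_CLEAR_NODE(&node);           /* what i915_priotree_init() does */
	assert(RB_EMPTY_NODE(&node));   /* what i915_priotree_fini() checks */
	printf("node is not in a tree\n");
	return 0;
}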
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 943c39d2a62a..e2b077df2da0 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -48,6 +48,7 @@ struct i915_dependency {
struct i915_priotree *signaler;
struct list_head signal_link;
struct list_head wait_link;
+ struct list_head dfs_link;
unsigned long flags;
#define I915_DEPENDENCY_ALLOC BIT(0)
};
@@ -64,6 +65,10 @@ struct i915_dependency {
struct i915_priotree {
struct list_head signalers_list; /* those before us, we depend upon */
struct list_head waiters_list; /* those after us, they depend upon us */
+ struct rb_node node;
+ int priority;
+#define I915_PRIORITY_MAX 1024
+#define I915_PRIORITY_MIN (-I915_PRIORITY_MAX)
};
/**
@@ -194,9 +199,6 @@ struct drm_i915_gem_request {
struct drm_i915_file_private *file_priv;
/** file_priv list entry for this request */
struct list_head client_list;
-
- /** Link in the execlist submission queue, guarded by execlist_lock. */
- struct list_head execlist_link;
};
extern const struct dma_fence_ops i915_fence_ops;
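Note how the new bounds sit well inside the int range: INT_MIN stays free as the "no priority assigned" default written by i915_priotree_init(), and INT_MAX as the "already submitted" stamp applied in execlists_dequeue(), so neither sentinel can collide with a real priority. A small illustrative sketch (userspace, values only):

#include <limits.h>
#include <stdio.h>

#define I915_PRIORITY_MAX 1024
#define I915_PRIORITY_MIN (-I915_PRIORITY_MAX)

/* Illustrative only: the user-visible priority window and the two
 * sentinels that must stay outside it.
 */
int main(void)
{
	printf("unset (i915_priotree_init)    = %d\n", INT_MIN);
	printf("valid window                  = %d..%d\n",
	       I915_PRIORITY_MIN, I915_PRIORITY_MAX);
	printf("submitted (execlists_dequeue) = %d\n", INT_MAX);
	return 0;
}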
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 942f5000d372..4462112725ef 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -1532,6 +1532,7 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
/* Take over from manual control of ELSP (execlists) */
for_each_engine(engine, dev_priv, id) {
engine->submit_request = i915_guc_submit;
+ engine->schedule = NULL;
/* Replay the current set of previously submitted requests */
list_for_each_entry(request,
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index c9171a058478..3da4d466e332 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -239,7 +239,8 @@ static void intel_engine_init_timeline(struct intel_engine_cs *engine)
*/
void intel_engine_setup_common(struct intel_engine_cs *engine)
{
- INIT_LIST_HEAD(&engine->execlist_queue);
+ engine->execlist_queue = RB_ROOT;
+ engine->execlist_first = NULL;
intel_engine_init_timeline(engine);
intel_engine_init_hangcheck(engine);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d1aea7462515..f50feaa7116a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -432,9 +432,10 @@ static bool can_merge_ctx(const struct i915_gem_context *prev,
static void execlists_dequeue(struct intel_engine_cs *engine)
{
- struct drm_i915_gem_request *cursor, *last;
+ struct drm_i915_gem_request *last;
struct execlist_port *port = engine->execlist_port;
unsigned long flags;
+ struct rb_node *rb;
bool submit = false;
last = port->request;
@@ -471,7 +472,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
*/
spin_lock_irqsave(&engine->timeline->lock, flags);
- list_for_each_entry(cursor, &engine->execlist_queue, execlist_link) {
+ rb = engine->execlist_first;
+ while (rb) {
+ struct drm_i915_gem_request *cursor =
+ rb_entry(rb, typeof(*cursor), priotree.node);
+
/* Can we combine this request with the current port? It has to
* be the same context/ringbuffer and not have any exceptions
* (e.g. GVT saying never to combine contexts).
@@ -503,6 +508,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
port++;
}
+ rb = rb_next(rb);
+ rb_erase(&cursor->priotree.node, &engine->execlist_queue);
+ RB_CLEAR_NODE(&cursor->priotree.node);
+ cursor->priotree.priority = INT_MAX; /* submitted; skip future bumps */
+
/* We keep the previous context alive until we retire the
* following request. This ensures that the context object
* is still pinned for any residual writes the HW makes into it
@@ -517,11 +527,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
submit = true;
}
if (submit) {
- /* Decouple all the requests submitted from the queue */
- engine->execlist_queue.next = &cursor->execlist_link;
- cursor->execlist_link.prev = &engine->execlist_queue;
-
i915_gem_request_assign(&port->request, last);
+ engine->execlist_first = rb;
}
spin_unlock_irqrestore(&engine->timeline->lock, flags);
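The dequeue ordering can be modelled in isolation. In this hedged userspace sketch a plain array stands in for the rbtree: requests drain most-positive-priority first with ties kept in submission order, and each dequeued request is stamped INT_MAX so a later priority bump treats it as already submitted, mirroring what the hunk above does after rb_erase():

#include <limits.h>
#include <stdio.h>

/* Userspace model of execlists_dequeue()'s ordering rules; a plain
 * array stands in for the rbtree.
 */
struct request { int priority; const char *name; };

int main(void)
{
	struct request q[] = {
		{  0, "A" }, { 10, "B" }, {  0, "C" }, { -5, "D" },
	};
	const int n = sizeof(q) / sizeof(q[0]);

	for (int done = 0; done < n; done++) {
		struct request *best = NULL;

		/* pick the most positive priority; strict > keeps ties
		 * in array (i.e. submission) order, matching the fifo rule */
		for (int i = 0; i < n; i++)
			if (q[i].priority != INT_MAX &&
			    (!best || q[i].priority > best->priority))
				best = &q[i];

		printf("submit %s (prio %d)\n", best->name, best->priority);
		best->priority = INT_MAX;	/* mark as submitted */
	}
	return 0;	/* submits B, A, C, D */
}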
@@ -626,6 +633,32 @@ static void intel_lrc_irq_handler(unsigned long data)
intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
}
+static bool insert_request(struct i915_priotree *pt, struct rb_root *root)
+{
+ struct rb_node **p, *rb;
+ bool first = true;
+
+ /* most positive priority is scheduled first, equal priorities fifo */
+ rb = NULL;
+ p = &root->rb_node;
+ while (*p) {
+ struct i915_priotree *pos;
+
+ rb = *p;
+ pos = rb_entry(rb, typeof(*pos), node);
+ if (pt->priority > pos->priority) {
+ p = &rb->rb_left;
+ } else {
+ p = &rb->rb_right;
+ first = false;
+ }
+ }
+ rb_link_node(&pt->node, rb, p);
+ rb_insert_color(&pt->node, root);
+
+ return first;
+}
+
static void execlists_submit_request(struct drm_i915_gem_request *request)
{
struct intel_engine_cs *engine = request->engine;
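insert_request() above encodes two rules: strictly higher priority sorts earlier, and equal priorities always descend right, preserving submission (fifo) order; the boolean result says whether the new node became the leftmost, which is how execlists_submit_request() refreshes engine->execlist_first without an rb_first() walk. The same rules with a sorted singly linked list, illustrative only:

#include <stdio.h>

/* Sorted-list model of insert_request(): higher priority goes ahead,
 * equal priority goes behind (fifo), and the return value reports
 * whether the new entry became the head -- the analogue of updating
 * engine->execlist_first.
 */
struct node { int prio; const char *name; struct node *next; };

static int insert(struct node **head, struct node *req)
{
	int first = 1;

	/* walk past every entry with priority >= ours: strictly higher
	 * priority stays ahead, equal priority keeps submission order */
	while (*head && (*head)->prio >= req->prio) {
		head = &(*head)->next;
		first = 0;
	}
	req->next = *head;
	*head = req;
	return first;
}

int main(void)
{
	struct node a = { 0, "A" }, b = { 10, "B" }, c = { 0, "C" };
	struct node *queue = NULL;

	insert(&queue, &a);
	insert(&queue, &b);	/* higher priority: becomes first */
	insert(&queue, &c);	/* ties with A: queued behind it */

	for (struct node *n = queue; n; n = n->next)
		printf("%s(%d) ", n->name, n->prio);
	printf("\n");		/* prints: B(10) A(0) C(0) */
	return 0;
}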
@@ -634,13 +667,112 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
/* Will be called from irq-context when using foreign fences. */
spin_lock_irqsave(&engine->timeline->lock, flags);
- list_add_tail(&request->execlist_link, &engine->execlist_queue);
+ if (insert_request(&request->priotree, &engine->execlist_queue))
+ engine->execlist_first = &request->priotree.node;
if (execlists_elsp_idle(engine))
tasklet_hi_schedule(&engine->irq_tasklet);
spin_unlock_irqrestore(&engine->timeline->lock, flags);
}
+static struct intel_engine_cs *
+pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
+{
+ struct intel_engine_cs *engine;
+
+ engine = container_of(pt,
+ struct drm_i915_gem_request,
+ priotree)->engine;
+ if (engine != locked) {
+ if (locked)
+ spin_unlock_irq(&locked->timeline->lock);
+ spin_lock_irq(&engine->timeline->lock);
+ }
+
+ return engine;
+}
+
+static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
+{
+ struct intel_engine_cs *engine = NULL;
+ struct i915_dependency *dep, *p;
+ struct i915_dependency stack;
+ LIST_HEAD(dfs);
+
+ if (prio <= READ_ONCE(request->priotree.priority))
+ return;
+
+ /* Need struct_mutex (the i915 "BKL") in order to use the temporary dfs_link inside i915_dependency */
+ lockdep_assert_held(&request->i915->drm.struct_mutex);
+
+ stack.signaler = &request->priotree;
+ list_add(&stack.dfs_link, &dfs);
+
+ /* Recursively bump the priority of every request we depend upon to match the new priority.
+ *
+ * A naive approach would be to use recursion:
+ * static void update_priorities(struct i915_priotree *pt, prio) {
+ * list_for_each_entry(dep, &pt->signalers_list, signal_link)
+ * update_priorities(dep->signal, prio)
+ * insert_request(pt);
+ * }
+ * but that may have unlimited recursion depth and so runs a very
+ * real risk of overrunning the kernel stack. Instead, we build
+ * a flat list of all dependencies starting with the current request.
+ * As we walk the list of dependencies, we add each request's own
+ * dependencies to the end of the list (this may include an already
+ * visited request) and continue to walk onwards onto the new
+ * dependencies. The end result is a topological list of requests in
+ * reverse order; the last element in the list is the request we must
+ * execute first.
+ */
+ list_for_each_entry_safe(dep, p, &dfs, dfs_link) {
+ struct i915_priotree *pt = dep->signaler;
+
+ list_for_each_entry(p, &pt->signalers_list, signal_link)
+ if (prio > READ_ONCE(p->signaler->priority))
+ list_move_tail(&p->dfs_link, &dfs);
+
+ /* restore the safe iterator, clobbered by the inner loop above */
+ p = list_next_entry(dep, dfs_link);
+ if (!RB_EMPTY_NODE(&pt->node))
+ continue;
+
+ engine = pt_lock_engine(pt, engine);
+
+ /* If it is not already in the rbtree, we can update the
+ * priority in place and skip over it (and its dependencies)
+ * if it is referenced *again* as we descend the dfs.
+ */
+ if (prio > pt->priority && RB_EMPTY_NODE(&pt->node)) {
+ pt->priority = prio;
+ list_del_init(&dep->dfs_link);
+ }
+ }
+
+ /* Fifo and depth-first replacement ensure our deps execute before us */
+ list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
+ struct i915_priotree *pt = dep->signaler;
+
+ INIT_LIST_HEAD(&dep->dfs_link);
+
+ engine = pt_lock_engine(pt, engine);
+
+ if (prio <= pt->priority)
+ continue;
+
+ GEM_BUG_ON(RB_EMPTY_NODE(&pt->node));
+
+ pt->priority = prio;
+ rb_erase(&pt->node, &engine->execlist_queue);
+ if (insert_request(pt, &engine->execlist_queue))
+ engine->execlist_first = &pt->node;
+ }
+
+ if (engine)
+ spin_unlock_irq(&engine->timeline->lock);
+
+ /* XXX Do we need to preempt to make room for us and our deps? */
+}
+
int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request)
{
struct intel_engine_cs *engine = request->engine;
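The two passes of execlists_schedule() are easier to follow outside the kernel. In the hedged userspace model below (names illustrative; the in-place fast path and the pt_lock_engine() cross-engine locking are omitted), pass one flattens the DAG of dependency edges, re-appending any edge met a second time so the list stays reverse-topological, and pass two walks it backwards so every dependency is bumped before anything waiting on it:

#include <stdio.h>
#include <string.h>

/* Userspace model of execlists_schedule(): flatten the dependency DAG
 * into a list of edges instead of recursing, then bump priorities in
 * reverse so dependencies always come before their dependents.
 */
#define MAX_EDGES 16

struct req;

struct edge {			/* models struct i915_dependency */
	struct req *signaler;	/* the request this edge waits upon */
};

struct req {			/* models struct i915_priotree */
	const char *name;
	int prio;
	int nedges;
	struct edge edges[4];	/* models signalers_list */
};

static void schedule(struct req *rq, int prio)
{
	struct edge stack = { rq };	/* the on-stack dependency */
	struct edge *dfs[MAX_EDGES];
	int count = 0;

	if (prio <= rq->prio)
		return;

	dfs[count++] = &stack;

	/* Pass 1: build a flat, reverse-topological list of edges. An
	 * edge met a second time is moved to the tail (the patch's
	 * list_move_tail()), keeping its target behind every waiter. */
	for (int i = 0; i < count; i++) {
		struct req *pt = dfs[i]->signaler;

		for (int j = 0; j < pt->nedges; j++) {
			struct edge *e = &pt->edges[j];
			int k;

			if (prio <= e->signaler->prio)
				continue;	/* nothing to bump */

			for (k = 0; k < count; k++)
				if (dfs[k] == e)
					break;
			if (k < count) {	/* move-to-tail */
				memmove(&dfs[k], &dfs[k + 1],
					(count - 1 - k) * sizeof(*dfs));
				count--;
				if (k <= i)
					i--;	/* cursor shifted down */
			}
			dfs[count++] = e;
		}
	}

	/* Pass 2: reverse walk, so each request's dependencies are
	 * bumped (and would be re-inserted into the rbtree) first. */
	for (int i = count - 1; i >= 0; i--) {
		struct req *pt = dfs[i]->signaler;

		if (prio <= pt->prio)
			continue;	/* bumped via an edge nearer the tail */
		pt->prio = prio;
		printf("bump %s to %d\n", pt->name, prio);
	}
}

int main(void)
{
	struct req z = { "Z", 0, 0, { { 0 } } };
	struct req a = { "A", 0, 1, { { &z } } };		/* A waits on Z */
	struct req b = { "B", 0, 1, { { &a } } };		/* B waits on A */
	struct req r = { "R", 0, 2, { { &a }, { &b } } };	/* R waits on A, B */

	schedule(&r, 10);	/* bumps Z, A, B, R -- dependencies first */
	return 0;
}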
@@ -1677,8 +1809,10 @@ void intel_execlists_enable_submission(struct drm_i915_private *dev_priv)
struct intel_engine_cs *engine;
enum intel_engine_id id;
- for_each_engine(engine, dev_priv, id)
+ for_each_engine(engine, dev_priv, id) {
engine->submit_request = execlists_submit_request;
+ engine->schedule = execlists_schedule;
+ }
}
static void
@@ -1691,6 +1825,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
engine->emit_breadcrumb = gen8_emit_breadcrumb;
engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz;
engine->submit_request = execlists_submit_request;
+ engine->schedule = execlists_schedule;
engine->irq_enable = gen8_logical_ring_enable_irq;
engine->irq_disable = gen8_logical_ring_disable_irq;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index b9583941eb6b..3466b4e77e7c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -348,7 +348,8 @@ struct intel_engine_cs {
struct drm_i915_gem_request *request;
unsigned int count;
} execlist_port[2];
- struct list_head execlist_queue;
+ struct rb_root execlist_queue;
+ struct rb_node *execlist_first;
unsigned int fw_domains;
bool disable_lite_restore_wa;
bool preempt_wa;