Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_lrc.c')
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_lrc.c | 680
1 file changed, 659 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index f263a8374273..affa5e2dfce1 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -164,6 +164,42 @@
 #define WA_TAIL_DWORDS 2
 #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
 
+struct virtual_engine {
+	struct intel_engine_cs base;
+	struct intel_context context;
+
+	/*
+	 * We allow only a single request through the virtual engine at a time
+	 * (each request in the timeline waits for the completion fence of
+	 * the previous before being submitted). By restricting ourselves to
+	 * only submitting a single request, each request is placed on to a
+	 * physical engine to maximise load spreading (by virtue of the late
+	 * greedy scheduling -- each real engine takes the next available
+	 * request upon idling).
+	 */
+	struct i915_request *request;
+
+	/*
+	 * We keep a rbtree of available virtual engines inside each physical
+	 * engine, sorted by priority. Here we preallocate the nodes we need
+	 * for the virtual engine, indexed by physical_engine->id.
+	 */
+	struct ve_node {
+		struct rb_node rb;
+		int prio;
+	} nodes[I915_NUM_ENGINES];
+
+	/* And finally, which physical engines this virtual engine maps onto. */
+	unsigned int num_siblings;
+	struct intel_engine_cs *siblings[0];
+};
+
+static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
+{
+	GEM_BUG_ON(!intel_engine_is_virtual(engine));
+	return container_of(engine, struct virtual_engine, base);
+}
+
 static int execlists_context_deferred_alloc(struct intel_context *ce,
 					    struct intel_engine_cs *engine);
 static void execlists_init_reg_state(u32 *reg_state,
@@ -216,7 +252,8 @@ static int queue_prio(const struct intel_engine_execlists *execlists)
 }
 
 static inline bool need_preempt(const struct intel_engine_cs *engine,
-				const struct i915_request *rq)
+				const struct i915_request *rq,
+				struct rb_node *rb)
 {
 	int last_prio;
 
@@ -251,6 +288,25 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
 	    rq_prio(list_next_entry(rq, link)) > last_prio)
 		return true;
 
+	if (rb) {
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+		bool preempt = false;
+
+		if (engine == ve->siblings[0]) { /* only preempt one sibling */
+			struct i915_request *next;
+
+			rcu_read_lock();
+			next = READ_ONCE(ve->request);
+			if (next)
+				preempt = rq_prio(next) > last_prio;
+			rcu_read_unlock();
+		}
+
+		if (preempt)
+			return preempt;
+	}
+
 	/*
 	 * If the inflight context did not trigger the preemption, then maybe
 	 * it was the set of queued requests? Pick the highest priority in
@@ -369,6 +425,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 
 	list_for_each_entry_safe_reverse(rq, rn,
 					 &engine->timeline.requests,
 					 link) {
+		struct intel_engine_cs *owner;
+
 		if (i915_request_completed(rq))
 			break;
 
@@ -377,16 +435,29 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 
 		GEM_BUG_ON(rq->hw_context->active);
 
-		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
-		if (rq_prio(rq) != prio) {
-			prio = rq_prio(rq);
-			pl = i915_sched_lookup_priolist(engine, prio);
-		}
-		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
-
-		list_add(&rq->sched.link, pl);
+		/*
+		 * Push the request back into the queue for later resubmission.
+		 * If this request is not native to this physical engine (i.e.
+		 * it came from a virtual source), push it back onto the virtual
+		 * engine so that it can be moved across onto another physical
+		 * engine as load dictates.
+		 */
+		owner = rq->hw_context->engine;
+		if (likely(owner == engine)) {
+			GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
+			if (rq_prio(rq) != prio) {
+				prio = rq_prio(rq);
+				pl = i915_sched_lookup_priolist(engine, prio);
+			}
+			GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 
-		active = rq;
+			list_add(&rq->sched.link, pl);
+			active = rq;
+		} else {
+			rq->engine = owner;
+			owner->submit_request(rq);
+			active = NULL;
+		}
 	}
 
 	return active;
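The unwind path above now makes a per-request routing decision: requests native to this physical engine go back onto its own priority lists, while requests that arrived through a virtual engine are handed back to their owner so that any sibling can pick them up later. A minimal userspace sketch of that policy — with hypothetical types and callbacks, not the i915 structures — looks like this:

#include <stdio.h>

struct engine;

struct request {
	struct engine *owner;	/* engine (real or virtual) that owns the context */
	int prio;
};

struct engine {
	const char *name;
	/* resubmit on this specific physical engine */
	void (*requeue_local)(struct engine *e, struct request *rq);
	/* hand the request back so any sibling may pick it up */
	void (*submit_request)(struct engine *e, struct request *rq);
};

/* Unwind one incomplete request after a preemption or reset. */
static void unwind_request(struct engine *physical, struct request *rq)
{
	if (rq->owner == physical)
		physical->requeue_local(physical, rq);	  /* native: keep it here */
	else
		rq->owner->submit_request(rq->owner, rq); /* virtual: rebalance */
}

static void requeue_local(struct engine *e, struct request *rq)
{
	printf("%s: requeued prio %d locally\n", e->name, rq->prio);
}

static void submit_request(struct engine *e, struct request *rq)
{
	printf("%s: prio %d returned for load balancing\n", e->name, rq->prio);
}

int main(void)
{
	struct engine vcs0 = { "vcs0", requeue_local, submit_request };
	struct engine virt = { "virtual", requeue_local, submit_request };
	struct request native = { &vcs0, 0 }, balanced = { &virt, 1024 };

	unwind_request(&vcs0, &native);
	unwind_request(&vcs0, &balanced);
	return 0;
}

The point being illustrated is that the routing key is the request's owning context, not the engine it happened to be running on when it was unwound.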
@@ -622,6 +693,90 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
 		     execlists));
 }
 
+static void virtual_update_register_offsets(u32 *regs,
+					    struct intel_engine_cs *engine)
+{
+	u32 base = engine->mmio_base;
+
+	/* Must match execlists_init_reg_state()! */
+
+	regs[CTX_CONTEXT_CONTROL] =
+		i915_mmio_reg_offset(RING_CONTEXT_CONTROL(base));
+	regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base));
+	regs[CTX_RING_TAIL] = i915_mmio_reg_offset(RING_TAIL(base));
+	regs[CTX_RING_BUFFER_START] = i915_mmio_reg_offset(RING_START(base));
+	regs[CTX_RING_BUFFER_CONTROL] = i915_mmio_reg_offset(RING_CTL(base));
+
+	regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base));
+	regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base));
+	regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base));
+	regs[CTX_SECOND_BB_HEAD_U] =
+		i915_mmio_reg_offset(RING_SBBADDR_UDW(base));
+	regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base));
+	regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base));
+
+	regs[CTX_CTX_TIMESTAMP] =
+		i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base));
+	regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 3));
+	regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 3));
+	regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 2));
+	regs[CTX_PDP2_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 2));
+	regs[CTX_PDP1_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 1));
+	regs[CTX_PDP1_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 1));
+	regs[CTX_PDP0_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0));
+	regs[CTX_PDP0_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0));
+
+	if (engine->class == RENDER_CLASS) {
+		regs[CTX_RCS_INDIRECT_CTX] =
+			i915_mmio_reg_offset(RING_INDIRECT_CTX(base));
+		regs[CTX_RCS_INDIRECT_CTX_OFFSET] =
+			i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(base));
+		regs[CTX_BB_PER_CTX_PTR] =
+			i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(base));
+
+		regs[CTX_R_PWR_CLK_STATE] =
+			i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
+	}
+}
+
+static bool virtual_matches(const struct virtual_engine *ve,
+			    const struct i915_request *rq,
+			    const struct intel_engine_cs *engine)
+{
+	const struct intel_engine_cs *active;
+
+	/*
+	 * We track when the HW has completed saving the context image
+	 * (i.e. when we have seen the final CS event switching out of
+	 * the context) and must not overwrite the context image before
+	 * then. This restricts us to only using the active engine
+	 * while the previous virtualized request is inflight (so
+	 * we reuse the register offsets). This is a very small
+	 * hysteresis on the greedy selection algorithm.
+	 */
+	active = READ_ONCE(ve->context.active);
+	if (active && active != engine)
+		return false;
+
+	return true;
+}
+
+static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
+				     struct intel_engine_cs *engine)
+{
+	struct intel_engine_cs *old = ve->siblings[0];
+
+	/* All unattached (rq->engine == old) must already be completed */
+
+	spin_lock(&old->breadcrumbs.irq_lock);
+	if (!list_empty(&ve->context.signal_link)) {
+		list_move_tail(&ve->context.signal_link,
+			       &engine->breadcrumbs.signalers);
+		intel_engine_queue_breadcrumbs(engine);
+	}
+	spin_unlock(&old->breadcrumbs.irq_lock);
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
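virtual_matches() is the small hysteresis described in its comment: while the hardware may still be writing back the context image on the engine that last ran the virtual context, only that engine is allowed to take the next request. A standalone sketch of the same check, using a plain owner pointer instead of intel_context.active (hypothetical types, not the driver's):

#include <stdbool.h>
#include <stdio.h>

struct engine { const char *name; };

struct virtual_ctx {
	/* engine still holding the context image, or NULL once idle */
	const struct engine *active;
};

/* May 'engine' dequeue the next request for this virtual context? */
static bool virtual_matches(const struct virtual_ctx *ve,
			    const struct engine *engine)
{
	/* Until the old engine has switched the context out, stick with it. */
	return !ve->active || ve->active == engine;
}

int main(void)
{
	struct engine rcs0 = { "rcs0" }, rcs1 = { "rcs1" };
	struct virtual_ctx ve = { .active = &rcs0 };

	printf("rcs0 may take it: %d\n", virtual_matches(&ve, &rcs0)); /* 1 */
	printf("rcs1 may take it: %d\n", virtual_matches(&ve, &rcs1)); /* 0 */

	ve.active = NULL;	/* context image saved; any sibling may run it */
	printf("rcs1 may take it now: %d\n", virtual_matches(&ve, &rcs1));
	return 0;
}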
@@ -654,6 +809,26 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	 * and context switches) submission.
 	 */
 
+	for (rb = rb_first_cached(&execlists->virtual); rb; ) {
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+		struct i915_request *rq = READ_ONCE(ve->request);
+
+		if (!rq) { /* lazily cleanup after another engine handled rq */
+			rb_erase_cached(rb, &execlists->virtual);
+			RB_CLEAR_NODE(rb);
+			rb = rb_first_cached(&execlists->virtual);
+			continue;
+		}
+
+		if (!virtual_matches(ve, rq, engine)) {
+			rb = rb_next(rb);
+			continue;
+		}
+
+		break;
+	}
+
 	if (last) {
 		/*
 		 * Don't resubmit or switch until all outstanding
@@ -675,7 +850,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
 			return;
 
-		if (need_preempt(engine, last)) {
+		if (need_preempt(engine, last, rb)) {
 			inject_preempt_context(engine);
 			return;
 		}
@@ -715,6 +890,92 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			last->tail = last->wa_tail;
 	}
 
+	while (rb) { /* XXX virtual is always taking precedence */
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+		struct i915_request *rq;
+
+		spin_lock(&ve->base.timeline.lock);
+
+		rq = ve->request;
+		if (unlikely(!rq)) { /* lost the race to a sibling */
+			spin_unlock(&ve->base.timeline.lock);
+			rb_erase_cached(rb, &execlists->virtual);
+			RB_CLEAR_NODE(rb);
+			rb = rb_first_cached(&execlists->virtual);
+			continue;
+		}
+
+		GEM_BUG_ON(rq != ve->request);
+		GEM_BUG_ON(rq->engine != &ve->base);
+		GEM_BUG_ON(rq->hw_context != &ve->context);
+
+		if (rq_prio(rq) >= queue_prio(execlists)) {
+			if (!virtual_matches(ve, rq, engine)) {
+				spin_unlock(&ve->base.timeline.lock);
+				rb = rb_next(rb);
+				continue;
+			}
+
+			if (last && !can_merge_rq(last, rq)) {
+				spin_unlock(&ve->base.timeline.lock);
+				return; /* leave this rq for another engine */
+			}
+
+			GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n",
+				  engine->name,
+				  rq->fence.context,
+				  rq->fence.seqno,
+				  i915_request_completed(rq) ? "!" :
+				  i915_request_started(rq) ? "*" :
+				  "",
+				  yesno(engine != ve->siblings[0]));
+
+			ve->request = NULL;
+			ve->base.execlists.queue_priority_hint = INT_MIN;
+			rb_erase_cached(rb, &execlists->virtual);
+			RB_CLEAR_NODE(rb);
+
+			rq->engine = engine;
+
+			if (engine != ve->siblings[0]) {
+				u32 *regs = ve->context.lrc_reg_state;
+				unsigned int n;
+
+				GEM_BUG_ON(READ_ONCE(ve->context.active));
+				virtual_update_register_offsets(regs, engine);
+
+				if (!list_empty(&ve->context.signals))
+					virtual_xfer_breadcrumbs(ve, engine);
+
+				/*
+				 * Move the bound engine to the top of the list
+				 * for future execution. We then kick this
+				 * tasklet first before checking others, so that
+				 * we preferentially reuse this set of bound
+				 * registers.
+				 */
+				for (n = 1; n < ve->num_siblings; n++) {
+					if (ve->siblings[n] == engine) {
+						swap(ve->siblings[n],
+						     ve->siblings[0]);
+						break;
+					}
+				}
+
+				GEM_BUG_ON(ve->siblings[0] != engine);
+			}
+
+			__i915_request_submit(rq);
+			trace_i915_request_in(rq, port_index(port, execlists));
+			submit = true;
+			last = rq;
+		}
+
+		spin_unlock(&ve->base.timeline.lock);
+		break;
+	}
+
 	while ((rb = rb_first_cached(&execlists->queue))) {
 		struct i915_priolist *p = to_priolist(rb);
 		struct i915_request *rq, *rn;
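In the dequeue loop above, the single pending virtual request competes with the engine's own priority queue: it is taken only when its priority is at least that of the best locally queued request; otherwise the engine falls through to its own queue and the virtual request stays visible to the other siblings. A simplified, self-contained version of that comparison — hypothetical request and queue types, ignoring merging and locking — is:

#include <limits.h>
#include <stdio.h>

struct request { int prio; };

/* Highest priority waiting in the engine's own queue, or INT_MIN if empty. */
static int queue_prio(const struct request *local, int count)
{
	int best = INT_MIN, i;

	for (i = 0; i < count; i++)
		if (local[i].prio > best)
			best = local[i].prio;
	return best;
}

/*
 * Greedy pick: prefer the shared (virtual) candidate only if it is at
 * least as urgent as anything queued locally; otherwise leave it for
 * whichever sibling idles next.  (The real code then walks its own
 * priority lists in order; here we just return the first local entry.)
 */
static const struct request *pick_next(const struct request *virtual_rq,
				       const struct request *local, int count)
{
	if (virtual_rq && virtual_rq->prio >= queue_prio(local, count))
		return virtual_rq;
	return count ? &local[0] : NULL;
}

int main(void)
{
	struct request local[] = { { 0 }, { -10 } };
	struct request shared = { 512 };
	const struct request *next = pick_next(&shared, local, 2);

	printf("dequeued prio %d\n", next->prio); /* the shared request wins */
	return 0;
}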
@@ -1838,6 +2099,25 @@ static void reset_csb_pointers(struct intel_engine_execlists *execlists)
 		   &execlists->csb_status[reset_value]);
 }
 
+static struct i915_request *active_request(struct i915_request *rq)
+{
+	const struct list_head * const list = &rq->engine->timeline.requests;
+	const struct intel_context * const context = rq->hw_context;
+	struct i915_request *active = NULL;
+
+	list_for_each_entry_from_reverse(rq, list, link) {
+		if (i915_request_completed(rq))
+			break;
+
+		if (rq->hw_context != context)
+			break;
+
+		active = rq;
+	}
+
+	return active;
+}
+
 static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -1858,7 +2138,8 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	if (!port_isset(execlists->port))
 		goto out_clear;
 
-	ce = port_request(execlists->port)->hw_context;
+	rq = port_request(execlists->port);
+	ce = rq->hw_context;
 
 	/*
 	 * Catch up with any missed context-switch interrupts.
@@ -1871,16 +2152,10 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	 */
 	execlists_cancel_port_requests(execlists);
 
-	/* Push back any incomplete requests for replay after the reset. */
-	rq = __unwind_incomplete_requests(engine);
+	rq = active_request(rq);
 	if (!rq)
 		goto out_replay;
 
-	if (rq->hw_context != ce) { /* caught just before a CS event */
-		rq = NULL;
-		goto out_replay;
-	}
-
 	/*
	 * If this request hasn't started yet, e.g. it is waiting on a
 	 * semaphore, we need to avoid skipping the request or else we
@@ -1927,13 +2202,16 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	}
 	execlists_init_reg_state(regs, ce, engine, ce->ring);
 
-	/* Rerun the request; its payload has been neutered (if guilty). */
 out_replay:
+	/* Rerun the request; its payload has been neutered (if guilty). */
 	ce->ring->head =
 		rq ? intel_ring_wrap(ce->ring, rq->head) : ce->ring->tail;
 	intel_ring_update_space(ce->ring);
 	__execlists_update_reg_state(ce, engine);
 
+	/* Push back any incomplete requests for replay after the reset. */
+	__unwind_incomplete_requests(engine);
+
 out_clear:
 	execlists_clear_all_active(execlists);
 }
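active_request() rewinds from the request found in the submission port to the oldest incomplete request of the same context, which is the point the reset must replay from. A standalone sketch of the same walk over an array standing in for the engine timeline (hypothetical fields, not the i915 list machinery):

#include <stdbool.h>
#include <stdio.h>

struct request {
	int context;	/* context id */
	bool completed;
};

/*
 * Starting at index 'port' and walking back towards older requests,
 * return the oldest incomplete request that belongs to the same context.
 */
static const struct request *active_request(const struct request *timeline,
					    int port)
{
	const struct request *active = NULL;
	int context = timeline[port].context;
	int i;

	for (i = port; i >= 0; i--) {
		if (timeline[i].completed)
			break;
		if (timeline[i].context != context)
			break;
		active = &timeline[i];
	}

	return active;
}

int main(void)
{
	/* oldest ... newest */
	struct request timeline[] = {
		{ 1, true }, { 2, false }, { 2, false }, { 2, false },
	};
	const struct request *rq = active_request(timeline, 3);

	printf("replay from timeline[%ld]\n", (long)(rq - timeline)); /* 1 */
	return 0;
}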
@@ -2007,6 +2285,26 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 		i915_priolist_free(p);
 	}
 
+	/* Cancel all attached virtual engines */
+	while ((rb = rb_first_cached(&execlists->virtual))) {
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+
+		rb_erase_cached(rb, &execlists->virtual);
+		RB_CLEAR_NODE(rb);
+
+		spin_lock(&ve->base.timeline.lock);
+		if (ve->request) {
+			ve->request->engine = engine;
+			__i915_request_submit(ve->request);
+			dma_fence_set_error(&ve->request->fence, -EIO);
+			i915_request_mark_complete(ve->request);
+			ve->base.execlists.queue_priority_hint = INT_MIN;
+			ve->request = NULL;
+		}
+		spin_unlock(&ve->base.timeline.lock);
+	}
+
 	/* Remaining _unready_ requests will be nop'ed when submitted */
 
 	execlists->queue_priority_hint = INT_MIN;
@@ -2491,12 +2789,15 @@ static void execlists_init_reg_state(u32 *regs,
 	bool rcs = engine->class == RENDER_CLASS;
 	u32 base = engine->mmio_base;
 
-	/* A context is actually a big batch buffer with several
+	/*
+	 * A context is actually a big batch buffer with several
 	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
 	 * values we are setting here are only for the first context restore:
 	 * on a subsequent save, the GPU will recreate this batchbuffer with new
 	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
 	 * we are not initializing here).
+	 *
+	 * Must keep consistent with virtual_update_register_offsets().
 	 */
 	regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
 				 MI_LRI_FORCE_POSTED;
@@ -2715,6 +3016,320 @@ error_deref_obj:
 	return ret;
 }
 
+static void virtual_context_destroy(struct kref *kref)
+{
+	struct virtual_engine *ve =
+		container_of(kref, typeof(*ve), context.ref);
+	unsigned int n;
+
+	GEM_BUG_ON(ve->request);
+	GEM_BUG_ON(ve->context.active);
+
+	for (n = 0; n < ve->num_siblings; n++) {
+		struct intel_engine_cs *sibling = ve->siblings[n];
+		struct rb_node *node = &ve->nodes[sibling->id].rb;
+
+		if (RB_EMPTY_NODE(node))
+			continue;
+
+		spin_lock_irq(&sibling->timeline.lock);
+
+		/* Detachment is lazily performed in the execlists tasklet */
+		if (!RB_EMPTY_NODE(node))
+			rb_erase_cached(node, &sibling->execlists.virtual);
+
+		spin_unlock_irq(&sibling->timeline.lock);
+	}
+	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
+
+	if (ve->context.state)
+		__execlists_context_fini(&ve->context);
+
+	i915_timeline_fini(&ve->base.timeline);
+	kfree(ve);
+}
+
+static void virtual_engine_initial_hint(struct virtual_engine *ve)
+{
+	int swp;
+
+	/*
+	 * Pick a random sibling on starting to help spread the load around.
+	 *
+	 * New contexts are typically created with exactly the same order
+	 * of siblings, and often started in batches. Due to the way we iterate
+	 * the array of siblings when submitting requests, sibling[0] is
+	 * prioritised for dequeuing. If we make sure that sibling[0] is fairly
+	 * randomised across the system, we also help spread the load by the
+	 * first engine we inspect being different each time.
+	 *
+	 * NB This does not force us to execute on this engine, it will just
+	 * typically be the first we inspect for submission.
+	 */
+	swp = prandom_u32_max(ve->num_siblings);
+	if (!swp)
+		return;
+
+	swap(ve->siblings[swp], ve->siblings[0]);
+	virtual_update_register_offsets(ve->context.lrc_reg_state,
+					ve->siblings[0]);
+}
+
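virtual_engine_initial_hint() only swaps a randomly chosen sibling into slot 0 when the context is first set up, so identically configured contexts do not all probe the same physical engine first. The idea in isolation, with userspace rand() standing in for prandom_u32_max() and a hypothetical engine type:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

struct engine { const char *name; };

static void swap_ptr(struct engine **a, struct engine **b)
{
	struct engine *t = *a; *a = *b; *b = t;
}

/* Randomise which sibling is inspected first when requests are submitted. */
static void initial_hint(struct engine **siblings, unsigned int count)
{
	unsigned int swp = (unsigned int)rand() % count;

	if (swp)	/* index 0 chosen: already first, nothing to do */
		swap_ptr(&siblings[swp], &siblings[0]);
}

int main(void)
{
	struct engine vcs0 = { "vcs0" }, vcs1 = { "vcs1" };
	struct engine *siblings[] = { &vcs0, &vcs1 };

	srand((unsigned int)time(NULL));
	initial_hint(siblings, 2);
	printf("first engine to probe: %s\n", siblings[0]->name);
	return 0;
}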
+static int virtual_context_pin(struct intel_context *ce)
+{
+	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+	int err;
+
+	/* Note: we must use a real engine class for setting up reg state */
+	err = __execlists_context_pin(ce, ve->siblings[0]);
+	if (err)
+		return err;
+
+	virtual_engine_initial_hint(ve);
+	return 0;
+}
+
+static void virtual_context_enter(struct intel_context *ce)
+{
+	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+	unsigned int n;
+
+	for (n = 0; n < ve->num_siblings; n++)
+		intel_engine_pm_get(ve->siblings[n]);
+}
+
+static void virtual_context_exit(struct intel_context *ce)
+{
+	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+	unsigned int n;
+
+	ce->saturated = 0;
+	for (n = 0; n < ve->num_siblings; n++)
+		intel_engine_pm_put(ve->siblings[n]);
+}
+
+static const struct intel_context_ops virtual_context_ops = {
+	.pin = virtual_context_pin,
+	.unpin = execlists_context_unpin,
+
+	.enter = virtual_context_enter,
+	.exit = virtual_context_exit,
+
+	.destroy = virtual_context_destroy,
+};
+
+static void virtual_submission_tasklet(unsigned long data)
+{
+	struct virtual_engine * const ve = (struct virtual_engine *)data;
+	const int prio = ve->base.execlists.queue_priority_hint;
+	unsigned int n;
+
+	local_irq_disable();
+	for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
+		struct intel_engine_cs *sibling = ve->siblings[n];
+		struct ve_node * const node = &ve->nodes[sibling->id];
+		struct rb_node **parent, *rb;
+		bool first;
+
+		spin_lock(&sibling->timeline.lock);
+
+		if (!RB_EMPTY_NODE(&node->rb)) {
+			/*
+			 * Cheat and avoid rebalancing the tree if we can
+			 * reuse this node in situ.
+			 */
+			first = rb_first_cached(&sibling->execlists.virtual) ==
+				&node->rb;
+			if (prio == node->prio || (prio > node->prio && first))
+				goto submit_engine;
+
+			rb_erase_cached(&node->rb, &sibling->execlists.virtual);
+		}
+
+		rb = NULL;
+		first = true;
+		parent = &sibling->execlists.virtual.rb_root.rb_node;
+		while (*parent) {
+			struct ve_node *other;
+
+			rb = *parent;
+			other = rb_entry(rb, typeof(*other), rb);
+			if (prio > other->prio) {
+				parent = &rb->rb_left;
+			} else {
+				parent = &rb->rb_right;
+				first = false;
+			}
+		}
+
+		rb_link_node(&node->rb, rb, parent);
+		rb_insert_color_cached(&node->rb,
+				       &sibling->execlists.virtual,
+				       first);
+
+submit_engine:
+		GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
+		node->prio = prio;
+		if (first && prio > sibling->execlists.queue_priority_hint) {
+			sibling->execlists.queue_priority_hint = prio;
+			tasklet_hi_schedule(&sibling->execlists.tasklet);
+		}
+
+		spin_unlock(&sibling->timeline.lock);
+	}
+	local_irq_enable();
+}
+
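virtual_submission_tasklet() inserts this virtual engine's per-sibling node into each physical engine's tree of waiting virtual engines, ordered by priority, and tracks whether the node became the leftmost entry so the sibling is only kicked when the best candidate actually improved. The same "did we only ever descend left" trick on a plain binary search tree (illustrative only, not the kernel rbtree API):

#include <stdbool.h>
#include <stdio.h>

struct node {
	int prio;
	struct node *left, *right;
};

/*
 * Insert a node ordered by descending priority and report whether it
 * ended up leftmost, i.e. whether it is now the best candidate.
 */
static bool insert(struct node **root, struct node *node)
{
	struct node **parent = root;
	bool first = true;

	while (*parent) {
		if (node->prio > (*parent)->prio) {
			parent = &(*parent)->left;
		} else {
			parent = &(*parent)->right;
			first = false;	/* something better is already queued */
		}
	}
	node->left = node->right = NULL;
	*parent = node;
	return first;
}

int main(void)
{
	struct node *root = NULL;
	struct node a = { .prio = 0 }, b = { .prio = 100 }, c = { .prio = -5 };

	printf("a first? %d\n", insert(&root, &a)); /* 1: tree was empty */
	printf("b first? %d\n", insert(&root, &b)); /* 1: higher priority */
	printf("c first? %d\n", insert(&root, &c)); /* 0 */
	return 0;
}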
+static void virtual_submit_request(struct i915_request *rq)
+{
+	struct virtual_engine *ve = to_virtual_engine(rq->engine);
+
+	GEM_TRACE("%s: rq=%llx:%lld\n",
+		  ve->base.name,
+		  rq->fence.context,
+		  rq->fence.seqno);
+
+	GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
+
+	GEM_BUG_ON(ve->request);
+	ve->base.execlists.queue_priority_hint = rq_prio(rq);
+	WRITE_ONCE(ve->request, rq);
+
+	tasklet_schedule(&ve->base.execlists.tasklet);
+}
+
+struct intel_context *
+intel_execlists_create_virtual(struct i915_gem_context *ctx,
+			       struct intel_engine_cs **siblings,
+			       unsigned int count)
+{
+	struct virtual_engine *ve;
+	unsigned int n;
+	int err;
+
+	if (count == 0)
+		return ERR_PTR(-EINVAL);
+
+	if (count == 1)
+		return intel_context_create(ctx, siblings[0]);
+
+	ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
+	if (!ve)
+		return ERR_PTR(-ENOMEM);
+
+	ve->base.i915 = ctx->i915;
+	ve->base.id = -1;
+	ve->base.class = OTHER_CLASS;
+	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
+	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
+	ve->base.flags = I915_ENGINE_IS_VIRTUAL;
+
+	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
+
+	err = i915_timeline_init(ctx->i915, &ve->base.timeline, NULL);
+	if (err)
+		goto err_put;
+	i915_timeline_set_subclass(&ve->base.timeline, TIMELINE_VIRTUAL);
+
+	intel_engine_init_execlists(&ve->base);
+
+	ve->base.cops = &virtual_context_ops;
+	ve->base.request_alloc = execlists_request_alloc;
+
+	ve->base.schedule = i915_schedule;
+	ve->base.submit_request = virtual_submit_request;
+
+	ve->base.execlists.queue_priority_hint = INT_MIN;
+	tasklet_init(&ve->base.execlists.tasklet,
+		     virtual_submission_tasklet,
+		     (unsigned long)ve);
+
+	intel_context_init(&ve->context, ctx, &ve->base);
+
+	for (n = 0; n < count; n++) {
+		struct intel_engine_cs *sibling = siblings[n];
+
+		GEM_BUG_ON(!is_power_of_2(sibling->mask));
+		if (sibling->mask & ve->base.mask) {
+			DRM_DEBUG("duplicate %s entry in load balancer\n",
+				  sibling->name);
+			err = -EINVAL;
+			goto err_put;
+		}
+
+		/*
+		 * The virtual engine implementation is tightly coupled to
+		 * the execlists backend -- we push requests directly
+		 * into a tree inside each physical engine. We could support
+		 * layering if we handle cloning of the requests and
+		 * submitting a copy into each backend.
+		 */
+		if (sibling->execlists.tasklet.func !=
+		    execlists_submission_tasklet) {
+			err = -ENODEV;
+			goto err_put;
+		}
+
+		GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
+		RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
+
+		ve->siblings[ve->num_siblings++] = sibling;
+		ve->base.mask |= sibling->mask;
+
+		/*
+		 * All physical engines must be compatible for their emission
+		 * functions (as we build the instructions during request
+		 * construction and do not alter them before submission
+		 * on the physical engine). We use the engine class as a guide
+		 * here, although that could be refined.
+		 */
+		if (ve->base.class != OTHER_CLASS) {
+			if (ve->base.class != sibling->class) {
+				DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
+					  sibling->class, ve->base.class);
+				err = -EINVAL;
+				goto err_put;
+			}
+			continue;
+		}
+
+		ve->base.class = sibling->class;
+		ve->base.uabi_class = sibling->uabi_class;
+		snprintf(ve->base.name, sizeof(ve->base.name),
+			 "v%dx%d", ve->base.class, count);
+		ve->base.context_size = sibling->context_size;
+
+		ve->base.emit_bb_start = sibling->emit_bb_start;
+		ve->base.emit_flush = sibling->emit_flush;
+		ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
+		ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
+		ve->base.emit_fini_breadcrumb_dw =
+			sibling->emit_fini_breadcrumb_dw;
+	}
+
+	return &ve->context;
+
+err_put:
+	intel_context_put(&ve->context);
+	return ERR_PTR(err);
+}
+
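intel_execlists_create_virtual() sizes its single allocation with struct_size() because struct virtual_engine ends in a flexible array of sibling pointers, so one kzalloc() covers the header plus exactly count slots. The same pattern in plain C, with calloc() standing in for kzalloc() and the size computed by hand rather than with the kernel's overflow-checked struct_size() helper:

#include <stdio.h>
#include <stdlib.h>

struct engine { const char *name; };

struct virtual_engine {
	unsigned int num_siblings;
	struct engine *siblings[];	/* flexible array member */
};

static struct virtual_engine *create_virtual(struct engine **siblings,
					     unsigned int count)
{
	struct virtual_engine *ve;

	/* header plus 'count' trailing pointers in a single allocation */
	ve = calloc(1, sizeof(*ve) + count * sizeof(ve->siblings[0]));
	if (!ve)
		return NULL;

	for (ve->num_siblings = 0; ve->num_siblings < count; ve->num_siblings++)
		ve->siblings[ve->num_siblings] = siblings[ve->num_siblings];

	return ve;
}

int main(void)
{
	struct engine vcs0 = { "vcs0" }, vcs1 = { "vcs1" };
	struct engine *siblings[] = { &vcs0, &vcs1 };
	struct virtual_engine *ve = create_virtual(siblings, 2);

	if (ve) {
		printf("virtual engine over %u siblings, first %s\n",
		       ve->num_siblings, ve->siblings[0]->name);
		free(ve);
	}
	return 0;
}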
+struct intel_context *
+intel_execlists_clone_virtual(struct i915_gem_context *ctx,
+			      struct intel_engine_cs *src)
+{
+	struct virtual_engine *se = to_virtual_engine(src);
+	struct intel_context *dst;
+
+	dst = intel_execlists_create_virtual(ctx,
+					     se->siblings,
+					     se->num_siblings);
+	if (IS_ERR(dst))
+		return dst;
+
+	return dst;
+}
+
 void intel_execlists_show_requests(struct intel_engine_cs *engine,
 				   struct drm_printer *m,
 				   void (*show_request)(struct drm_printer *m,
@@ -2772,6 +3387,29 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
 		show_request(m, last, "\t\tQ ");
 	}
 
+	last = NULL;
+	count = 0;
+	for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+		struct i915_request *rq = READ_ONCE(ve->request);
+
+		if (rq) {
+			if (count++ < max - 1)
+				show_request(m, rq, "\t\tV ");
+			else
+				last = rq;
+		}
+	}
+	if (last) {
+		if (count > max) {
+			drm_printf(m,
+				   "\t\t...skipping %d virtual requests...\n",
+				   count - max);
+		}
+		show_request(m, last, "\t\tV ");
+	}
+
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
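The debugfs dump added at the end keeps its output bounded: up to max - 1 entries are printed immediately, the most recent entry seen is remembered, and the number skipped in between is reported. The same shape lifted out on its own, with printf() in place of drm_printf() and an integer array standing in for the request list:

#include <stdio.h>

/* Print at most 'max' of 'count' items, eliding the middle of the list. */
static void show_bounded(const int *prio, int count, int max)
{
	int last = -1, shown = 0, i;

	for (i = 0; i < count; i++) {
		if (shown++ < max - 1)
			printf("\tV prio %d\n", prio[i]);
		else
			last = i;	/* remember only the most recent */
	}

	if (last >= 0) {
		if (shown > max)
			printf("\t...skipping %d entries...\n", shown - max);
		printf("\tV prio %d\n", prio[last]);
	}
}

int main(void)
{
	int prio[] = { 5, 4, 3, 2, 1, 0 };

	show_bounded(prio, 6, 3);	/* prints 5, 4, a skip notice, then 0 */
	return 0;
}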