diff options
Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_lrc.c')
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_lrc.c | 212 |
1 files changed, 129 insertions, 83 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 468438fb47af..d0088d020220 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -247,8 +247,12 @@ static void __context_pin_release(struct intel_context *ce) static void mark_eio(struct i915_request *rq) { - if (!i915_request_signaled(rq)) - dma_fence_set_error(&rq->fence, -EIO); + if (i915_request_completed(rq)) + return; + + GEM_BUG_ON(i915_request_signaled(rq)); + + dma_fence_set_error(&rq->fence, -EIO); i915_request_mark_complete(rq); } @@ -348,10 +352,15 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, * However, the priority hint is a mere hint that we may need to * preempt. If that hint is stale or we may be trying to preempt * ourselves, ignore the request. + * + * More naturally we would write + * prio >= max(0, last); + * except that we wish to prevent triggering preemption at the same + * priority level: the task that is running should remain running + * to preserve FIFO ordering of dependencies. */ - last_prio = effective_prio(rq); - if (!i915_scheduler_need_preempt(engine->execlists.queue_priority_hint, - last_prio)) + last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1); + if (engine->execlists.queue_priority_hint <= last_prio) return false; /* @@ -669,64 +678,6 @@ static const u8 gen12_xcs_offsets[] = { REG16(0x274), REG16(0x270), - NOP(13), - LRI(2, POSTED), - REG16(0x200), - REG16(0x204), - - NOP(11), - LRI(50, POSTED), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG(0x028), - REG(0x09c), - REG(0x0c0), - REG(0x178), - REG(0x17c), - REG16(0x358), - REG(0x170), - REG(0x150), - REG(0x154), - REG(0x158), - REG16(0x41c), - REG16(0x600), - REG16(0x604), - REG16(0x608), - REG16(0x60c), - REG16(0x610), - REG16(0x614), - REG16(0x618), - REG16(0x61c), - REG16(0x620), - REG16(0x624), - REG16(0x628), - REG16(0x62c), - REG16(0x630), - REG16(0x634), - REG16(0x638), - REG16(0x63c), - REG16(0x640), - REG16(0x644), - REG16(0x648), - REG16(0x64c), - REG16(0x650), - REG16(0x654), - REG16(0x658), - REG16(0x65c), - REG16(0x660), - REG16(0x664), - REG16(0x668), - REG16(0x66c), - REG16(0x670), - REG16(0x674), - REG16(0x678), - REG16(0x67c), - REG(0x068), - END(), }; @@ -857,6 +808,15 @@ static const u8 gen12_rcs_offsets[] = { static const u8 *reg_offsets(const struct intel_engine_cs *engine) { + /* + * The gen12+ lists only have the registers we program in the basic + * default state. We rely on the context image using relative + * addressing to automatic fixup the register state between the + * physical engines for virtual engine. + */ + GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 && + !intel_engine_has_relative_mmio(engine)); + if (engine->class == RENDER_CLASS) { if (INTEL_GEN(engine->i915) >= 12) return gen12_rcs_offsets; @@ -892,7 +852,6 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) list_for_each_entry_safe_reverse(rq, rn, &engine->active.requests, sched.link) { - struct intel_engine_cs *owner; if (i915_request_completed(rq)) continue; /* XXX */ @@ -907,8 +866,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) * engine so that it can be moved across onto another physical * engine as load dictates. */ - owner = rq->hw_context->engine; - if (likely(owner == engine)) { + if (likely(rq->execution_mask == engine->mask)) { GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); if (rq_prio(rq) != prio) { prio = rq_prio(rq); @@ -919,6 +877,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) list_move(&rq->sched.link, pl); active = rq; } else { + struct intel_engine_cs *owner = rq->hw_context->engine; + /* * Decouple the virtual breadcrumb before moving it * back to the virtual engine -- we don't want the @@ -928,7 +888,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) */ if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) { - spin_lock(&rq->lock); + spin_lock_nested(&rq->lock, + SINGLE_DEPTH_NESTING); i915_request_cancel_breadcrumb(rq); spin_unlock(&rq->lock); } @@ -1092,6 +1053,10 @@ static u64 execlists_update_context(const struct i915_request *rq) desc = ce->lrc_desc; ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE; + /* Wa_1607138340:tgl */ + if (IS_TGL_REVID(rq->i915, TGL_REVID_A0, TGL_REVID_A0)) + desc |= CTX_DESC_FORCE_RESTORE; + return desc; } @@ -1137,25 +1102,45 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, trace_ports(execlists, msg, execlists->pending); - if (!execlists->pending[0]) + if (!execlists->pending[0]) { + GEM_TRACE_ERR("Nothing pending for promotion!\n"); return false; + } - if (execlists->pending[execlists_num_ports(execlists)]) + if (execlists->pending[execlists_num_ports(execlists)]) { + GEM_TRACE_ERR("Excess pending[%d] for promotion!\n", + execlists_num_ports(execlists)); return false; + } for (port = execlists->pending; (rq = *port); port++) { - if (ce == rq->hw_context) + if (ce == rq->hw_context) { + GEM_TRACE_ERR("Duplicate context in pending[%zd]\n", + port - execlists->pending); return false; + } ce = rq->hw_context; if (i915_request_completed(rq)) continue; - if (i915_active_is_idle(&ce->active)) + if (i915_active_is_idle(&ce->active)) { + GEM_TRACE_ERR("Inactive context in pending[%zd]\n", + port - execlists->pending); return false; + } + + if (!i915_vma_is_pinned(ce->state)) { + GEM_TRACE_ERR("Unpinned context in pending[%zd]\n", + port - execlists->pending); + return false; + } - if (!i915_vma_is_pinned(ce->state)) + if (!i915_vma_is_pinned(ce->ring->vma)) { + GEM_TRACE_ERR("Unpinned ringbuffer in pending[%zd]\n", + port - execlists->pending); return false; + } } return ce; @@ -1232,6 +1217,10 @@ static bool can_merge_rq(const struct i915_request *prev, if (i915_request_completed(next)) return true; + if (unlikely((prev->flags ^ next->flags) & + (I915_REQUEST_NOPREEMPT | I915_REQUEST_SENTINEL))) + return false; + if (!can_merge_ctx(prev->hw_context, next->hw_context)) return false; @@ -1288,7 +1277,7 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve, static struct i915_request * last_active(const struct intel_engine_execlists *execlists) { - struct i915_request * const *last = execlists->active; + struct i915_request * const *last = READ_ONCE(execlists->active); while (*last && i915_request_completed(*last)) last++; @@ -1489,7 +1478,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE; last = NULL; } else if (need_timeslice(engine, last) && - !timer_pending(&engine->execlists.timer)) { + timer_expired(&engine->execlists.timer)) { GEM_TRACE("%s: expired last=%llx:%lld, prio=%d, hint=%d\n", engine->name, last->fence.context, @@ -1525,8 +1514,17 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * submission. */ if (!list_is_last(&last->sched.link, - &engine->active.requests)) + &engine->active.requests)) { + /* + * Even if ELSP[1] is occupied and not worthy + * of timeslices, our queue might be. + */ + if (!execlists->timer.expires && + need_timeslice(engine, last)) + mod_timer(&execlists->timer, + jiffies + 1); return; + } /* * WaIdleLiteRestore:bdw,skl @@ -1680,6 +1678,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (last->hw_context == rq->hw_context) goto done; + if (i915_request_has_sentinel(last)) + goto done; + /* * If GVT overrides us we only ever submit * port[0], leaving port[1] empty. Note that we @@ -1859,6 +1860,13 @@ static void process_csb(struct intel_engine_cs *engine) const u8 num_entries = execlists->csb_size; u8 head, tail; + /* + * As we modify our execlists state tracking we require exclusive + * access. Either we are inside the tasklet, or the tasklet is disabled + * and we assume that is only inside the reset paths and so serialised. + */ + GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) && + !reset_in_progress(execlists)); GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915)); /* @@ -1920,6 +1928,9 @@ static void process_csb(struct intel_engine_cs *engine) else promote = gen8_csb_parse(execlists, buf + 2 * head); if (promote) { + if (!inject_preempt_hang(execlists)) + ring_set_paused(engine, 0); + /* cancel old inflight, prepare for switch */ trace_ports(execlists, "preempted", execlists->active); while (*execlists->active) @@ -1935,9 +1946,8 @@ static void process_csb(struct intel_engine_cs *engine) if (enable_timeslice(execlists)) mod_timer(&execlists->timer, jiffies + 1); - - if (!inject_preempt_hang(execlists)) - ring_set_paused(engine, 0); + else + cancel_timer(&execlists->timer); WRITE_ONCE(execlists->pending[0], NULL); } else { @@ -1980,8 +1990,11 @@ static void process_csb(struct intel_engine_cs *engine) static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) { lockdep_assert_held(&engine->active.lock); - if (!engine->execlists.pending[0]) + if (!engine->execlists.pending[0]) { + rcu_read_lock(); /* protect peeking at execlists->active */ execlists_dequeue(engine); + rcu_read_unlock(); + } } /* @@ -2773,8 +2786,10 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) if (!rq) goto unwind; + /* We still have requests in-flight; the engine should be active */ + GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); + ce = rq->hw_context; - GEM_BUG_ON(i915_active_is_idle(&ce->active)); GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); /* Proclaim we have exclusive access to the context image! */ @@ -2782,10 +2797,13 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) rq = active_request(rq); if (!rq) { + /* Idle context; tidy up the ring so we can restart afresh */ ce->ring->head = ce->ring->tail; goto out_replay; } + /* Context has requests still in-flight; it should not be idle! */ + GEM_BUG_ON(i915_active_is_idle(&ce->active)); ce->ring->head = intel_ring_wrap(ce->ring, rq->head); /* @@ -3205,8 +3223,11 @@ static int gen12_emit_flush_render(struct i915_request *request, flags |= PIPE_CONTROL_TILE_CACHE_FLUSH; flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + /* Wa_1409600907:tgl */ + flags |= PIPE_CONTROL_DEPTH_STALL; flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; flags |= PIPE_CONTROL_FLUSH_ENABLE; + flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH; flags |= PIPE_CONTROL_STORE_DATA_INDEX; flags |= PIPE_CONTROL_QW_WRITE; @@ -3232,6 +3253,7 @@ static int gen12_emit_flush_render(struct i915_request *request, flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_L3_RO_CACHE_INVALIDATE; flags |= PIPE_CONTROL_STORE_DATA_INDEX; flags |= PIPE_CONTROL_QW_WRITE; @@ -3253,6 +3275,26 @@ static int gen12_emit_flush_render(struct i915_request *request, *cs++ = preparser_disable(false); intel_ring_advance(request, cs); + + /* + * Wa_1604544889:tgl + */ + if (IS_TGL_REVID(request->i915, TGL_REVID_A0, TGL_REVID_A0)) { + flags = 0; + flags |= PIPE_CONTROL_CS_STALL; + flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH; + + flags |= PIPE_CONTROL_STORE_DATA_INDEX; + flags |= PIPE_CONTROL_QW_WRITE; + + cs = intel_ring_begin(request, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cs = gen8_emit_pipe_control(cs, flags, + LRC_PPHWSP_SCRATCH_ADDR); + intel_ring_advance(request, cs); + } } return 0; @@ -3415,15 +3457,18 @@ gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) PIPE_CONTROL_TILE_CACHE_FLUSH | PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | + /* Wa_1409600907:tgl */ + PIPE_CONTROL_DEPTH_STALL | PIPE_CONTROL_DC_FLUSH_ENABLE | - PIPE_CONTROL_FLUSH_ENABLE); + PIPE_CONTROL_FLUSH_ENABLE | + PIPE_CONTROL_HDC_PIPELINE_FLUSH); return gen12_emit_fini_breadcrumb_footer(request, cs); } static void execlists_park(struct intel_engine_cs *engine) { - del_timer(&engine->execlists.timer); + cancel_timer(&engine->execlists.timer); } void intel_execlists_set_default_submission(struct intel_engine_cs *engine) @@ -3447,7 +3492,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) engine->flags |= I915_ENGINE_HAS_PREEMPTION; } - if (engine->class != COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) >= 12) + if (INTEL_GEN(engine->i915) >= 12) engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO; } @@ -4169,6 +4214,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx, ve->base.i915 = ctx->i915; ve->base.gt = siblings[0]->gt; + ve->base.uncore = siblings[0]->uncore; ve->base.id = -1; ve->base.class = OTHER_CLASS; ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; |