Diffstat (limited to 'drivers/gpu/drm/i915/gt')
38 files changed, 1120 insertions, 398 deletions
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index c30adc05fa98..680bd9442eb0 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -131,17 +131,17 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 	vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
 	do {
-		GEM_BUG_ON(iter.sg->length < I915_GTT_PAGE_SIZE);
+		GEM_BUG_ON(sg_dma_len(iter.sg) < I915_GTT_PAGE_SIZE);
 		vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
 
 		iter.dma += I915_GTT_PAGE_SIZE;
 		if (iter.dma == iter.max) {
 			iter.sg = __sg_next(iter.sg);
-			if (!iter.sg)
+			if (!iter.sg || sg_dma_len(iter.sg) == 0)
 				break;
 
 			iter.dma = sg_dma_address(iter.sg);
-			iter.max = iter.dma + iter.sg->length;
+			iter.max = iter.dma + sg_dma_len(iter.sg);
 		}
 
 		if (++act_pte == GEN6_PTES) {
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 38c7069b7749..a37c968ef8f7 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -372,19 +372,19 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
 	pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
 	vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
 	do {
-		GEM_BUG_ON(iter->sg->length < I915_GTT_PAGE_SIZE);
+		GEM_BUG_ON(sg_dma_len(iter->sg) < I915_GTT_PAGE_SIZE);
 		vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;
 
 		iter->dma += I915_GTT_PAGE_SIZE;
 		if (iter->dma >= iter->max) {
 			iter->sg = __sg_next(iter->sg);
-			if (!iter->sg) {
+			if (!iter->sg || sg_dma_len(iter->sg) == 0) {
 				idx = 0;
 				break;
 			}
 
 			iter->dma = sg_dma_address(iter->sg);
-			iter->max = iter->dma + iter->sg->length;
+			iter->max = iter->dma + sg_dma_len(iter->sg);
 		}
 
 		if (gen8_pd_index(++idx, 0) == 0) {
@@ -413,8 +413,8 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
 				   u32 flags)
 {
 	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+	unsigned int rem = sg_dma_len(iter->sg);
 	u64 start = vma->node.start;
-	dma_addr_t rem = iter->sg->length;
 
 	GEM_BUG_ON(!i915_vm_is_4lvl(vma->vm));
 
@@ -456,7 +456,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
 		}
 
 		do {
-			GEM_BUG_ON(iter->sg->length < page_size);
+			GEM_BUG_ON(sg_dma_len(iter->sg) < page_size);
 			vaddr[index++] = encode | iter->dma;
 
 			start += page_size;
@@ -467,7 +467,10 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
 				if (!iter->sg)
 					break;
 
-				rem = iter->sg->length;
+				rem = sg_dma_len(iter->sg);
+				if (!rem)
+					break;
+
 				iter->dma = sg_dma_address(iter->sg);
 				iter->max = iter->dma + rem;
 
@@ -525,7 +528,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
 		}
 
 		vma->page_sizes.gtt |= page_size;
-	} while (iter->sg);
+	} while (iter->sg && sg_dma_len(iter->sg));
 }
 
 static void gen8_ppgtt_insert(struct i915_address_space *vm,
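The switch from iter.sg->length to sg_dma_len() above matters because, after dma_map_sg(), an IOMMU may coalesce entries: a segment's DMA length need not match its CPU length, and the mapped list can end early with zero-length tail entries. As an illustration only (plain arrays stand in for the scatterlist; all values are invented), the same {dma, max} window walk in a standalone C program:

/*
 * Minimal userspace sketch of the sgt_dma-style walk used above: advance
 * a {dma, max} window across DMA segments one page at a time, stopping at
 * the first zero-length segment.
 */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096u

struct seg {
	uint64_t dma_address;	/* cf. sg_dma_address() */
	unsigned int dma_len;	/* cf. sg_dma_len(); 0 marks the unused tail */
};

int main(void)
{
	struct seg segs[] = {
		{ 0x100000, 2 * PAGE_SIZE },
		{ 0x200000, 1 * PAGE_SIZE },
		{ 0,        0 },	/* coalesced away by the IOMMU */
	};
	struct seg *sg = segs;
	uint64_t dma = sg->dma_address;
	uint64_t max = dma + sg->dma_len;

	do {
		printf("insert PTE for dma %#llx\n", (unsigned long long)dma);

		dma += PAGE_SIZE;
		if (dma == max) {	/* segment exhausted */
			sg++;
			if (sg == &segs[3] || sg->dma_len == 0)
				break;	/* end of mapped pages */
			dma = sg->dma_address;
			max = dma + sg->dma_len;
		}
	} while (1);

	return 0;
}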
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index d8b206e53660..a24cc1ff08a0 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -30,18 +30,21 @@
 #include "i915_trace.h"
 #include "intel_breadcrumbs.h"
 #include "intel_context.h"
+#include "intel_engine_pm.h"
 #include "intel_gt_pm.h"
 #include "intel_gt_requests.h"
 
-static void irq_enable(struct intel_engine_cs *engine)
+static bool irq_enable(struct intel_engine_cs *engine)
 {
 	if (!engine->irq_enable)
-		return;
+		return false;
 
 	/* Caller disables interrupts */
 	spin_lock(&engine->gt->irq_lock);
 	engine->irq_enable(engine);
 	spin_unlock(&engine->gt->irq_lock);
+
+	return true;
 }
 
 static void irq_disable(struct intel_engine_cs *engine)
@@ -57,12 +60,11 @@ static void irq_disable(struct intel_engine_cs *engine)
 
 static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
 {
-	lockdep_assert_held(&b->irq_lock);
-
-	if (!b->irq_engine || b->irq_armed)
-		return;
-
-	if (!intel_gt_pm_get_if_awake(b->irq_engine->gt))
+	/*
+	 * Since we are waiting on a request, the GPU should be busy
+	 * and should have its own rpm reference.
+	 */
+	if (GEM_WARN_ON(!intel_gt_pm_get_if_awake(b->irq_engine->gt)))
 		return;
 
 	/*
@@ -73,25 +75,24 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
 	 */
 	WRITE_ONCE(b->irq_armed, true);
 
-	/*
-	 * Since we are waiting on a request, the GPU should be busy
-	 * and should have its own rpm reference. This is tracked
-	 * by i915->gt.awake, we can forgo holding our own wakref
-	 * for the interrupt as before i915->gt.awake is released (when
-	 * the driver is idle) we disarm the breadcrumbs.
-	 */
-
-	if (!b->irq_enabled++)
-		irq_enable(b->irq_engine);
+	/* Requests may have completed before we could enable the interrupt. */
+	if (!b->irq_enabled++ && irq_enable(b->irq_engine))
+		irq_work_queue(&b->irq_work);
 }
 
-static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
+static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
 {
-	lockdep_assert_held(&b->irq_lock);
-
-	if (!b->irq_engine || !b->irq_armed)
+	if (!b->irq_engine)
 		return;
 
+	spin_lock(&b->irq_lock);
+	if (!b->irq_armed)
+		__intel_breadcrumbs_arm_irq(b);
+	spin_unlock(&b->irq_lock);
+}
+
+static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
+{
 	GEM_BUG_ON(!b->irq_enabled);
 	if (!--b->irq_enabled)
 		irq_disable(b->irq_engine);
@@ -100,20 +101,37 @@ static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
 	intel_gt_pm_put_async(b->irq_engine->gt);
 }
 
+static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
+{
+	spin_lock(&b->irq_lock);
+	if (b->irq_armed)
+		__intel_breadcrumbs_disarm_irq(b);
+	spin_unlock(&b->irq_lock);
+}
+
 static void add_signaling_context(struct intel_breadcrumbs *b,
 				  struct intel_context *ce)
 {
-	intel_context_get(ce);
-	list_add_tail(&ce->signal_link, &b->signalers);
-	if (list_is_first(&ce->signal_link, &b->signalers))
-		__intel_breadcrumbs_arm_irq(b);
+	lockdep_assert_held(&ce->signal_lock);
+
+	spin_lock(&b->signalers_lock);
+	list_add_rcu(&ce->signal_link, &b->signalers);
+	spin_unlock(&b->signalers_lock);
 }
 
-static void remove_signaling_context(struct intel_breadcrumbs *b,
+static bool remove_signaling_context(struct intel_breadcrumbs *b,
 				     struct intel_context *ce)
 {
-	list_del(&ce->signal_link);
-	intel_context_put(ce);
+	lockdep_assert_held(&ce->signal_lock);
+
+	if (!list_empty(&ce->signals))
+		return false;
+
+	spin_lock(&b->signalers_lock);
+	list_del_rcu(&ce->signal_link);
+	spin_unlock(&b->signalers_lock);
+
+	return true;
 }
 
 static inline bool __request_completed(const struct i915_request *rq)
@@ -174,73 +192,103 @@ static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
 		intel_engine_add_retire(b->irq_engine, tl);
 }
 
-static bool __signal_request(struct i915_request *rq, struct list_head *signals)
+static bool __signal_request(struct i915_request *rq)
 {
-	clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
 
 	if (!__dma_fence_signal(&rq->fence)) {
 		i915_request_put(rq);
 		return false;
 	}
 
-	list_add_tail(&rq->signal_link, signals);
 	return true;
 }
 
+static struct llist_node *
+slist_add(struct llist_node *node, struct llist_node *head)
+{
+	node->next = head;
+	return node;
+}
+
 static void signal_irq_work(struct irq_work *work)
 {
 	struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
 	const ktime_t timestamp = ktime_get();
-	struct intel_context *ce, *cn;
-	struct list_head *pos, *next;
-	LIST_HEAD(signal);
-
-	spin_lock(&b->irq_lock);
+	struct llist_node *signal, *sn;
+	struct intel_context *ce;
 
-	if (list_empty(&b->signalers))
-		__intel_breadcrumbs_disarm_irq(b);
+	signal = NULL;
+	if (unlikely(!llist_empty(&b->signaled_requests)))
+		signal = llist_del_all(&b->signaled_requests);
 
-	list_splice_init(&b->signaled_requests, &signal);
+	/*
+	 * Keep the irq armed until the interrupt after all listeners are gone.
+	 *
+	 * Enabling/disabling the interrupt is rather costly, roughly a couple
+	 * of hundred microseconds. If we are proactive and enable/disable
+	 * the interrupt around every request that wants a breadcrumb, we
+	 * quickly drown in the extra orders of magnitude of latency imposed
+	 * on request submission.
+	 *
+	 * So we try to be lazy, and keep the interrupts enabled until no
+	 * more listeners appear within a breadcrumb interrupt interval (that
+	 * is until a request completes that no one cares about). The
+	 * observation is that listeners come in batches, and will often
+	 * listen to a bunch of requests in succession. Though note on icl+,
+	 * interrupts are always enabled due to concerns with rc6 being
+	 * dysfunctional with per-engine interrupt masking.
+	 *
+	 * We also try to avoid raising too many interrupts, as they may
+	 * be generated by userspace batches and it is unfortunately rather
+	 * too easy to drown the CPU under a flood of GPU interrupts. Thus
+	 * whenever no one appears to be listening, we turn off the interrupts.
+	 * Fewer interrupts should conserve power -- at the very least, fewer
+	 * interrupt draw less ire from other users of the system and tools
+	 * like powertop.
+	 */
+	if (!signal && READ_ONCE(b->irq_armed) && list_empty(&b->signalers))
+		intel_breadcrumbs_disarm_irq(b);
 
-	list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) {
-		GEM_BUG_ON(list_empty(&ce->signals));
+	rcu_read_lock();
+	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
+		struct i915_request *rq;
 
-		list_for_each_safe(pos, next, &ce->signals) {
-			struct i915_request *rq =
-				list_entry(pos, typeof(*rq), signal_link);
+		list_for_each_entry_rcu(rq, &ce->signals, signal_link) {
+			bool release;
 
-			GEM_BUG_ON(!check_signal_order(ce, rq));
 			if (!__request_completed(rq))
 				break;
 
+			if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
+						&rq->fence.flags))
+				break;
+
 			/*
 			 * Queue for execution after dropping the signaling
 			 * spinlock as the callback chain may end up adding
 			 * more signalers to the same context or engine.
 			 */
-			__signal_request(rq, &signal);
-		}
+			spin_lock(&ce->signal_lock);
+			list_del_rcu(&rq->signal_link);
+			release = remove_signaling_context(b, ce);
+			spin_unlock(&ce->signal_lock);
 
-		/*
-		 * We process the list deletion in bulk, only using a list_add
-		 * (not list_move) above but keeping the status of
-		 * rq->signal_link known with the I915_FENCE_FLAG_SIGNAL bit.
-		 */
-		if (!list_is_first(pos, &ce->signals)) {
-			/* Advance the list to the first incomplete request */
-			__list_del_many(&ce->signals, pos);
-			if (&ce->signals == pos) { /* now empty */
+			if (__signal_request(rq))
+				/* We own signal_node now, xfer to local list */
+				signal = slist_add(&rq->signal_node, signal);
+
+			if (release) {
 				add_retire(b, ce->timeline);
-				remove_signaling_context(b, ce);
+				intel_context_put(ce);
 			}
 		}
 	}
+	rcu_read_unlock();
 
-	spin_unlock(&b->irq_lock);
-
-	list_for_each_safe(pos, next, &signal) {
+	llist_for_each_safe(signal, sn, signal) {
 		struct i915_request *rq =
-			list_entry(pos, typeof(*rq), signal_link);
+			llist_entry(signal, typeof(*rq), signal_node);
 		struct list_head cb_list;
 
 		spin_lock(&rq->lock);
@@ -251,6 +299,9 @@ static void signal_irq_work(struct irq_work *work)
 
 		i915_request_put(rq);
 	}
+
+	if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers))
+		intel_breadcrumbs_arm_irq(b);
 }
 
 struct intel_breadcrumbs *
@@ -262,14 +313,15 @@ intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
 	if (!b)
 		return NULL;
 
-	spin_lock_init(&b->irq_lock);
+	b->irq_engine = irq_engine;
+
+	spin_lock_init(&b->signalers_lock);
 	INIT_LIST_HEAD(&b->signalers);
-	INIT_LIST_HEAD(&b->signaled_requests);
+	init_llist_head(&b->signaled_requests);
 
+	spin_lock_init(&b->irq_lock);
 	init_irq_work(&b->irq_work, signal_irq_work);
 
-	b->irq_engine = irq_engine;
-
 	return b;
 }
 
@@ -292,27 +344,28 @@ void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
 
 void intel_breadcrumbs_park(struct intel_breadcrumbs *b)
 {
-	unsigned long flags;
-
-	if (!READ_ONCE(b->irq_armed))
-		return;
-
-	spin_lock_irqsave(&b->irq_lock, flags);
-	__intel_breadcrumbs_disarm_irq(b);
-	spin_unlock_irqrestore(&b->irq_lock, flags);
-
-	if (!list_empty(&b->signalers))
-		irq_work_queue(&b->irq_work);
+	/* Kick the work once more to drain the signalers */
+	irq_work_sync(&b->irq_work);
+	while (unlikely(READ_ONCE(b->irq_armed))) {
+		local_irq_disable();
+		signal_irq_work(&b->irq_work);
+		local_irq_enable();
+		cond_resched();
+	}
+	GEM_BUG_ON(!list_empty(&b->signalers));
 }
 
 void intel_breadcrumbs_free(struct intel_breadcrumbs *b)
 {
+	irq_work_sync(&b->irq_work);
+	GEM_BUG_ON(!list_empty(&b->signalers));
+	GEM_BUG_ON(b->irq_armed);
 	kfree(b);
 }
 
-static void insert_breadcrumb(struct i915_request *rq,
-			      struct intel_breadcrumbs *b)
+static void insert_breadcrumb(struct i915_request *rq)
 {
+	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
 	struct intel_context *ce = rq->context;
 	struct list_head *pos;
 
@@ -327,12 +380,14 @@ static void insert_breadcrumb(struct i915_request *rq,
 	 * its signal completion.
 	 */
 	if (__request_completed(rq)) {
-		if (__signal_request(rq, &b->signaled_requests))
+		if (__signal_request(rq) &&
+		    llist_add(&rq->signal_node, &b->signaled_requests))
 			irq_work_queue(&b->irq_work);
 		return;
 	}
 
 	if (list_empty(&ce->signals)) {
+		intel_context_get(ce);
 		add_signaling_context(b, ce);
 		pos = &ce->signals;
 	} else {
@@ -358,18 +413,22 @@ static void insert_breadcrumb(struct i915_request *rq,
 			break;
 		}
 	}
-	list_add(&rq->signal_link, pos);
+	list_add_rcu(&rq->signal_link, pos);
 	GEM_BUG_ON(!check_signal_order(ce, rq));
+	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
 	set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
 
-	/* Check after attaching to irq, interrupt may have already fired. */
-	if (__request_completed(rq))
-		irq_work_queue(&b->irq_work);
+	/*
+	 * Defer enabling the interrupt to after HW submission and recheck
+	 * the request as it may have completed and raised the interrupt as
+	 * we were attaching it into the lists.
+	 */
+	irq_work_queue(&b->irq_work);
 }
 
 bool i915_request_enable_breadcrumb(struct i915_request *rq)
 {
-	struct intel_breadcrumbs *b;
+	struct intel_context *ce = rq->context;
 
 	/* Serialises with i915_request_retire() using rq->lock */
 	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
@@ -384,67 +443,30 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
 	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
 		return true;
 
-	/*
-	 * rq->engine is locked by rq->engine->active.lock. That however
-	 * is not known until after rq->engine has been dereferenced and
-	 * the lock acquired. Hence we acquire the lock and then validate
-	 * that rq->engine still matches the lock we hold for it.
-	 *
-	 * Here, we are using the breadcrumb lock as a proxy for the
-	 * rq->engine->active.lock, and we know that since the breadcrumb
-	 * will be serialised within i915_request_submit/i915_request_unsubmit,
-	 * the engine cannot change while active as long as we hold the
-	 * breadcrumb lock on that engine.
-	 *
-	 * From the dma_fence_enable_signaling() path, we are outside of the
-	 * request submit/unsubmit path, and so we must be more careful to
-	 * acquire the right lock.
-	 */
-	b = READ_ONCE(rq->engine)->breadcrumbs;
-	spin_lock(&b->irq_lock);
-	while (unlikely(b != READ_ONCE(rq->engine)->breadcrumbs)) {
-		spin_unlock(&b->irq_lock);
-		b = READ_ONCE(rq->engine)->breadcrumbs;
-		spin_lock(&b->irq_lock);
-	}
-
-	/*
-	 * Now that we are finally serialised with request submit/unsubmit,
-	 * [with b->irq_lock] and with i915_request_retire() [via checking
-	 * SIGNALED with rq->lock] confirm the request is indeed active. If
-	 * it is no longer active, the breadcrumb will be attached upon
-	 * i915_request_submit().
-	 */
+	spin_lock(&ce->signal_lock);
 	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
-		insert_breadcrumb(rq, b);
-
-	spin_unlock(&b->irq_lock);
+		insert_breadcrumb(rq);
+	spin_unlock(&ce->signal_lock);
 
 	return true;
 }
 
 void i915_request_cancel_breadcrumb(struct i915_request *rq)
 {
-	struct intel_breadcrumbs *b = rq->engine->breadcrumbs;
+	struct intel_context *ce = rq->context;
+	bool release;
 
-	/*
-	 * We must wait for b->irq_lock so that we know the interrupt handler
-	 * has released its reference to the intel_context and has completed
-	 * the DMA_FENCE_FLAG_SIGNALED_BIT/I915_FENCE_FLAG_SIGNAL dance (if
-	 * required).
-	 */
-	spin_lock(&b->irq_lock);
-	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
-		struct intel_context *ce = rq->context;
+	if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
+		return;
 
-		list_del(&rq->signal_link);
-		if (list_empty(&ce->signals))
-			remove_signaling_context(b, ce);
+	spin_lock(&ce->signal_lock);
+	list_del_rcu(&rq->signal_link);
+	release = remove_signaling_context(rq->engine->breadcrumbs, ce);
+	spin_unlock(&ce->signal_lock);
+	if (release)
+		intel_context_put(ce);
 
-		clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
-		i915_request_put(rq);
-	}
-	spin_unlock(&b->irq_lock);
+	i915_request_put(rq);
 }
 
 static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
@@ -454,18 +476,17 @@ static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
 
 	drm_printf(p, "Signals:\n");
 
-	spin_lock_irq(&b->irq_lock);
-	list_for_each_entry(ce, &b->signalers, signal_link) {
-		list_for_each_entry(rq, &ce->signals, signal_link) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
+		list_for_each_entry_rcu(rq, &ce->signals, signal_link)
 			drm_printf(p, "\t[%llx:%llx%s] @ %dms\n",
 				   rq->fence.context, rq->fence.seqno,
 				   i915_request_completed(rq) ? "!" :
 				   i915_request_started(rq) ? "*" :
 				   "",
 				   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
-		}
 	}
-	spin_unlock_irq(&b->irq_lock);
+	rcu_read_unlock();
 }
 
 void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
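For reference, the llist_add()/llist_del_all() pair now used for b->signaled_requests is a lock-free intrusive stack: producers push nodes with a compare-and-swap, the consumer detaches the whole chain with a single atomic exchange. A minimal userspace model with C11 atomics (names and the seqno payload are invented; this is not the kernel llist API itself):

/*
 * Producers may run concurrently from any context; the consumer grabs the
 * entire list at once, which is exactly what signal_irq_work() wants.
 */
#include <stdatomic.h>
#include <stdio.h>

struct node {
	struct node *next;
	int seqno;	/* stand-in for the request */
};

static _Atomic(struct node *) head;

static void push(struct node *n)	/* cf. llist_add() */
{
	struct node *first = atomic_load(&head);

	do {
		n->next = first;
	} while (!atomic_compare_exchange_weak(&head, &first, n));
}

static struct node *del_all(void)	/* cf. llist_del_all() */
{
	return atomic_exchange(&head, NULL);
}

int main(void)
{
	struct node a = { .seqno = 1 }, b = { .seqno = 2 };

	push(&a);
	push(&b);

	/* Nodes come back LIFO: 2 then 1 */
	for (struct node *n = del_all(); n; n = n->next)
		printf("signal %d\n", n->seqno);

	return 0;
}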
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
index 8e53b9942695..a74bb3062bd8 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
@@ -29,18 +29,16 @@
  * the overhead of waking that client is much preferred.
  */
 struct intel_breadcrumbs {
-	spinlock_t irq_lock; /* protects the lists used in hardirq context */
-
 	/* Not all breadcrumbs are attached to physical HW */
 	struct intel_engine_cs *irq_engine;
 
+	spinlock_t signalers_lock; /* protects the list of signalers */
 	struct list_head signalers;
-	struct list_head signaled_requests;
+	struct llist_head signaled_requests;
 
+	spinlock_t irq_lock; /* protects the interrupt from hardirq context */
 	struct irq_work irq_work; /* for use from inside irq_lock */
-
 	unsigned int irq_enabled;
-
 	bool irq_armed;
 };
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 92a3f25c4006..349e7fa1488d 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -25,11 +25,18 @@ static struct intel_context *intel_context_alloc(void)
 	return kmem_cache_zalloc(global.slab_ce, GFP_KERNEL);
 }
 
-void intel_context_free(struct intel_context *ce)
+static void rcu_context_free(struct rcu_head *rcu)
 {
+	struct intel_context *ce = container_of(rcu, typeof(*ce), rcu);
+
 	kmem_cache_free(global.slab_ce, ce);
 }
 
+void intel_context_free(struct intel_context *ce)
+{
+	call_rcu(&ce->rcu, rcu_context_free);
+}
+
 struct intel_context *
intel_context_create(struct intel_engine_cs *engine)
 {
@@ -356,8 +363,7 @@ static int __intel_context_active(struct i915_active *active)
 }
 
 void
-intel_context_init(struct intel_context *ce,
-		   struct intel_engine_cs *engine)
+intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
 {
 	GEM_BUG_ON(!engine->cops);
 	GEM_BUG_ON(!engine->gt->vm);
@@ -373,7 +379,8 @@ intel_context_init(struct intel_context *ce,
 
 	ce->vm = i915_vm_get(engine->gt->vm);
 
-	INIT_LIST_HEAD(&ce->signal_link);
+	/* NB ce->signal_link/lock is used under RCU */
+	spin_lock_init(&ce->signal_lock);
 	INIT_LIST_HEAD(&ce->signals);
 
 	mutex_init(&ce->pin_mutex);
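intel_context_free() now defers the kfree via call_rcu(), and in the intel_context_types.h hunk that follows the refcount shares storage with the rcu_head in a union: once the last reference drops, the same bytes are reused to queue the deferred free, which is why kref_get_unless_zero() is forbidden. A toy userspace model of that storage-reuse pattern (everything here is illustrative; a plain list stands in for the RCU grace-period machinery):

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct defer_node {			/* stand-in for struct rcu_head */
	struct defer_node *next;
	void (*func)(struct defer_node *);
};

struct context {
	union {
		int ref;		/* only valid while ref > 0 */
		struct defer_node rcu;	/* reused after the final put */
	};
};

static struct defer_node *pending;

static void free_context(struct defer_node *n)
{
	free((char *)n - offsetof(struct context, rcu));
}

static void context_put(struct context *ce)
{
	if (--ce->ref)
		return;

	/* Final reference gone: recycle the storage as a deferred node */
	ce->rcu.func = free_context;
	ce->rcu.next = pending;
	pending = &ce->rcu;
}

int main(void)
{
	struct context *ce = calloc(1, sizeof(*ce));

	ce->ref = 1;
	context_put(ce);		/* queued, not yet freed */

	/* "Grace period" elapsed: run the deferred callbacks */
	while (pending) {
		struct defer_node *n = pending;

		pending = n->next;
		n->func(n);
	}

	printf("context reclaimed after grace period\n");
	return 0;
}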
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 552cb57a2e8c..52fa9c132746 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -25,6 +25,7 @@ DECLARE_EWMA(runtime, 3, 8);
 struct i915_gem_context;
 struct i915_gem_ww_ctx;
 struct i915_vma;
+struct intel_breadcrumbs;
 struct intel_context;
 struct intel_ring;
 
@@ -44,7 +45,16 @@ struct intel_context_ops {
 };
 
 struct intel_context {
-	struct kref ref;
+	/*
+	 * Note: Some fields may be accessed under RCU.
+	 *
+	 * Unless otherwise noted a field can safely be assumed to be protected
+	 * by strong reference counting.
+	 */
+	union {
+		struct kref ref; /* no kref_get_unless_zero()! */
+		struct rcu_head rcu;
+	};
 
 	struct intel_engine_cs *engine;
 	struct intel_engine_cs *inflight;
@@ -54,8 +64,15 @@ struct intel_context {
 	struct i915_address_space *vm;
 	struct i915_gem_context __rcu *gem_context;
 
-	struct list_head signal_link;
-	struct list_head signals;
+	/*
+	 * @signal_lock protects the list of requests that need signaling,
+	 * @signals. While there are any requests that need signaling,
+	 * we add the context to the breadcrumbs worker, and remove it
+	 * upon completion/cancellation of the last request.
+	 */
+	struct list_head signal_link; /* Accessed under RCU */
+	struct list_head signals; /* Guarded by signal_lock */
+	spinlock_t signal_lock; /* protects signals, the list of requests */
 
 	struct i915_vma *state;
 	struct intel_ring *ring;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 5bfb5f7ed02c..0b31670343f5 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -305,8 +305,9 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 	engine->i915 = i915;
 	engine->gt = gt;
 	engine->uncore = gt->uncore;
-	engine->hw_id = engine->guc_id = info->hw_id;
 	engine->mmio_base = __engine_mmio_base(i915, info->mmio_bases);
+	engine->hw_id = info->hw_id;
+	engine->guc_id = MAKE_GUC_ID(info->class, info->instance);
 
 	engine->class = info->class;
 	engine->instance = info->instance;
@@ -371,7 +372,8 @@ static void __setup_engine_capabilities(struct intel_engine_cs *engine)
 		 * instances.
		 */
 		if ((INTEL_GEN(i915) >= 11 &&
-		     engine->gt->info.vdbox_sfc_access & engine->mask) ||
+		     (engine->gt->info.vdbox_sfc_access &
+		      BIT(engine->instance))) ||
 		    (INTEL_GEN(i915) >= 9 && engine->instance == 0))
 			engine->uabi_capabilities |=
 				I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
@@ -1599,6 +1601,41 @@ static unsigned long list_count(struct list_head *list)
 	return count;
 }
 
+static unsigned long read_ul(void *p, size_t x)
+{
+	return *(unsigned long *)(p + x);
+}
+
+static void print_properties(struct intel_engine_cs *engine,
+			     struct drm_printer *m)
+{
+	static const struct pmap {
+		size_t offset;
+		const char *name;
+	} props[] = {
+#define P(x) { \
+	.offset = offsetof(typeof(engine->props), x), \
+	.name = #x \
+}
+		P(heartbeat_interval_ms),
+		P(max_busywait_duration_ns),
+		P(preempt_timeout_ms),
+		P(stop_timeout_ms),
+		P(timeslice_duration_ms),
+
+		{},
+#undef P
+	};
+	const struct pmap *p;
+
+	drm_printf(m, "\tProperties:\n");
+	for (p = props; p->name; p++)
+		drm_printf(m, "\t\t%s: %lu [default %lu]\n",
+			   p->name,
+			   read_ul(&engine->props, p->offset),
+			   read_ul(&engine->defaults, p->offset));
+}
+
 void intel_engine_dump(struct intel_engine_cs *engine,
 		       struct drm_printer *m,
 		       const char *header, ...)
@@ -1641,6 +1678,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	drm_printf(m, "\tReset count: %d (global %d)\n",
 		   i915_reset_engine_count(error, engine),
 		   i915_reset_count(error));
+	print_properties(engine, m);
 
 	drm_printf(m, "\tRequests:\n");
 
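print_properties() above describes each engine property exactly once as an {offsetof, name} pair, then prints the current and default values generically. The same trick as a self-contained userspace program (struct fields and values are made up for illustration):

#include <stddef.h>
#include <stdio.h>

struct props {
	unsigned long heartbeat_interval_ms;
	unsigned long preempt_timeout_ms;
};

static unsigned long read_ul(const void *p, size_t x)
{
	return *(const unsigned long *)((const char *)p + x);
}

int main(void)
{
	static const struct pmap {
		size_t offset;
		const char *name;
	} props[] = {
#define P(x) { offsetof(struct props, x), #x }
		P(heartbeat_interval_ms),
		P(preempt_timeout_ms),
#undef P
		{ 0 },	/* sentinel */
	};
	const struct props cur = { 2500, 640 }, def = { 2500, 7500 };

	for (const struct pmap *p = props; p->name; p++)
		printf("%s: %lu [default %lu]\n", p->name,
		       read_ul(&cur, p->offset), read_ul(&def, p->offset));

	return 0;
}

Adding a new property then only requires one P(...) line; the printing loop never changes.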
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 5067d0524d4b..9060385cd69e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -41,6 +41,8 @@ static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
 {
 	engine->wakeref_serial = READ_ONCE(engine->serial) + 1;
 	i915_request_add_active_barriers(rq);
+	if (!engine->heartbeat.systole && intel_engine_has_heartbeat(engine))
+		engine->heartbeat.systole = i915_request_get(rq);
 }
 
 static void show_heartbeat(const struct i915_request *rq,
@@ -144,8 +146,6 @@ static void heartbeat(struct work_struct *wrk)
 		goto unlock;
 
 	idle_pulse(engine, rq);
-	if (engine->i915->params.enable_hangcheck)
-		engine->heartbeat.systole = i915_request_get(rq);
 
 	__i915_request_commit(rq);
 	__i915_request_queue(rq, &attr);
@@ -153,7 +153,7 @@ static void heartbeat(struct work_struct *wrk)
 unlock:
 	mutex_unlock(&ce->timeline->mutex);
 out:
-	if (!next_heartbeat(engine))
+	if (!engine->i915->params.enable_hangcheck || !next_heartbeat(engine))
 		i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
 	intel_engine_pm_put(engine);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index f7b2e07e2229..499b09cb4acf 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -17,6 +17,25 @@
 #include "intel_ring.h"
 #include "shmem_utils.h"
 
+static void dbg_poison_ce(struct intel_context *ce)
+{
+	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+		return;
+
+	if (ce->state) {
+		struct drm_i915_gem_object *obj = ce->state->obj;
+		int type = i915_coherent_map_type(ce->engine->i915);
+		void *map;
+
+		map = i915_gem_object_pin_map(obj, type);
+		if (!IS_ERR(map)) {
+			memset(map, CONTEXT_REDZONE, obj->base.size);
+			i915_gem_object_flush_map(obj);
+			i915_gem_object_unpin_map(obj);
+		}
+	}
+}
+
 static int __engine_unpark(struct intel_wakeref *wf)
 {
 	struct intel_engine_cs *engine =
@@ -32,20 +51,14 @@ static int __engine_unpark(struct intel_wakeref *wf)
 	if (ce) {
 		GEM_BUG_ON(test_bit(CONTEXT_VALID_BIT, &ce->flags));
 
+		/* Flush all pending HW writes before we touch the context */
+		while (unlikely(intel_context_inflight(ce)))
+			intel_engine_flush_submission(engine);
+
 		/* First poison the image to verify we never fully trust it */
-		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && ce->state) {
-			struct drm_i915_gem_object *obj = ce->state->obj;
-			int type = i915_coherent_map_type(engine->i915);
-			void *map;
-
-			map = i915_gem_object_pin_map(obj, type);
-			if (!IS_ERR(map)) {
-				memset(map, CONTEXT_REDZONE, obj->base.size);
-				i915_gem_object_flush_map(obj);
-				i915_gem_object_unpin_map(obj);
-			}
-		}
+		dbg_poison_ce(ce);
 
+		/* Scrub the context image after our loss of control */
 		ce->ops->reset(ce);
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 188a5f70177d..cf94525be2c1 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -1383,7 +1383,7 @@ intel_partial_pages(const struct i915_ggtt_view *view,
 	if (ret)
 		goto err_sg_alloc;
 
-	iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset);
+	iter = i915_gem_object_get_sg_dma(obj, view->partial.offset, &offset);
 	GEM_BUG_ON(!iter);
 
 	sg = st->sgl;
@@ -1391,7 +1391,7 @@ intel_partial_pages(const struct i915_ggtt_view *view,
 	do {
 		unsigned int len;
 
-		len = min(iter->length - (offset << PAGE_SHIFT),
+		len = min(sg_dma_len(iter) - (offset << PAGE_SHIFT),
 			  count << PAGE_SHIFT);
 		sg_set_page(sg, NULL, len, 0);
 		sg_dma_address(sg) =
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 39b428c5049c..44f1d51e5ae5 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -614,6 +614,8 @@ void intel_gt_driver_remove(struct intel_gt *gt)
 
 void intel_gt_driver_unregister(struct intel_gt *gt)
 {
+	intel_wakeref_t wakeref;
+
 	intel_rps_driver_unregister(&gt->rps);
 
 	/*
@@ -622,16 +624,15 @@ void intel_gt_driver_unregister(struct intel_gt *gt)
 	 * resources.
	 */
 	intel_gt_set_wedged(gt);
+
+	/* Scrub all HW state upon release */
+	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+		__intel_gt_reset(gt, ALL_ENGINES);
 }
 
 void intel_gt_driver_release(struct intel_gt *gt)
 {
 	struct i915_address_space *vm;
-	intel_wakeref_t wakeref;
-
-	/* Scrub all HW state upon release */
-	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
-		__intel_gt_reset(gt, ALL_ENGINES);
 
 	vm = fetch_and_zero(&gt->vm);
 	if (vm) /* FIXME being called twice on error paths :( */
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 3f1114b58b01..7bfe9072be9a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -324,7 +324,7 @@ static void cnl_setup_private_ppat(struct intel_uncore *uncore)
 			   GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
 	intel_uncore_write(uncore,
 			   GEN10_PAT_INDEX(2),
-			   GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
+			   GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
 	intel_uncore_write(uncore,
 			   GEN10_PAT_INDEX(3),
 			   GEN8_PPAT_UC);
@@ -349,17 +349,23 @@ static void cnl_setup_private_ppat(struct intel_uncore *uncore)
  */
 static void bdw_setup_private_ppat(struct intel_uncore *uncore)
 {
+	struct drm_i915_private *i915 = uncore->i915;
 	u64 pat;
 
 	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) |	/* for normal objects, no eLLC */
 	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) |	/* for something pointing to ptes? */
-	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) |	/* for scanout with eLLC */
 	      GEN8_PPAT(3, GEN8_PPAT_UC) |			/* Uncached objects, mostly for scanout */
 	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
 	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
 	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
 	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
 
+	/* for scanout with eLLC */
+	if (INTEL_GEN(i915) >= 9)
+		pat |= GEN8_PPAT(2, GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
+	else
+		pat |= GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
+
 	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
 	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
 }
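bdw_setup_private_ppat() packs eight per-index cache attributes into one 64-bit PAT value, one byte per index, then writes it out as two 32-bit halves. A sketch of just the packing arithmetic (the flag values below are invented; only the shift-by-8-per-index scheme is the point):

#include <stdint.h>
#include <stdio.h>

#define PPAT(i, x)	((uint64_t)(x) << ((i) * 8))

#define PPAT_UC		(0 << 0)	/* hypothetical encodings */
#define PPAT_WT		(1 << 0)
#define PPAT_WB		(3 << 0)
#define PPAT_LLC	(1 << 2)

int main(void)
{
	uint64_t pat;

	pat = PPAT(0, PPAT_WB | PPAT_LLC) |	/* normal objects */
	      PPAT(2, PPAT_WT | PPAT_LLC) |	/* scanout */
	      PPAT(3, PPAT_UC);			/* uncached */

	/* The register is written as two 32-bit halves */
	printf("PAT_LO=%#x PAT_HI=%#x\n",
	       (unsigned int)(pat & 0xffffffff), (unsigned int)(pat >> 32));

	return 0;
}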
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index c13c650ced22..8a33940a71f3 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -580,7 +580,7 @@ static inline struct sgt_dma {
 	struct scatterlist *sg = vma->pages->sgl;
 	dma_addr_t addr = sg_dma_address(sg);
 
-	return (struct sgt_dma){ sg, addr, addr + sg->length };
+	return (struct sgt_dma){ sg, addr, addr + sg_dma_len(sg) };
 }
 
 #endif
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index f82c6dd1de18..7614a3d24fca 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -182,6 +182,7 @@ struct virtual_engine {
 	struct intel_engine_cs base;
 	struct intel_context context;
+	struct rcu_work rcu;
 
 	/*
	 * We allow only a single request through the virtual engine at a time
@@ -1215,7 +1216,8 @@ static void intel_engine_context_out(struct intel_engine_cs *engine)
 
 static void
 execlists_check_context(const struct intel_context *ce,
-			const struct intel_engine_cs *engine)
+			const struct intel_engine_cs *engine,
+			const char *when)
 {
 	const struct intel_ring *ring = ce->ring;
 	u32 *regs = ce->lrc_reg_state;
@@ -1250,7 +1252,7 @@ execlists_check_context(const struct intel_context *ce,
 		valid = false;
 	}
 
-	WARN_ONCE(!valid, "Invalid lrc state found before submission\n");
+	WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when);
 }
 
 static void restore_default_state(struct intel_context *ce,
@@ -1346,7 +1348,7 @@ __execlists_schedule_in(struct i915_request *rq)
 		reset_active(rq, engine);
 
 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
-		execlists_check_context(ce, engine);
+		execlists_check_context(ce, engine, "before");
 
 	if (ce->tag) {
 		/* Use a fixed tag for OA and friends */
@@ -1417,6 +1419,9 @@ __execlists_schedule_out(struct i915_request *rq,
 	 * refrain from doing non-trivial work here.
	 */
 
+	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+		execlists_check_context(ce, engine, "after");
+
 	/*
	 * If we have just completed this context, the engine may now be
	 * idle and we want to re-enter powersaving.
	 */
@@ -2495,25 +2500,11 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
  * bits 47-57: sw context id of the lrc the GT switched away from
  * bits 58-63: sw counter of the lrc the GT switched away from
  */
-static inline bool gen12_csb_parse(const u64 *csb)
+static inline bool gen12_csb_parse(const u64 csb)
 {
-	bool ctx_away_valid;
-	bool new_queue;
-	u64 entry;
-
-	/* HSD#22011248461 */
-	entry = READ_ONCE(*csb);
-	if (unlikely(entry == -1)) {
-		preempt_disable();
-		if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
-			GEM_WARN_ON("50us CSB timeout");
-		preempt_enable();
-	}
-	WRITE_ONCE(*(u64 *)csb, -1);
-
-	ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
-	new_queue =
-		lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
+	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(csb));
+	bool new_queue =
+		lower_32_bits(csb) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
 
 	/*
	 * The context switch detail is not guaranteed to be 5 when a preemption
@@ -2523,7 +2514,7 @@ static inline bool gen12_csb_parse(const u64 *csb)
	 * would require some extra handling, but we don't support that.
	 */
 	if (!ctx_away_valid || new_queue) {
-		GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(entry)));
+		GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(csb)));
 		return true;
 	}
 
@@ -2532,19 +2523,79 @@ static inline bool gen12_csb_parse(const u64 *csb)
	 * context switch on an unsuccessful wait instruction since we always
	 * use polling mode.
	 */
-	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(entry)));
+	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb)));
 
 	return false;
 }
 
-static inline bool gen8_csb_parse(const u64 *csb)
+static inline bool gen8_csb_parse(const u64 csb)
 {
-	return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
+	return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
+}
+
+static noinline u64
+wa_csb_read(const struct intel_engine_cs *engine, u64 * const csb)
+{
+	u64 entry;
+
+	/*
+	 * Reading from the HWSP has one particular advantage: we can detect
+	 * a stale entry. Since the write into HWSP is broken, we have no reason
+	 * to trust the HW at all, the mmio entry may equally be unordered, so
+	 * we prefer the path that is self-checking and as a last resort,
+	 * return the mmio value.
+	 *
+	 * tgl,dg1:HSDES#22011327657
+	 */
+	preempt_disable();
+	if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 10)) {
+		int idx = csb - engine->execlists.csb_status;
+		int status;
+
+		status = GEN8_EXECLISTS_STATUS_BUF;
+		if (idx >= 6) {
+			status = GEN11_EXECLISTS_STATUS_BUF2;
+			idx -= 6;
+		}
+		status += sizeof(u64) * idx;
+
+		entry = intel_uncore_read64(engine->uncore,
+					    _MMIO(engine->mmio_base + status));
+	}
+	preempt_enable();
+
+	return entry;
+}
+
+static inline u64
+csb_read(const struct intel_engine_cs *engine, u64 * const csb)
+{
+	u64 entry = READ_ONCE(*csb);
+
+	/*
+	 * Unfortunately, the GPU does not always serialise its write
+	 * of the CSB entries before its write of the CSB pointer, at least
+	 * from the perspective of the CPU, using what is known as a Global
+	 * Observation Point. We may read a new CSB tail pointer, but then
+	 * read the stale CSB entries, causing us to misinterpret the
+	 * context-switch events, and eventually declare the GPU hung.
+	 *
+	 * icl:HSDES#1806554093
+	 * tgl:HSDES#22011248461
+	 */
+	if (unlikely(entry == -1))
+		entry = wa_csb_read(engine, csb);
+
+	/* Consume this entry so that we can spot its future reuse. */
+	WRITE_ONCE(*csb, -1);
+
+	/* ELSP is an implicit wmb() before the GPU wraps and overwrites csb */
+	return entry;
 }
 
 static void process_csb(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	const u64 * const buf = execlists->csb_status;
+	u64 * const buf = execlists->csb_status;
 	const u8 num_entries = execlists->csb_size;
 	u8 head, tail;
 
@@ -2602,6 +2653,7 @@ static void process_csb(struct intel_engine_cs *engine)
 	rmb();
 	do {
 		bool promote;
+		u64 csb;
 
 		if (++head == num_entries)
 			head = 0;
@@ -2624,15 +2676,14 @@ static void process_csb(struct intel_engine_cs *engine)
		 * status notifier.
		 */
 
+		csb = csb_read(engine, buf + head);
 		ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
-			     head,
-			     upper_32_bits(buf[head]),
-			     lower_32_bits(buf[head]));
+			     head, upper_32_bits(csb), lower_32_bits(csb));
 
 		if (INTEL_GEN(engine->i915) >= 12)
-			promote = gen12_csb_parse(buf + head);
+			promote = gen12_csb_parse(csb);
 		else
-			promote = gen8_csb_parse(buf + head);
+			promote = gen8_csb_parse(csb);
 		if (promote) {
 			struct i915_request * const *old = execlists->active;
 
@@ -2787,6 +2838,9 @@ static void __execlists_hold(struct i915_request *rq)
 static bool execlists_hold(struct intel_engine_cs *engine,
 			   struct i915_request *rq)
 {
+	if (i915_request_on_hold(rq))
+		return false;
+
 	spin_lock_irq(&engine->active.lock);
 
 	if (i915_request_completed(rq)) { /* too late! */
@@ -2987,6 +3041,8 @@ static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
 	if (!cap->error->gt->engine)
 		goto err_gt;
 
+	cap->error->gt->engine->hung = true;
+
 	return cap;
 
 err_gt:
@@ -3168,8 +3224,10 @@ static void execlists_submission_tasklet(unsigned long data)
 		spin_unlock_irqrestore(&engine->active.lock, flags);
 
 		/* Recheck after serialising with direct-submission */
-		if (unlikely(timeout && preempt_timeout(engine)))
+		if (unlikely(timeout && preempt_timeout(engine))) {
+			cancel_timer(&engine->execlists.preempt);
 			execlists_reset(engine, "preemption time out");
+		}
 	}
 }
 
@@ -4047,6 +4105,8 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
 
 static void execlists_sanitize(struct intel_engine_cs *engine)
 {
+	GEM_BUG_ON(execlists_active(&engine->execlists));
+
 	/*
	 * Poison residual state on resume, in case the suspend didn't!
	 *
@@ -4376,6 +4436,7 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine)
 	/* Mark all executing requests as skipped. */
 	list_for_each_entry(rq, &engine->active.requests, sched.link)
 		mark_eio(rq);
+	intel_engine_signal_breadcrumbs(engine);
 
 	/* Flush the queued requests to the timeline list (for retiring). */
 	while ((rb = rb_first_cached(&execlists->queue))) {
@@ -5425,44 +5486,90 @@ static struct list_head *virtual_queue(struct virtual_engine *ve)
 	return &ve->base.execlists.default_priolist.requests[0];
 }
 
-static void virtual_context_destroy(struct kref *kref)
+static void rcu_virtual_context_destroy(struct work_struct *wrk)
 {
 	struct virtual_engine *ve =
-		container_of(kref, typeof(*ve), context.ref);
+		container_of(wrk, typeof(*ve), rcu.work);
 	unsigned int n;
 
-	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
-	GEM_BUG_ON(ve->request);
 	GEM_BUG_ON(ve->context.inflight);
 
+	/* Preempt-to-busy may leave a stale request behind. */
+	if (unlikely(ve->request)) {
+		struct i915_request *old;
+
+		spin_lock_irq(&ve->base.active.lock);
+
+		old = fetch_and_zero(&ve->request);
+		if (old) {
+			GEM_BUG_ON(!i915_request_completed(old));
+			__i915_request_submit(old);
+			i915_request_put(old);
+		}
+
+		spin_unlock_irq(&ve->base.active.lock);
+	}
+
+	/*
+	 * Flush the tasklet in case it is still running on another core.
+	 *
+	 * This needs to be done before we remove ourselves from the siblings'
+	 * rbtrees as in the case it is running in parallel, it may reinsert
+	 * the rb_node into a sibling.
+	 */
+	tasklet_kill(&ve->base.execlists.tasklet);
+
+	/* Decouple ourselves from the siblings, no more access allowed. */
 	for (n = 0; n < ve->num_siblings; n++) {
 		struct intel_engine_cs *sibling = ve->siblings[n];
 		struct rb_node *node = &ve->nodes[sibling->id].rb;
-		unsigned long flags;
 
 		if (RB_EMPTY_NODE(node))
 			continue;
 
-		spin_lock_irqsave(&sibling->active.lock, flags);
+		spin_lock_irq(&sibling->active.lock);
 
 		/* Detachment is lazily performed in the execlists tasklet */
 		if (!RB_EMPTY_NODE(node))
 			rb_erase_cached(node, &sibling->execlists.virtual);
 
-		spin_unlock_irqrestore(&sibling->active.lock, flags);
+		spin_unlock_irq(&sibling->active.lock);
 	}
 	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
+	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
 
 	if (ve->context.state)
 		__execlists_context_fini(&ve->context);
 	intel_context_fini(&ve->context);
 
+	intel_breadcrumbs_free(ve->base.breadcrumbs);
 	intel_engine_free_request_pool(&ve->base);
 
 	kfree(ve->bonds);
 	kfree(ve);
 }
 
+static void virtual_context_destroy(struct kref *kref)
+{
+	struct virtual_engine *ve =
+		container_of(kref, typeof(*ve), context.ref);
+
+	GEM_BUG_ON(!list_empty(&ve->context.signals));
+
+	/*
+	 * When destroying the virtual engine, we have to be aware that
+	 * it may still be in use from an hardirq/softirq context causing
+	 * the resubmission of a completed request (background completion
+	 * due to preempt-to-busy). Before we can free the engine, we need
+	 * to flush the submission code and tasklets that are still potentially
+	 * accessing the engine. Flushing the tasklets requires process context,
+	 * and since we can guard the resubmit onto the engine with an RCU read
+	 * lock, we can delegate the free of the engine to an RCU worker.
+	 */
+	INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy);
+	queue_rcu_work(system_wq, &ve->rcu);
+}
+
 static void virtual_engine_initial_hint(struct virtual_engine *ve)
 {
 	int swp;
@@ -5922,18 +6029,6 @@ int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
 	return 0;
 }
 
-struct intel_engine_cs *
-intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
-				 unsigned int sibling)
-{
-	struct virtual_engine *ve = to_virtual_engine(engine);
-
-	if (sibling >= ve->num_siblings)
-		return NULL;
-
-	return ve->siblings[sibling];
-}
-
 void intel_execlists_show_requests(struct intel_engine_cs *engine,
 				   struct drm_printer *m,
 				   void (*show_request)(struct drm_printer *m,
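The csb_read()/wa_csb_read() pair above works because every consumed CSB slot is poisoned with -1: seeing the poison tells the reader that the GPU's entry write has not landed yet, so it falls back to a slower authoritative read before consuming the slot again. A userspace model of that protocol (no MMIO here; the fallback value and sentinel handling are illustrative only):

#include <stdint.h>
#include <stdio.h>

#define STALE	((uint64_t)-1)

static uint64_t slow_authoritative_read(int idx)
{
	/* Stand-in for the mmio STATUS_BUF fallback */
	return 0x8002ull;
}

static uint64_t csb_read(uint64_t *slot, int idx)
{
	uint64_t entry = *slot;

	if (entry == STALE)	/* write not visible yet */
		entry = slow_authoritative_read(idx);

	*slot = STALE;		/* consume, so reuse is detectable */
	return entry;
}

int main(void)
{
	uint64_t csb[2] = { 0x8001ull, STALE };

	printf("csb[0] = %#llx\n", (unsigned long long)csb_read(&csb[0], 0));
	printf("csb[1] = %#llx\n", (unsigned long long)csb_read(&csb[1], 1));
	return 0;
}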
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index 91fd8e452d9b..c2d287f25497 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -121,10 +121,6 @@ int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
 				     const struct intel_engine_cs *master,
 				     const struct intel_engine_cs *sibling);
 
-struct intel_engine_cs *
-intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
-				 unsigned int sibling);
-
 bool
 intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
index 93cb6c460508..1b51f7b9a5c3 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -49,4 +49,7 @@
 #define GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT	0x1A
 #define GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT	0xD
 
+#define GEN8_EXECLISTS_STATUS_BUF 0x370
+#define GEN11_EXECLISTS_STATUS_BUF2 0x3c0
+
 #endif /* _INTEL_LRC_REG_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 39179a3eee98..ab6870242e18 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -59,8 +59,7 @@ struct drm_i915_mocs_table {
 #define _L3_CACHEABILITY(value)	((value) << 4)
 
 /* Helper defines */
-#define GEN9_NUM_MOCS_ENTRIES	62 /* 62 out of 64 - 63 & 64 are reserved. */
-#define GEN11_NUM_MOCS_ENTRIES	64 /* 63-64 are reserved, but configured. */
+#define GEN9_NUM_MOCS_ENTRIES	64 /* 63-64 are reserved, but configured. */
 
 /* (e)LLC caching options */
 /*
@@ -124,14 +123,26 @@ struct drm_i915_mocs_table {
 		   LE_1_UC | LE_TC_2_LLC_ELLC, \
 		   L3_1_UC), \
 	MOCS_ENTRY(I915_MOCS_PTE, \
-		   LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), \
+		   LE_0_PAGETABLE | LE_TC_0_PAGETABLE | LE_LRUM(3), \
 		   L3_3_WB)
 
 static const struct drm_i915_mocs_entry skl_mocs_table[] = {
 	GEN9_MOCS_ENTRIES,
 	MOCS_ENTRY(I915_MOCS_CACHED,
 		   LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3),
-		   L3_3_WB)
+		   L3_3_WB),
+
+	/*
+	 * mocs:63
+	 * - used by the L3 for all of its evictions.
+	 *   Thus it is expected to allow LLC cacheability to enable coherent
+	 *   flows to be maintained.
+	 * - used to force L3 uncachable cycles.
+	 *   Thus it is expected to make the surface L3 uncacheable.
+	 */
+	MOCS_ENTRY(63,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_1_UC)
 };
 
 /* NOTE: the LE_TGT_CACHE is not used on Broxton */
@@ -243,8 +254,9 @@ static const struct drm_i915_mocs_entry tgl_mocs_table[] = {
	 * only, __init_mocs_table() take care to program unused index with
	 * this entry.
	 */
-	MOCS_ENTRY(1, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
-		   L3_3_WB),
+	MOCS_ENTRY(I915_MOCS_PTE,
+		   LE_0_PAGETABLE | LE_TC_0_PAGETABLE,
+		   L3_1_UC),
 	GEN11_MOCS_ENTRIES,
 
 	/* Implicitly enable L1 - HDC:L1 + L3 + LLC */
@@ -280,7 +292,7 @@ static const struct drm_i915_mocs_entry icl_mocs_table[] = {
 		   L3_1_UC),
 	/* Base - L3 + LeCC:PAT (Deprecated) */
 	MOCS_ENTRY(I915_MOCS_PTE,
-		   LE_0_PAGETABLE | LE_TC_1_LLC,
+		   LE_0_PAGETABLE | LE_TC_0_PAGETABLE,
 		   L3_3_WB),
 
 	GEN11_MOCS_ENTRIES
@@ -348,15 +360,15 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
 	if (IS_DG1(i915)) {
 		table->size = ARRAY_SIZE(dg1_mocs_table);
 		table->table = dg1_mocs_table;
-		table->n_entries = GEN11_NUM_MOCS_ENTRIES;
+		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
 	} else if (INTEL_GEN(i915) >= 12) {
 		table->size  = ARRAY_SIZE(tgl_mocs_table);
 		table->table = tgl_mocs_table;
-		table->n_entries = GEN11_NUM_MOCS_ENTRIES;
+		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
 	} else if (IS_GEN(i915, 11)) {
 		table->size  = ARRAY_SIZE(icl_mocs_table);
 		table->table = icl_mocs_table;
-		table->n_entries = GEN11_NUM_MOCS_ENTRIES;
+		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
 	} else if (IS_GEN9_BC(i915) || IS_CANNONLAKE(i915)) {
 		table->size  = ARRAY_SIZE(skl_mocs_table);
 		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
index ab675d35030d..d7b8e4457fc2 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -56,9 +56,12 @@ static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
 
 static void gen11_rc6_enable(struct intel_rc6 *rc6)
 {
-	struct intel_uncore *uncore = rc6_to_uncore(rc6);
+	struct intel_gt *gt = rc6_to_gt(rc6);
+	struct intel_uncore *uncore = gt->uncore;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	u32 pg_enable;
+	int i;
 
 	/* 2b: Program RC6 thresholds.*/
 	set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
@@ -102,10 +105,19 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
 		GEN6_RC_CTL_RC6_ENABLE |
 		GEN6_RC_CTL_EI_MODE(1);
 
-	set(uncore, GEN9_PG_ENABLE,
-	    GEN9_RENDER_PG_ENABLE |
-	    GEN9_MEDIA_PG_ENABLE |
-	    GEN11_MEDIA_SAMPLER_PG_ENABLE);
+	pg_enable =
+		GEN9_RENDER_PG_ENABLE |
+		GEN9_MEDIA_PG_ENABLE |
+		GEN11_MEDIA_SAMPLER_PG_ENABLE;
+
+	if (INTEL_GEN(gt->i915) >= 12) {
+		for (i = 0; i < I915_MAX_VCS; i++)
+			if (HAS_ENGINE(gt, _VCS(i)))
+				pg_enable |= (VDN_HCP_POWERGATE_ENABLE(i) |
+					      VDN_MFX_POWERGATE_ENABLE(i));
+	}
+
+	set(uncore, GEN9_PG_ENABLE, pg_enable);
 }
 
 static void gen9_rc6_enable(struct intel_rc6 *rc6)
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index ac36b67fb46b..3654c955e6be 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -19,6 +19,7 @@
 #include "intel_engine_pm.h"
 #include "intel_gt.h"
 #include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
 #include "intel_reset.h"
 
 #include "uc/intel_guc.h"
@@ -1190,14 +1191,14 @@ static void intel_gt_reset_global(struct intel_gt *gt,
 
 	/* Use a watchdog to ensure that our reset completes */
 	intel_wedge_on_timeout(&w, gt, 5 * HZ) {
-		intel_prepare_reset(gt->i915);
+		intel_display_prepare_reset(gt->i915);
 
 		/* Flush everyone using a resource about to be clobbered */
 		synchronize_srcu_expedited(&gt->reset.backoff_srcu);
 
 		intel_gt_reset(gt, engine_mask, reason);
 
-		intel_finish_reset(gt->i915);
+		intel_display_finish_reset(gt->i915);
 	}
 
 	if (!test_bit(I915_WEDGED, &gt->reset.flags))
@@ -1250,7 +1251,7 @@ void intel_gt_handle_error(struct intel_gt *gt,
 	engine_mask &= gt->info.engine_mask;
 
 	if (flags & I915_ERROR_CAPTURE) {
-		i915_capture_error_state(gt->i915);
+		i915_capture_error_state(gt, engine_mask);
 		intel_gt_clear_error_registers(gt, engine_mask);
 	}
 
@@ -1370,6 +1371,7 @@ void intel_gt_set_wedged_on_fini(struct intel_gt *gt)
 {
 	intel_gt_set_wedged(gt);
 	set_bit(I915_WEDGED_ON_FINI, &gt->reset.flags);
+	intel_gt_retire_requests(gt); /* cleanup any wedged requests */
 }
 
 void intel_gt_init_reset(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 16b48e72c369..a41b43f445b8 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -444,6 +444,7 @@ static void reset_cancel(struct intel_engine_cs *engine)
 		i915_request_set_error_once(request, -EIO);
 		i915_request_mark_complete(request);
 	}
+	intel_engine_signal_breadcrumbs(engine);
 
 	/* Remaining _unready_ requests will be nop'ed when submitted */
 
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index 466ec671b379..b629eeb14002 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -897,6 +897,10 @@ void intel_rps_park(struct intel_rps *rps)
 		adj = -2;
 	rps->last_adj = adj;
 	rps->cur_freq = max_t(int, rps->cur_freq + adj, rps->min_freq);
+	if (rps->cur_freq < rps->efficient_freq) {
+		rps->cur_freq = rps->efficient_freq;
+		rps->last_adj = 0;
+	}
 
 	GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq);
 }
@@ -1973,7 +1977,7 @@ static struct drm_i915_private *mchdev_get(void)
 
 	rcu_read_lock();
 	i915 = rcu_dereference(ips_mchdev);
-	if (!kref_get_unless_zero(&i915->drm.ref))
+	if (i915 && !kref_get_unless_zero(&i915->drm.ref))
 		i915 = NULL;
 	rcu_read_unlock();
 
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index fed9503a7c4e..adc9a8ea410a 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -131,8 +131,10 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
 			return;
 		}
 
-		if (wal->list)
+		if (wal->list) {
 			memcpy(list, wal->list, sizeof(*wa) * wal->count);
+			kfree(wal->list);
+		}
 
 		wal->list = list;
 	}
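The _wa_add() change above plugs a leak: the workaround list grows by allocating a larger array and copying, so the old allocation must be freed after the memcpy. The same grow-and-copy pattern as a small userspace program (malloc/free in place of kmalloc/kfree; structure names are invented):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct wa_list {
	int *list;
	size_t count;
	size_t grow;	/* allocated capacity */
};

static int wa_add(struct wa_list *wal, int wa)
{
	if (wal->count == wal->grow) {
		size_t grow = wal->grow ? 2 * wal->grow : 4;
		int *list = malloc(grow * sizeof(*list));

		if (!list)
			return -1;

		if (wal->list) {
			memcpy(list, wal->list, wal->count * sizeof(*list));
			free(wal->list);	/* without this: a leak */
		}

		wal->list = list;
		wal->grow = grow;
	}

	wal->list[wal->count++] = wa;
	return 0;
}

int main(void)
{
	struct wa_list wal = { 0 };

	for (int i = 0; i < 10; i++)
		wa_add(&wal, i);

	printf("%zu workarounds stored\n", wal.count);
	free(wal.list);
	return 0;
}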
*/ + list_for_each_entry(rq, &mock->hw_queue, mock.link) { + mark_eio(rq); + __i915_request_submit(rq); } + INIT_LIST_HEAD(&mock->hw_queue); spin_unlock_irqrestore(&engine->active.lock, flags); } diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index e73854dd2fe0..b88aa35ad75b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -215,16 +215,17 @@ static int __live_heartbeat_fast(struct intel_engine_cs *engine) goto err_pm; for (i = 0; i < ARRAY_SIZE(times); i++) { - /* Manufacture a tick */ do { - while (READ_ONCE(engine->heartbeat.systole)) - flush_delayed_work(&engine->heartbeat.work); + /* Manufacture a tick */ + intel_engine_park_heartbeat(engine); + GEM_BUG_ON(engine->heartbeat.systole); + engine->serial++; /* pretend we are not idle! */ + intel_engine_unpark_heartbeat(engine); - engine->serial++; /* quick, pretend we are not idle! */ flush_delayed_work(&engine->heartbeat.work); if (!delayed_work_pending(&engine->heartbeat.work)) { - pr_err("%s: heartbeat did not start\n", - engine->name); + pr_err("%s: heartbeat %d did not start\n", + engine->name, i); err = -EINVAL; goto err_pm; } diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c index 3540ba9bd459..aa5675ecb5cc 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rps.c +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c @@ -219,7 +219,7 @@ int live_rps_clock_interval(void *arg) struct igt_spinner spin; int err = 0; - if (!intel_rps_is_enabled(rps)) + if (!intel_rps_is_enabled(rps) || INTEL_GEN(gt->i915) < 6) return 0; if (igt_spinner_init(&spin, gt)) @@ -1028,7 +1028,7 @@ int live_rps_interrupt(void *arg) * First, let's check whether or not we are receiving interrupts. */ - if (!intel_rps_has_interrupts(rps)) + if (!intel_rps_has_interrupts(rps) || INTEL_GEN(gt->i915) < 6) return 0; intel_gt_pm_get(gt); @@ -1133,7 +1133,7 @@ int live_rps_power(void *arg) * that theory. */ - if (!intel_rps_is_enabled(rps)) + if (!intel_rps_is_enabled(rps) || INTEL_GEN(gt->i915) < 6) return 0; if (!librapl_energy_uJ()) @@ -1237,7 +1237,7 @@ int live_rps_dynamic(void *arg) * moving parts into dynamic reclocking based on load. 
*/ - if (!intel_rps_is_enabled(rps)) + if (!intel_rps_is_enabled(rps) || INTEL_GEN(gt->i915) < 6) return 0; if (igt_spinner_init(&spin, gt)) diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index 19c2cb166e7c..2edf2b15885f 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -17,8 +17,9 @@ #include "../selftests/i915_random.h" #include "../i915_selftest.h" -#include "../selftests/igt_flush_test.h" -#include "../selftests/mock_gem_device.h" +#include "selftests/igt_flush_test.h" +#include "selftests/lib_sw_fence.h" +#include "selftests/mock_gem_device.h" #include "selftests/mock_timeline.h" static struct page *hwsp_page(struct intel_timeline *tl) @@ -755,6 +756,378 @@ out_free: return err; } +static int emit_read_hwsp(struct i915_request *rq, + u32 seqno, u32 hwsp, + u32 *addr) +{ + const u32 gpr = i915_mmio_reg_offset(GEN8_RING_CS_GPR(rq->engine->mmio_base, 0)); + u32 *cs; + + cs = intel_ring_begin(rq, 12); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = *addr; + *cs++ = 0; + *cs++ = seqno; + *addr += 4; + + *cs++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = gpr; + *cs++ = hwsp; + *cs++ = 0; + + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = gpr; + *cs++ = *addr; + *cs++ = 0; + *addr += 4; + + intel_ring_advance(rq, cs); + + return 0; +} + +struct hwsp_watcher { + struct i915_vma *vma; + struct i915_request *rq; + u32 addr; + u32 *map; +}; + +static bool cmp_lt(u32 a, u32 b) +{ + return a < b; +} + +static bool cmp_gte(u32 a, u32 b) +{ + return a >= b; +} + +static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(gt->i915, SZ_2M); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + w->map = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(w->map)) { + i915_gem_object_put(obj); + return PTR_ERR(w->map); + } + + vma = i915_gem_object_ggtt_pin_ww(obj, NULL, NULL, 0, 0, 0); + if (IS_ERR(vma)) { + i915_gem_object_put(obj); + return PTR_ERR(vma); + } + + w->vma = vma; + w->addr = i915_ggtt_offset(vma); + return 0; +} + +static int create_watcher(struct hwsp_watcher *w, + struct intel_engine_cs *engine, + int ringsz) +{ + struct intel_context *ce; + struct intel_timeline *tl; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + ce->ring = __intel_context_ring_size(ringsz); + w->rq = intel_context_create_request(ce); + intel_context_put(ce); + if (IS_ERR(w->rq)) + return PTR_ERR(w->rq); + + w->addr = i915_ggtt_offset(w->vma); + tl = w->rq->context->timeline; + + /* some light mutex juggling required; think co-routines */ + lockdep_unpin_lock(&tl->mutex, w->rq->cookie); + mutex_unlock(&tl->mutex); + + return 0; +} + +static int check_watcher(struct hwsp_watcher *w, const char *name, + bool (*op)(u32 hwsp, u32 seqno)) +{ + struct i915_request *rq = fetch_and_zero(&w->rq); + struct intel_timeline *tl = rq->context->timeline; + u32 offset, end; + int err; + + GEM_BUG_ON(w->addr - i915_ggtt_offset(w->vma) > w->vma->size); + + i915_request_get(rq); + mutex_lock(&tl->mutex); + rq->cookie = lockdep_pin_lock(&tl->mutex); + i915_request_add(rq); + + if (i915_request_wait(rq, 0, HZ) < 0) { + err = -ETIME; + goto out; + } + + err = 0; + offset = 0; + end = (w->addr - i915_ggtt_offset(w->vma)) / sizeof(*w->map); + while (offset < end) { + if (!op(w->map[offset + 1], w->map[offset])) 
{ + pr_err("Watcher '%s' found HWSP value %x for seqno %x\n", + name, w->map[offset + 1], w->map[offset]); + err = -EINVAL; + } + + offset += 2; + } + +out: + i915_request_put(rq); + return err; +} + +static void cleanup_watcher(struct hwsp_watcher *w) +{ + if (w->rq) { + struct intel_timeline *tl = w->rq->context->timeline; + + mutex_lock(&tl->mutex); + w->rq->cookie = lockdep_pin_lock(&tl->mutex); + + i915_request_add(w->rq); + } + + i915_vma_unpin_and_release(&w->vma, I915_VMA_RELEASE_MAP); +} + +static bool retire_requests(struct intel_timeline *tl) +{ + struct i915_request *rq, *rn; + + mutex_lock(&tl->mutex); + list_for_each_entry_safe(rq, rn, &tl->requests, link) + if (!i915_request_retire(rq)) + break; + mutex_unlock(&tl->mutex); + + return !i915_active_fence_isset(&tl->last_request); +} + +static struct i915_request *wrap_timeline(struct i915_request *rq) +{ + struct intel_context *ce = rq->context; + struct intel_timeline *tl = ce->timeline; + u32 seqno = rq->fence.seqno; + + while (tl->seqno >= seqno) { /* Cause a wrap */ + i915_request_put(rq); + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return rq; + + i915_request_get(rq); + i915_request_add(rq); + } + + i915_request_put(rq); + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return rq; + + i915_request_get(rq); + i915_request_add(rq); + + return rq; +} + +static int live_hwsp_read(void *arg) +{ + struct intel_gt *gt = arg; + struct hwsp_watcher watcher[2] = {}; + struct intel_engine_cs *engine; + struct intel_timeline *tl; + enum intel_engine_id id; + int err = 0; + int i; + + /* + * If we take a reference to the HWSP for reading on the GPU, that + * read may be arbitrarily delayed (either by foreign fence or + * priority saturation) and a wrap can happen within 30 minutes. + * When the GPU read is finally submitted it should be correct, + * even across multiple wraps. 
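
[Editor's sketch] The watcher comparisons in this selftest (cmp_lt()/cmp_gte()) deliberately use raw u32 ordering, which only works because the test controls where the wraps happen. Code that must order seqnos across an arbitrary wrap relies on two's-complement distance instead. A minimal standalone sketch of that convention, in the spirit of the driver's i915_seqno_passed() (named here as an assumption, not a quote of it):

	/*
	 * Wrap-safe seqno ordering: the signed difference is non-negative
	 * whenever a is at most 2^31 - 1 steps ahead of b, even across the
	 * 0xffffffff -> 0 boundary.
	 */
	static inline bool seqno_passed(u32 a, u32 b)
	{
		return (s32)(a - b) >= 0;
	}

For example, seqno_passed(2, 0xfffffffe) is true: 2 - 0xfffffffe is 4 as a u32, a small positive signed value, so a seqno sampled just after a wrap still orders after one sampled just before it.
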
+ */ + + if (INTEL_GEN(gt->i915) < 8) /* CS convenience [SRM/LRM] */ + return 0; + + tl = intel_timeline_create(gt); + if (IS_ERR(tl)) + return PTR_ERR(tl); + + if (!tl->hwsp_cacheline) + goto out_free; + + for (i = 0; i < ARRAY_SIZE(watcher); i++) { + err = setup_watcher(&watcher[i], gt); + if (err) + goto out; + } + + for_each_engine(engine, gt, id) { + struct intel_context *ce; + unsigned long count = 0; + IGT_TIMEOUT(end_time); + + /* Create a request we can use for remote reading of the HWSP */ + err = create_watcher(&watcher[1], engine, SZ_512K); + if (err) + goto out; + + do { + struct i915_sw_fence *submit; + struct i915_request *rq; + u32 hwsp; + + submit = heap_fence_create(GFP_KERNEL); + if (!submit) { + err = -ENOMEM; + goto out; + } + + err = create_watcher(&watcher[0], engine, SZ_4K); + if (err) + goto out; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out; + } + + /* Skip to the end, saving 30 minutes of nops */ + tl->seqno = -10u + 2 * (count & 3); + WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno); + ce->timeline = intel_timeline_get(tl); + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + intel_context_put(ce); + goto out; + } + + err = i915_sw_fence_await_dma_fence(&rq->submit, + &watcher[0].rq->fence, 0, + GFP_KERNEL); + if (err < 0) { + i915_request_add(rq); + intel_context_put(ce); + goto out; + } + + mutex_lock(&watcher[0].rq->context->timeline->mutex); + err = intel_timeline_read_hwsp(rq, watcher[0].rq, &hwsp); + if (err == 0) + err = emit_read_hwsp(watcher[0].rq, /* before */ + rq->fence.seqno, hwsp, + &watcher[0].addr); + mutex_unlock(&watcher[0].rq->context->timeline->mutex); + if (err) { + i915_request_add(rq); + intel_context_put(ce); + goto out; + } + + mutex_lock(&watcher[1].rq->context->timeline->mutex); + err = intel_timeline_read_hwsp(rq, watcher[1].rq, &hwsp); + if (err == 0) + err = emit_read_hwsp(watcher[1].rq, /* after */ + rq->fence.seqno, hwsp, + &watcher[1].addr); + mutex_unlock(&watcher[1].rq->context->timeline->mutex); + if (err) { + i915_request_add(rq); + intel_context_put(ce); + goto out; + } + + i915_request_get(rq); + i915_request_add(rq); + + rq = wrap_timeline(rq); + intel_context_put(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + + err = i915_sw_fence_await_dma_fence(&watcher[1].rq->submit, + &rq->fence, 0, + GFP_KERNEL); + if (err < 0) { + i915_request_put(rq); + goto out; + } + + err = check_watcher(&watcher[0], "before", cmp_lt); + i915_sw_fence_commit(submit); + heap_fence_put(submit); + if (err) { + i915_request_put(rq); + goto out; + } + count++; + + if (8 * watcher[1].rq->ring->emit > + 3 * watcher[1].rq->ring->size) { + i915_request_put(rq); + break; + } + + /* Flush the timeline before manually wrapping again */ + if (i915_request_wait(rq, + I915_WAIT_INTERRUPTIBLE, + HZ) < 0) { + err = -ETIME; + i915_request_put(rq); + goto out; + } + + retire_requests(tl); + i915_request_put(rq); + } while (!__igt_timeout(end_time, NULL)); + WRITE_ONCE(*(u32 *)tl->hwsp_seqno, 0xdeadbeef); + + pr_info("%s: simulated %lu wraps\n", engine->name, count); + err = check_watcher(&watcher[1], "after", cmp_gte); + if (err) + goto out; + } + +out: + for (i = 0; i < ARRAY_SIZE(watcher); i++) + cleanup_watcher(&watcher[i]); + + if (igt_flush_test(gt->i915)) + err = -EIO; + +out_free: + intel_timeline_put(tl); + return err; +} + static int live_hwsp_rollover_kernel(void *arg) { struct intel_gt *gt = arg; @@ -998,6 +1371,7 @@ int intel_timeline_live_selftests(struct 
drm_i915_private *i915) SUBTEST(live_hwsp_engine), SUBTEST(live_hwsp_alternate), SUBTEST(live_hwsp_wrap), + SUBTEST(live_hwsp_read), SUBTEST(live_hwsp_rollover_kernel), SUBTEST(live_hwsp_rollover_user), }; diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c index f011ea42487e..5982b62f913d 100644 --- a/drivers/gpu/drm/i915/gt/shmem_utils.c +++ b/drivers/gpu/drm/i915/gt/shmem_utils.c @@ -73,7 +73,7 @@ void *shmem_pin_map(struct file *file) mapping_set_unevictable(file->f_mapping); return vaddr; err_page: - while (--i >= 0) + while (i--) put_page(pages[i]); kvfree(pages); return NULL; @@ -103,10 +103,13 @@ static int __shmem_rw(struct file *file, loff_t off, return PTR_ERR(page); vaddr = kmap(page); - if (write) + if (write) { memcpy(vaddr + offset_in_page(off), ptr, this); - else + set_page_dirty(page); + } else { memcpy(ptr, vaddr + offset_in_page(off), this); + } + mark_page_accessed(page); kunmap(page); put_page(page); diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c b/drivers/gpu/drm/i915/gt/sysfs_engines.c index 535cc1169e54..967031056202 100644 --- a/drivers/gpu/drm/i915/gt/sysfs_engines.c +++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c @@ -79,14 +79,12 @@ static ssize_t repr_trim(char *buf, ssize_t len) static ssize_t __caps_show(struct intel_engine_cs *engine, - u32 caps, char *buf, bool show_unknown) + unsigned long caps, char *buf, bool show_unknown) { const char * const *repr; int count, n; ssize_t len; - BUILD_BUG_ON(!typecheck(typeof(caps), engine->uabi_capabilities)); - switch (engine->class) { case VIDEO_DECODE_CLASS: repr = vcs_caps; @@ -103,12 +101,10 @@ __caps_show(struct intel_engine_cs *engine, count = 0; break; } - GEM_BUG_ON(count > BITS_PER_TYPE(typeof(caps))); + GEM_BUG_ON(count > BITS_PER_LONG); len = 0; - for_each_set_bit(n, - (unsigned long *)&caps, - show_unknown ? BITS_PER_TYPE(typeof(caps)) : count) { + for_each_set_bit(n, &caps, show_unknown ? 
BITS_PER_LONG : count) { if (n >= count || !repr[n]) { if (GEM_WARN_ON(show_unknown)) len += snprintf(buf + len, PAGE_SIZE - len, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index e4aaa5f29796..2a343a977987 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -213,23 +213,6 @@ static u32 guc_ctl_feature_flags(struct intel_guc *guc) return flags; } -static u32 guc_ctl_ctxinfo_flags(struct intel_guc *guc) -{ - u32 flags = 0; - - if (intel_guc_submission_is_used(guc)) { - u32 ctxnum, base; - - base = intel_guc_ggtt_offset(guc, guc->stage_desc_pool); - ctxnum = GUC_MAX_STAGE_DESCRIPTORS / 16; - - base >>= PAGE_SHIFT; - flags |= (base << GUC_CTL_BASE_ADDR_SHIFT) | - (ctxnum << GUC_CTL_CTXNUM_IN16_SHIFT); - } - return flags; -} - static u32 guc_ctl_log_params_flags(struct intel_guc *guc) { u32 offset = intel_guc_ggtt_offset(guc, guc->log.vma) >> PAGE_SHIFT; @@ -291,7 +274,6 @@ static void guc_init_params(struct intel_guc *guc) BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32)); - params[GUC_CTL_CTXINFO] = guc_ctl_ctxinfo_flags(guc); params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc); params[GUC_CTL_FEATURE] = guc_ctl_feature_flags(guc); params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index d44061033f23..5212ff844292 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -10,11 +10,52 @@ /* * The Additional Data Struct (ADS) has pointers for different buffers used by - * the GuC. One single gem object contains the ADS struct itself (guc_ads), the - * scheduling policies (guc_policies), a structure describing a collection of - * register sets (guc_mmio_reg_state) and some extra pages for the GuC to save - * its internal state for sleep. + * the GuC. One single gem object contains the ADS struct itself (guc_ads) and + * all the extra buffers indirectly linked via the ADS struct's entries. 
+ * + * Layout of the ADS blob allocated for the GuC: + * + * +---------------------------------------+ <== base + * | guc_ads | + * +---------------------------------------+ + * | guc_policies | + * +---------------------------------------+ + * | guc_gt_system_info | + * +---------------------------------------+ + * | guc_clients_info | + * +---------------------------------------+ + * | guc_ct_pool_entry[size] | + * +---------------------------------------+ + * | padding | + * +---------------------------------------+ <== 4K aligned + * | private data | + * +---------------------------------------+ + * | padding | + * +---------------------------------------+ <== 4K aligned */ +struct __guc_ads_blob { + struct guc_ads ads; + struct guc_policies policies; + struct guc_gt_system_info system_info; + struct guc_clients_info clients_info; + struct guc_ct_pool_entry ct_pool[GUC_CT_POOL_SIZE]; +} __packed; + +static u32 guc_ads_private_data_size(struct intel_guc *guc) +{ + return PAGE_ALIGN(guc->fw.private_data_size); +} + +static u32 guc_ads_private_data_offset(struct intel_guc *guc) +{ + return PAGE_ALIGN(sizeof(struct __guc_ads_blob)); +} + +static u32 guc_ads_blob_size(struct intel_guc *guc) +{ + return guc_ads_private_data_offset(guc) + + guc_ads_private_data_size(guc); +} static void guc_policy_init(struct guc_policy *policy) { @@ -48,26 +89,37 @@ static void guc_ct_pool_entries_init(struct guc_ct_pool_entry *pool, u32 num) memset(pool, 0, num * sizeof(*pool)); } +static void guc_mapping_table_init(struct intel_gt *gt, + struct guc_gt_system_info *system_info) +{ + unsigned int i, j; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* Table must be set to invalid values for entries not used */ + for (i = 0; i < GUC_MAX_ENGINE_CLASSES; ++i) + for (j = 0; j < GUC_MAX_INSTANCES_PER_CLASS; ++j) + system_info->mapping_table[i][j] = + GUC_MAX_INSTANCES_PER_CLASS; + + for_each_engine(engine, gt, id) { + u8 guc_class = engine->class; + + system_info->mapping_table[guc_class][engine->instance] = + engine->instance; + } +} + /* * The first 80 dwords of the register state context, containing the * execlists and ppgtt registers. 
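
[Editor's sketch] The sizing helpers introduced above (guc_ads_private_data_size(), guc_ads_private_data_offset(), guc_ads_blob_size()) encode the layout diagram: the static structs first, then padding up to a 4K boundary, then the firmware-defined private data, again padded. A standalone model of the arithmetic, using an invented struct size rather than the real sizeof(struct __guc_ads_blob):

	/* Model of the ADS blob sizing above; 4K pages assumed, numbers invented. */
	#define SZ_4K		0x1000u
	#define ALIGN_4K(x)	(((x) + SZ_4K - 1) & ~(SZ_4K - 1))

	static u32 private_data_offset(u32 structs_size)
	{
		return ALIGN_4K(structs_size);	/* first 4K boundary past the structs */
	}

	static u32 blob_size(u32 structs_size, u32 private_size)
	{
		return private_data_offset(structs_size) + ALIGN_4K(private_size);
	}

	/*
	 * e.g. structs of 0x1180 bytes with 0x300 bytes of private data:
	 * offset = 0x2000, total = 0x2000 + 0x1000 = 0x3000 (three pages),
	 * and both regions start 4K-aligned as the diagram requires.
	 */
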
*/ #define LR_HW_CONTEXT_SIZE (80 * sizeof(u32)) -/* The ads obj includes the struct itself and buffers passed to GuC */ -struct __guc_ads_blob { - struct guc_ads ads; - struct guc_policies policies; - struct guc_mmio_reg_state reg_state; - struct guc_gt_system_info system_info; - struct guc_clients_info clients_info; - struct guc_ct_pool_entry ct_pool[GUC_CT_POOL_SIZE]; - u8 reg_state_buffer[GUC_S3_SAVE_SPACE_PAGES * PAGE_SIZE]; -} __packed; - static void __guc_ads_init(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); + struct drm_i915_private *i915 = gt->i915; struct __guc_ads_blob *blob = guc->ads_blob; const u32 skipped_size = LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE; u32 base; @@ -99,13 +151,25 @@ static void __guc_ads_init(struct intel_guc *guc) } /* System info */ - blob->system_info.slice_enabled = hweight8(gt->info.sseu.slice_mask); - blob->system_info.rcs_enabled = 1; - blob->system_info.bcs_enabled = 1; + blob->system_info.engine_enabled_masks[RENDER_CLASS] = 1; + blob->system_info.engine_enabled_masks[COPY_ENGINE_CLASS] = 1; + blob->system_info.engine_enabled_masks[VIDEO_DECODE_CLASS] = VDBOX_MASK(gt); + blob->system_info.engine_enabled_masks[VIDEO_ENHANCEMENT_CLASS] = VEBOX_MASK(gt); + + blob->system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED] = + hweight8(gt->info.sseu.slice_mask); + blob->system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_VDBOX_SFC_SUPPORT_MASK] = + gt->info.vdbox_sfc_access; + + if (INTEL_GEN(i915) >= 12 && !IS_DGFX(i915)) { + u32 distdbreg = intel_uncore_read(gt->uncore, + GEN12_DIST_DBS_POPULATED); + blob->system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI] = + ((distdbreg >> GEN12_DOORBELLS_PER_SQIDI_SHIFT) & + GEN12_DOORBELLS_PER_SQIDI) + 1; + } - blob->system_info.vdbox_enable_mask = VDBOX_MASK(gt); - blob->system_info.vebox_enable_mask = VEBOX_MASK(gt); - blob->system_info.vdbox_sfc_support_mask = gt->info.vdbox_sfc_access; + guc_mapping_table_init(guc_to_gt(guc), &blob->system_info); base = intel_guc_ggtt_offset(guc, guc->ads_vma); @@ -118,11 +182,12 @@ static void __guc_ads_init(struct intel_guc *guc) /* ADS */ blob->ads.scheduler_policies = base + ptr_offset(blob, policies); - blob->ads.reg_state_buffer = base + ptr_offset(blob, reg_state_buffer); - blob->ads.reg_state_addr = base + ptr_offset(blob, reg_state); blob->ads.gt_system_info = base + ptr_offset(blob, system_info); blob->ads.clients_info = base + ptr_offset(blob, clients_info); + /* Private Data */ + blob->ads.private_data = base + guc_ads_private_data_offset(guc); + i915_gem_object_flush_map(guc->ads_vma->obj); } @@ -135,14 +200,15 @@ static void __guc_ads_init(struct intel_guc *guc) */ int intel_guc_ads_create(struct intel_guc *guc) { - const u32 size = PAGE_ALIGN(sizeof(struct __guc_ads_blob)); + u32 size; int ret; GEM_BUG_ON(guc->ads_vma); + size = guc_ads_blob_size(guc); + ret = intel_guc_allocate_and_map_vma(guc, size, &guc->ads_vma, (void **)&guc->ads_blob); - if (ret) return ret; @@ -154,6 +220,19 @@ int intel_guc_ads_create(struct intel_guc *guc) void intel_guc_ads_destroy(struct intel_guc *guc) { i915_vma_unpin_and_release(&guc->ads_vma, I915_VMA_RELEASE_MAP); + guc->ads_blob = NULL; +} + +static void guc_ads_private_data_reset(struct intel_guc *guc) +{ + u32 size; + + size = guc_ads_private_data_size(guc); + if (!size) + return; + + memset((void *)guc->ads_blob + guc_ads_private_data_offset(guc), 0, + size); } /** @@ -168,5 +247,8 @@ void intel_guc_ads_reset(struct intel_guc *guc) { if (!guc->ads_vma) return; 
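
[Editor's sketch] One detail of __guc_ads_init() above: on Gen12 parts the per-SQIDI doorbell count handed to the GuC is an 8-bit field of GEN12_DIST_DBS_POPULATED plus one (the register and field macros are added later in this diff, in intel_guc_reg.h). A worked decode with an invented raw register value:

	static u32 doorbells_per_sqidi(u32 distdbreg)
	{
		return ((distdbreg >> GEN12_DOORBELLS_PER_SQIDI_SHIFT) &
			GEN12_DOORBELLS_PER_SQIDI) + 1;
	}

	/* e.g. distdbreg == 0x00030001 (invented): bits [23:16] = 0x03 -> 4 */
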
+ __guc_ads_init(guc); + + guc_ads_private_data_reset(guc); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 11742fca0e9e..fa9e048cc65f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -210,6 +210,7 @@ void intel_guc_ct_fini(struct intel_guc_ct *ct) GEM_BUG_ON(ct->enabled); i915_vma_unpin_and_release(&ct->vma, I915_VMA_RELEASE_MAP); + memset(ct, 0, sizeof(*ct)); } /** diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index d4a87f4c9421..f9d0907ea1a5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -76,6 +76,7 @@ static inline bool guc_ready(struct intel_uncore *uncore, u32 *status) static int guc_wait_ucode(struct intel_uncore *uncore) { + struct drm_device *drm = &uncore->i915->drm; u32 status; int ret; @@ -90,15 +91,27 @@ static int guc_wait_ucode(struct intel_uncore *uncore) ret = wait_for(guc_ready(uncore, &status), 100); DRM_DEBUG_DRIVER("GuC status %#x\n", status); - if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) { - DRM_ERROR("GuC firmware signature verification failed\n"); - ret = -ENOEXEC; - } - - if ((status & GS_UKERNEL_MASK) == GS_UKERNEL_EXCEPTION) { - DRM_ERROR("GuC firmware exception. EIP: %#x\n", - intel_uncore_read(uncore, SOFT_SCRATCH(13))); - ret = -ENXIO; + if (ret) { + drm_err(drm, "GuC load failed: status = 0x%08X\n", status); + drm_err(drm, "GuC load failed: status: Reset = %d, " + "BootROM = 0x%02X, UKernel = 0x%02X, " + "MIA = 0x%02X, Auth = 0x%02X\n", + REG_FIELD_GET(GS_MIA_IN_RESET, status), + REG_FIELD_GET(GS_BOOTROM_MASK, status), + REG_FIELD_GET(GS_UKERNEL_MASK, status), + REG_FIELD_GET(GS_MIA_MASK, status), + REG_FIELD_GET(GS_AUTH_STATUS_MASK, status)); + + if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) { + drm_err(drm, "GuC firmware signature verification failed\n"); + ret = -ENOEXEC; + } + + if ((status & GS_UKERNEL_MASK) == GS_UKERNEL_EXCEPTION) { + drm_err(drm, "GuC firmware exception. 
EIP: %#x\n", + intel_uncore_read(uncore, SOFT_SCRATCH(13))); + ret = -ENXIO; + } } return ret; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index a6b733c146c9..79c560d9c0b6 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -26,8 +26,8 @@ #define GUC_VIDEO_ENGINE2 4 #define GUC_MAX_ENGINES_NUM (GUC_VIDEO_ENGINE2 + 1) -#define GUC_MAX_ENGINE_CLASSES 5 -#define GUC_MAX_INSTANCES_PER_CLASS 16 +#define GUC_MAX_ENGINE_CLASSES 16 +#define GUC_MAX_INSTANCES_PER_CLASS 32 #define GUC_DOORBELL_INVALID 256 @@ -62,12 +62,7 @@ #define GUC_STAGE_DESC_ATTR_PCH BIT(6) #define GUC_STAGE_DESC_ATTR_TERMINATED BIT(7) -/* New GuC control data */ -#define GUC_CTL_CTXINFO 0 -#define GUC_CTL_CTXNUM_IN16_SHIFT 0 -#define GUC_CTL_BASE_ADDR_SHIFT 12 - -#define GUC_CTL_LOG_PARAMS 1 +#define GUC_CTL_LOG_PARAMS 0 #define GUC_LOG_VALID (1 << 0) #define GUC_LOG_NOTIFY_ON_HALF_FULL (1 << 1) #define GUC_LOG_ALLOC_IN_MEGABYTE (1 << 3) @@ -79,11 +74,11 @@ #define GUC_LOG_ISR_MASK (0x7 << GUC_LOG_ISR_SHIFT) #define GUC_LOG_BUF_ADDR_SHIFT 12 -#define GUC_CTL_WA 2 -#define GUC_CTL_FEATURE 3 +#define GUC_CTL_WA 1 +#define GUC_CTL_FEATURE 2 #define GUC_CTL_DISABLE_SCHEDULER (1 << 14) -#define GUC_CTL_DEBUG 4 +#define GUC_CTL_DEBUG 3 #define GUC_LOG_VERBOSITY_SHIFT 0 #define GUC_LOG_VERBOSITY_LOW (0 << GUC_LOG_VERBOSITY_SHIFT) #define GUC_LOG_VERBOSITY_MED (1 << GUC_LOG_VERBOSITY_SHIFT) @@ -97,12 +92,37 @@ #define GUC_LOG_DISABLED (1 << 6) #define GUC_PROFILE_ENABLED (1 << 7) -#define GUC_CTL_ADS 5 +#define GUC_CTL_ADS 4 #define GUC_ADS_ADDR_SHIFT 1 #define GUC_ADS_ADDR_MASK (0xFFFFF << GUC_ADS_ADDR_SHIFT) #define GUC_CTL_MAX_DWORDS (SOFT_SCRATCH_COUNT - 2) /* [1..14] */ +/* Generic GT SysInfo data types */ +#define GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED 0 +#define GUC_GENERIC_GT_SYSINFO_VDBOX_SFC_SUPPORT_MASK 1 +#define GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI 2 +#define GUC_GENERIC_GT_SYSINFO_MAX 16 + +/* + * The class goes in bits [0..2] of the GuC ID, the instance in bits [3..6]. + * Bit 7 can be used for operations that apply to all engine classes&instances. + */ +#define GUC_ENGINE_CLASS_SHIFT 0 +#define GUC_ENGINE_CLASS_MASK (0x7 << GUC_ENGINE_CLASS_SHIFT) +#define GUC_ENGINE_INSTANCE_SHIFT 3 +#define GUC_ENGINE_INSTANCE_MASK (0xf << GUC_ENGINE_INSTANCE_SHIFT) +#define GUC_ENGINE_ALL_INSTANCES BIT(7) + +#define MAKE_GUC_ID(class, instance) \ + (((class) << GUC_ENGINE_CLASS_SHIFT) | \ + ((instance) << GUC_ENGINE_INSTANCE_SHIFT)) + +#define GUC_ID_TO_ENGINE_CLASS(guc_id) \ + (((guc_id) & GUC_ENGINE_CLASS_MASK) >> GUC_ENGINE_CLASS_SHIFT) +#define GUC_ID_TO_ENGINE_INSTANCE(guc_id) \ + (((guc_id) & GUC_ENGINE_INSTANCE_MASK) >> GUC_ENGINE_INSTANCE_SHIFT) + /* Work item for submitting workloads into work queue of GuC. 
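
[Editor's sketch] The GuC ID packing defined above is a plain bitfield split; a round-trip check with arbitrary values (class 1, instance 2), not part of the patch, illustrates the layout:

	/* (1 << GUC_ENGINE_CLASS_SHIFT) | (2 << GUC_ENGINE_INSTANCE_SHIFT) == 0x11 */
	static_assert(MAKE_GUC_ID(1, 2) == 0x11);
	static_assert(GUC_ID_TO_ENGINE_CLASS(0x11) == 1);
	static_assert(GUC_ID_TO_ENGINE_INSTANCE(0x11) == 2);
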
*/ struct guc_wq_item { u32 header; @@ -336,11 +356,6 @@ struct guc_policies { } __packed; /* GuC MMIO reg state struct */ - - -#define GUC_REGSET_MAX_REGISTERS 64 -#define GUC_S3_SAVE_SPACE_PAGES 10 - struct guc_mmio_reg { u32 offset; u32 value; @@ -348,28 +363,18 @@ struct guc_mmio_reg { #define GUC_REGSET_MASKED (1 << 0) } __packed; -struct guc_mmio_regset { - struct guc_mmio_reg registers[GUC_REGSET_MAX_REGISTERS]; - u32 values_valid; - u32 number_of_registers; -} __packed; - /* GuC register sets */ -struct guc_mmio_reg_state { - struct guc_mmio_regset engine_reg[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS]; - u32 reserved[98]; +struct guc_mmio_reg_set { + u32 address; + u16 count; + u16 reserved; } __packed; /* HW info */ struct guc_gt_system_info { - u32 slice_enabled; - u32 rcs_enabled; - u32 reserved0; - u32 bcs_enabled; - u32 vdbox_enable_mask; - u32 vdbox_sfc_support_mask; - u32 vebox_enable_mask; - u32 reserved[9]; + u8 mapping_table[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS]; + u32 engine_enabled_masks[GUC_MAX_ENGINE_CLASSES]; + u32 generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_MAX]; } __packed; /* Clients info */ @@ -390,15 +395,16 @@ struct guc_clients_info { /* GuC Additional Data Struct */ struct guc_ads { - u32 reg_state_addr; - u32 reg_state_buffer; + struct guc_mmio_reg_set reg_state_list[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS]; + u32 reserved0; u32 scheduler_policies; u32 gt_system_info; u32 clients_info; u32 control_data; u32 golden_context_lrca[GUC_MAX_ENGINE_CLASSES]; u32 eng_state_size[GUC_MAX_ENGINE_CLASSES]; - u32 reserved[16]; + u32 private_data; + u32 reserved[15]; } __packed; /* GuC logging structures */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h index 1949346e714e..b37fc2ffaef2 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h @@ -118,6 +118,11 @@ struct guc_doorbell_info { #define GEN8_DRB_VALID (1<<0) #define GEN8_DRBREGU(x) _MMIO(0x1000 + (x) * 8 + 4) +#define GEN12_DIST_DBS_POPULATED _MMIO(0xd08) +#define GEN12_DOORBELLS_PER_SQIDI_SHIFT 16 +#define GEN12_DOORBELLS_PER_SQIDI (0xff) +#define GEN12_SQIDIS_DOORBELL_EXIST (0xffff) + #define DE_GUCRMR _MMIO(0x44054) #define GUC_BCS_RCS_IER _MMIO(0xC550) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index d6f55f70889d..4e6070e95fe9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -231,13 +231,15 @@ static int guc_enable_communication(struct intel_guc *guc) intel_guc_ct_event_handler(&guc->ct); spin_unlock_irq(&i915->irq_lock); - DRM_INFO("GuC communication enabled\n"); + drm_dbg(&i915->drm, "GuC communication enabled\n"); return 0; } static void guc_disable_communication(struct intel_guc *guc) { + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + /* * Events generated during or after CT disable are logged by the GuC * via mmio.
Make sure the register is clear before disabling CT since @@ -257,7 +259,7 @@ static void guc_disable_communication(struct intel_guc *guc) */ guc_get_mmio_msg(guc); - DRM_INFO("GuC communication disabled\n"); + drm_dbg(&i915->drm, "GuC communication disabled\n"); } static void __uc_fetch_firmwares(struct intel_uc *uc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 037bcaf3c8b5..180c23e2e25e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -44,24 +44,20 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, * List of required GuC and HuC binaries per-platform. * Must be ordered based on platform + revid, from newer to older. * - * TGL 35.2 is interface-compatible with 33.0 for previous Gens. The deltas - * between 33.0 and 35.2 are only related to new additions to support new Gen12 - * features. - * * Note that RKL uses the same firmware as TGL. */ #define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \ - fw_def(ROCKETLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 5, 0)) \ - fw_def(TIGERLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 5, 0)) \ - fw_def(JASPERLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl, 9, 0, 0)) \ - fw_def(ELKHARTLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl, 9, 0, 0)) \ - fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 9, 0, 0)) \ - fw_def(COMETLAKE, 5, guc_def(cml, 33, 0, 0), huc_def(cml, 4, 0, 0)) \ - fw_def(COFFEELAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 4, 0, 0)) \ - fw_def(GEMINILAKE, 0, guc_def(glk, 33, 0, 0), huc_def(glk, 4, 0, 0)) \ - fw_def(KABYLAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 4, 0, 0)) \ - fw_def(BROXTON, 0, guc_def(bxt, 33, 0, 0), huc_def(bxt, 2, 0, 0)) \ - fw_def(SKYLAKE, 0, guc_def(skl, 33, 0, 0), huc_def(skl, 2, 0, 0)) + fw_def(ROCKETLAKE, 0, guc_def(tgl, 49, 0, 1), huc_def(tgl, 7, 5, 0)) \ + fw_def(TIGERLAKE, 0, guc_def(tgl, 49, 0, 1), huc_def(tgl, 7, 5, 0)) \ + fw_def(JASPERLAKE, 0, guc_def(ehl, 49, 0, 1), huc_def(ehl, 9, 0, 0)) \ + fw_def(ELKHARTLAKE, 0, guc_def(ehl, 49, 0, 1), huc_def(ehl, 9, 0, 0)) \ + fw_def(ICELAKE, 0, guc_def(icl, 49, 0, 1), huc_def(icl, 9, 0, 0)) \ + fw_def(COMETLAKE, 5, guc_def(cml, 49, 0, 1), huc_def(cml, 4, 0, 0)) \ + fw_def(COFFEELAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \ + fw_def(GEMINILAKE, 0, guc_def(glk, 49, 0, 1), huc_def(glk, 4, 0, 0)) \ + fw_def(KABYLAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \ + fw_def(BROXTON, 0, guc_def(bxt, 49, 0, 1), huc_def(bxt, 2, 0, 0)) \ + fw_def(SKYLAKE, 0, guc_def(skl, 49, 0, 1), huc_def(skl, 2, 0, 0)) #define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \ "i915/" \ @@ -372,6 +368,9 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) } } + if (uc_fw->type == INTEL_UC_FW_TYPE_GUC) + uc_fw->private_data_size = css->private_data_size; + obj = i915_gem_object_create_shmem_from_data(i915, fw->data, fw->size); if (IS_ERR(obj)) { err = PTR_ERR(obj); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index 23d3a423ac0f..99bb1fe1af66 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -88,6 +88,8 @@ struct intel_uc_fw { u32 rsa_size; u32 ucode_size; + + u32 private_data_size; }; #ifdef CONFIG_DRM_I915_DEBUG_GUC diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h index 029214cdedd5..e41ffc7a7fbc 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h +++ 
b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h @@ -69,7 +69,11 @@ struct uc_css_header { #define CSS_SW_VERSION_UC_MAJOR (0xFF << 16) #define CSS_SW_VERSION_UC_MINOR (0xFF << 8) #define CSS_SW_VERSION_UC_PATCH (0xFF << 0) - u32 reserved[14]; + u32 reserved0[13]; + union { + u32 private_data_size; /* only applies to GuC */ + u32 reserved1; + }; u32 header_info; } __packed; static_assert(sizeof(struct uc_css_header) == 128);
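
[Editor's sketch] The union above turns one formerly reserved CSS dword into a GuC-only private_data_size; blobs built before the field existed carry zero there, which the intel_uc_fw_fetch() hunk earlier in this patch simply stores and intel_guc_ads.c page-aligns away to nothing. A standalone sketch of that backward-compatible parse (struct abbreviated, helper invented):

	struct css_tail {			/* abbreviated uc_css_header tail */
		u32 reserved0[13];
		union {
			u32 private_data_size;	/* 0 on pre-field firmware */
			u32 reserved1;
		};
		u32 header_info;
	};

	static u32 guc_private_pages(const struct css_tail *css)
	{
		/* PAGE_ALIGN(0) == 0: old firmware asks for no extra space */
		return PAGE_ALIGN(css->private_data_size) >> PAGE_SHIFT;
	}
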